Browse Source

* synchronized with trunk

git-svn-id: branches/unicodekvm@48450 -
nickysn 4 years ago
parent
commit
20a6e682a9
100 changed files with 16921 additions and 4363 deletions
  1. 312 17
      .gitattributes
  2. 118 0
      .gitignore
  3. 285 40
      Makefile
  4. 25 8
      Makefile.fpc
  5. 442 69
      compiler/Makefile
  6. 115 77
      compiler/Makefile.fpc
  7. 266 56
      compiler/aarch64/a64att.inc
  8. 210 0
      compiler/aarch64/a64atts.inc
  9. 498 76
      compiler/aarch64/a64ins.dat
  10. 266 56
      compiler/aarch64/a64op.inc
  11. 453 37
      compiler/aarch64/a64reg.dat
  12. 254 38
      compiler/aarch64/aasmcpu.pas
  13. 536 28
      compiler/aarch64/agcpugas.pas
  14. 728 57
      compiler/aarch64/aoptcpu.pas
  15. 483 208
      compiler/aarch64/cgcpu.pas
  16. 82 7
      compiler/aarch64/cpubase.pas
  17. 2 2
      compiler/aarch64/cpuinfo.pas
  18. 1 1
      compiler/aarch64/cpunode.pas
  19. 50 37
      compiler/aarch64/cpupara.pas
  20. 81 2
      compiler/aarch64/cpupi.pas
  21. 5 2
      compiler/aarch64/cputarg.pas
  22. 1 1
      compiler/aarch64/hlcgcpu.pas
  23. 90 0
      compiler/aarch64/ncpucon.pas
  24. 564 0
      compiler/aarch64/ncpuflw.pas
  25. 30 1
      compiler/aarch64/ncpuinl.pas
  26. 77 10
      compiler/aarch64/ncpumat.pas
  27. 144 16
      compiler/aarch64/ncpuset.pas
  28. 452 36
      compiler/aarch64/ra64con.inc
  29. 420 4
      compiler/aarch64/ra64dwa.inc
  30. 1 1
      compiler/aarch64/ra64nor.inc
  31. 452 36
      compiler/aarch64/ra64num.inc
  32. 565 149
      compiler/aarch64/ra64rni.inc
  33. 561 145
      compiler/aarch64/ra64sri.inc
  34. 420 4
      compiler/aarch64/ra64sta.inc
  35. 420 4
      compiler/aarch64/ra64std.inc
  36. 420 4
      compiler/aarch64/ra64sup.inc
  37. 12 3
      compiler/aarch64/racpu.pas
  38. 412 19
      compiler/aarch64/racpugas.pas
  39. 10 0
      compiler/aarch64/rgcpu.pas
  40. 48 0
      compiler/aarch64/tripletcpu.pas
  41. 31 8
      compiler/aasmbase.pas
  42. 48 28
      compiler/aasmcnst.pas
  43. 1 1
      compiler/aasmdef.pas
  44. 130 27
      compiler/aasmtai.pas
  45. 181 112
      compiler/aggas.pas
  46. 10 16
      compiler/aopt.pas
  47. 62 27
      compiler/aoptbase.pas
  48. 1041 129
      compiler/aoptobj.pas
  49. 2 2
      compiler/aoptutils.pas
  50. 26 9
      compiler/arm/aasmcpu.pas
  51. 34 9
      compiler/arm/agarmgas.pas
  52. 1398 1755
      compiler/arm/aoptcpu.pas
  53. 1 1
      compiler/arm/armins.dat
  54. 1 1
      compiler/arm/armtab.inc
  55. 102 70
      compiler/arm/cgcpu.pas
  56. 36 9
      compiler/arm/cpubase.pas
  57. 22 6
      compiler/arm/cpuelf.pas
  58. 44 26
      compiler/arm/cpuinfo.pas
  59. 2 1
      compiler/arm/cpunode.pas
  60. 8 8
      compiler/arm/cpupara.pas
  61. 1 1
      compiler/arm/cpupi.pas
  62. 7 0
      compiler/arm/cputarg.pas
  63. 1 1
      compiler/arm/hlcgcpu.pas
  64. 9 67
      compiler/arm/narmadd.pas
  65. 1 1
      compiler/arm/narmcal.pas
  66. 19 32
      compiler/arm/narmcnv.pas
  67. 6 2
      compiler/arm/narmcon.pas
  68. 13 13
      compiler/arm/narminl.pas
  69. 93 16
      compiler/arm/narmld.pas
  70. 11 19
      compiler/arm/narmmat.pas
  71. 332 0
      compiler/arm/narmutil.pas
  72. 1 1
      compiler/arm/raarmgas.pas
  73. 2 2
      compiler/arm/rgcpu.pas
  74. 51 0
      compiler/arm/tripletcpu.pas
  75. 1275 0
      compiler/armgen/aoptarm.pas
  76. 2 2
      compiler/armgen/armpara.pas
  77. 221 27
      compiler/assemble.pas
  78. 114 30
      compiler/avr/aasmcpu.pas
  79. 4 5
      compiler/avr/agavrgas.pas
  80. 94 29
      compiler/avr/aoptcpu.pas
  81. 8 7
      compiler/avr/ccpuinnr.inc
  82. 318 197
      compiler/avr/cgcpu.pas
  83. 46 7
      compiler/avr/cpubase.pas
  84. 442 284
      compiler/avr/cpuinfo.pas
  85. 23 21
      compiler/avr/cpupara.pas
  86. 3 2
      compiler/avr/cpupi.pas
  87. 4 22
      compiler/avr/itcpugas.pas
  88. 13 7
      compiler/avr/navradd.pas
  89. 50 9
      compiler/avr/navrmat.pas
  90. 5 9
      compiler/avr/raavrgas.pas
  91. 9 11
      compiler/avr/rgcpu.pas
  92. 45 0
      compiler/avr/tripletcpu.pas
  93. 3 3
      compiler/blockutl.pas
  94. 3 8
      compiler/browcol.pas
  95. 1 2
      compiler/ccharset.pas
  96. 46 4
      compiler/cclasses.pas
  97. 31 0
      compiler/cepiktimer.pas
  98. 52 13
      compiler/cfidwarf.pas
  99. 10 7
      compiler/cfileutl.pas
  100. 61 6
      compiler/cg64f32.pas

File diff suppressed because it is too large
+ 312 - 17
.gitattributes


+ 118 - 0
.gitignore

@@ -3,6 +3,7 @@
 /*.o
 /*.ppu
 /*.s
+/bin
 /build-stamp.*
 compiler/*.bak
 compiler/*.exe
@@ -202,8 +203,26 @@ compiler/utils/fpcmade.*
 compiler/utils/fpcmkcfg
 compiler/utils/fpcsubst
 compiler/utils/fppkg
+compiler/utils/gia64reg
+compiler/utils/gppc386
+compiler/utils/mk68kins
+compiler/utils/mk68kreg
+compiler/utils/mka64ins
+compiler/utils/mka64reg
+compiler/utils/mkarmins
+compiler/utils/mkarmreg
+compiler/utils/mkavrreg
+compiler/utils/mkia64reg
+compiler/utils/mkjvmreg
+compiler/utils/mkmpsreg
+compiler/utils/mkppcreg
+compiler/utils/mkspreg
 compiler/utils/mkx86ins
 compiler/utils/mkx86reg
+compiler/utils/mkxtensareg
+compiler/utils/mkz80ins
+compiler/utils/mkz80reg
+compiler/utils/msg2inc
 compiler/utils/ppudump
 compiler/utils/ppufiles
 compiler/utils/ppumove
@@ -555,6 +574,7 @@ packages/chm/*.o
 packages/chm/*.ppu
 packages/chm/*.s
 packages/chm/Package.fpc
+packages/chm/bin
 packages/chm/build-stamp.*
 packages/chm/examples/*.bak
 packages/chm/examples/*.exe
@@ -1778,6 +1798,7 @@ packages/gdbint/*.o
 packages/gdbint/*.ppu
 packages/gdbint/*.s
 packages/gdbint/Package.fpc
+packages/gdbint/bin
 packages/gdbint/build-stamp.*
 packages/gdbint/examples/*.bak
 packages/gdbint/examples/*.exe
@@ -2673,6 +2694,7 @@ packages/ide/*.exe
 packages/ide/*.o
 packages/ide/*.ppu
 packages/ide/*.s
+packages/ide/bin
 packages/ide/compiler/*.bak
 packages/ide/compiler/*.exe
 packages/ide/compiler/*.o
@@ -6154,6 +6176,7 @@ packages/xforms/*.o
 packages/xforms/*.ppu
 packages/xforms/*.s
 packages/xforms/Package.fpc
+packages/xforms/bin
 packages/xforms/build-stamp.*
 packages/xforms/examples/*.bak
 packages/xforms/examples/*.exe
@@ -7652,6 +7675,7 @@ utils/*.o
 utils/*.ppu
 utils/*.s
 utils/Package.fpc
+utils/bin
 utils/bin2obj
 utils/bin2obj.exe
 utils/data2inc
@@ -7704,15 +7728,20 @@ utils/fpcm/*.o
 utils/fpcm/*.ppu
 utils/fpcm/*.s
 utils/fpcm/Package.fpc
+utils/fpcm/bin
 utils/fpcm/fpcmade.*
+utils/fpcm/fpcmake
 utils/fpcm/units
 utils/fpcmade.*
+utils/fpcmkcfg/bin
+utils/fpcmkcfg/units
 utils/fpcres/*.bak
 utils/fpcres/*.exe
 utils/fpcres/*.o
 utils/fpcres/*.ppu
 utils/fpcres/*.s
 utils/fpcres/Package.fpc
+utils/fpcres/bin
 utils/fpcres/build-stamp.*
 utils/fpcres/fpcmade.*
 utils/fpcres/units
@@ -7722,6 +7751,7 @@ utils/fpcreslipo/*.o
 utils/fpcreslipo/*.ppu
 utils/fpcreslipo/*.s
 utils/fpcreslipo/Package.fpc
+utils/fpcreslipo/bin
 utils/fpcreslipo/build-stamp.*
 utils/fpcreslipo/fpcmade.*
 utils/fpcreslipo/units
@@ -7731,6 +7761,7 @@ utils/fpdoc/*.o
 utils/fpdoc/*.ppu
 utils/fpdoc/*.s
 utils/fpdoc/Package.fpc
+utils/fpdoc/bin
 utils/fpdoc/fpcmade.*
 utils/fpdoc/fpde/*.bak
 utils/fpdoc/fpde/*.exe
@@ -7757,6 +7788,8 @@ utils/fpdoc/intl/Package.fpc
 utils/fpdoc/intl/fpcmade.*
 utils/fpdoc/intl/units
 utils/fpdoc/units
+utils/fpmake
+utils/fpmake.exe
 utils/fpmc/*.bak
 utils/fpmc/*.exe
 utils/fpmc/*.o
@@ -7771,6 +7804,7 @@ utils/fppkg/*.o
 utils/fppkg/*.ppu
 utils/fppkg/*.s
 utils/fppkg/Package.fpc
+utils/fppkg/bin
 utils/fppkg/build-stamp.*
 utils/fppkg/examples/*.bak
 utils/fppkg/examples/*.exe
@@ -7807,6 +7841,7 @@ utils/fprcp/*.o
 utils/fprcp/*.ppu
 utils/fprcp/*.s
 utils/fprcp/Package.fpc
+utils/fprcp/bin
 utils/fprcp/fpcmade.*
 utils/fprcp/units
 utils/h2pas/*.bak
@@ -7815,8 +7850,45 @@ utils/h2pas/*.o
 utils/h2pas/*.ppu
 utils/h2pas/*.s
 utils/h2pas/Package.fpc
+utils/h2pas/bin
 utils/h2pas/fpcmade.*
 utils/h2pas/units
+utils/ihx2tzx/*.bak
+utils/ihx2tzx/*.exe
+utils/ihx2tzx/*.o
+utils/ihx2tzx/*.ppu
+utils/ihx2tzx/*.s
+utils/ihx2tzx/Package.fpc
+utils/ihx2tzx/bin
+utils/ihx2tzx/fpcmade.*
+utils/ihx2tzx/units
+utils/ihxutil/*.bak
+utils/ihxutil/*.exe
+utils/ihxutil/*.o
+utils/ihxutil/*.ppu
+utils/ihxutil/*.s
+utils/ihxutil/Package.fpc
+utils/ihxutil/bin
+utils/ihxutil/fpcmade.*
+utils/ihxutil/units
+utils/instantfpc/*.bak
+utils/instantfpc/*.exe
+utils/instantfpc/*.o
+utils/instantfpc/*.ppu
+utils/instantfpc/*.s
+utils/instantfpc/Package.fpc
+utils/instantfpc/bin
+utils/instantfpc/fpcmade.*
+utils/instantfpc/units
+utils/json2pas/*.bak
+utils/json2pas/*.exe
+utils/json2pas/*.o
+utils/json2pas/*.ppu
+utils/json2pas/*.s
+utils/json2pas/Package.fpc
+utils/json2pas/bin
+utils/json2pas/fpcmade.*
+utils/json2pas/units
 utils/mksymbian/*.bak
 utils/mksymbian/*.exe
 utils/mksymbian/*.o
@@ -7826,6 +7898,42 @@ utils/mksymbian/Package.fpc
 utils/mksymbian/build-stamp.*
 utils/mksymbian/fpcmade.*
 utils/mksymbian/units
+utils/pas2fpm/*.bak
+utils/pas2fpm/*.exe
+utils/pas2fpm/*.o
+utils/pas2fpm/*.ppu
+utils/pas2fpm/*.s
+utils/pas2fpm/Package.fpc
+utils/pas2fpm/bin
+utils/pas2fpm/fpcmade.*
+utils/pas2fpm/units
+utils/pas2jni/*.bak
+utils/pas2jni/*.exe
+utils/pas2jni/*.o
+utils/pas2jni/*.ppu
+utils/pas2jni/*.s
+utils/pas2jni/Package.fpc
+utils/pas2jni/bin
+utils/pas2jni/fpcmade.*
+utils/pas2jni/units
+utils/pas2js/*.bak
+utils/pas2js/*.exe
+utils/pas2js/*.o
+utils/pas2js/*.ppu
+utils/pas2js/*.s
+utils/pas2js/Package.fpc
+utils/pas2js/bin
+utils/pas2js/fpcmade.*
+utils/pas2js/units
+utils/pas2ut/*.bak
+utils/pas2ut/*.exe
+utils/pas2ut/*.o
+utils/pas2ut/*.ppu
+utils/pas2ut/*.s
+utils/pas2ut/Package.fpc
+utils/pas2ut/bin
+utils/pas2ut/fpcmade.*
+utils/pas2ut/units
 utils/postw32
 utils/postw32.exe
 utils/ppdep
@@ -7867,6 +7975,16 @@ utils/tply/*.o
 utils/tply/*.ppu
 utils/tply/*.s
 utils/tply/Package.fpc
+utils/tply/bin
 utils/tply/fpcmade.*
 utils/tply/units
+utils/unicode/*.bak
+utils/unicode/*.exe
+utils/unicode/*.o
+utils/unicode/*.ppu
+utils/unicode/*.s
+utils/unicode/Package.fpc
+utils/unicode/bin
+utils/unicode/fpcmade.*
+utils/unicode/units
 utils/units

+ 285 - 40
Makefile

@@ -2,7 +2,7 @@
 # Don't edit, this file is generated by FPCMake Version 2.0.0
 #
 default: help
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-netbsd m68k-amiga m68k-atari m68k-palmos m68k-macos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-haiku x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-android x86_64-aros x86_64-dragonfly arm-linux arm-netbsd arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android arm-aros powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-embedded i8086-msdos i8086-win16 aarch64-linux aarch64-darwin aarch64-android wasm-wasm sparc64-linux riscv32-linux riscv32-embedded riscv64-linux riscv64-embedded
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-netbsd m68k-amiga m68k-atari m68k-palmos m68k-macosclassic m68k-embedded m68k-sinclairql powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macosclassic powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-haiku x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-android x86_64-aros x86_64-dragonfly arm-linux arm-netbsd arm-palmos arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android arm-aros arm-freertos arm-ios powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android mips64el-linux jvm-java jvm-android i8086-embedded i8086-msdos i8086-win16 aarch64-linux aarch64-darwin aarch64-win64 aarch64-android aarch64-ios wasm-wasm sparc64-linux riscv32-linux riscv32-embedded riscv64-linux riscv64-embedded xtensa-linux xtensa-embedded xtensa-freertos z80-embedded z80-zxspectrum z80-msxdos z80-amstradcpc
 BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos win16 atari
@@ -196,6 +196,24 @@ $(error When compiling for mipsel-embedded, a sub-architecture (e.g. SUBARCH=pic
 endif
 override FPCOPT+=-Cp$(SUBARCH)
 endif
+ifeq ($(FULL_TARGET),xtensa-embedded)
+ifeq ($(SUBARCH),)
+$(error When compiling for xtensa-embedded, a sub-architecture (e.g. SUBARCH=lx106 or SUBARCH=lx6) must be defined)
+endif
+override FPCOPT+=-Cp$(SUBARCH)
+endif
+ifeq ($(FULL_TARGET),xtensa-freertos)
+ifeq ($(SUBARCH),)
+$(error When compiling for xtensa-freertos, a sub-architecture (e.g. SUBARCH=lx106 or SUBARCH=lx6) must be defined)
+endif
+override FPCOPT+=-Cp$(SUBARCH)
+endif
+ifeq ($(FULL_TARGET),arm-freertos)
+ifeq ($(SUBARCH),)
+$(error When compiling for arm-freertos, a sub-architecture (e.g. SUBARCH=armv6m or SUBARCH=armv7em) must be defined)
+endif
+override FPCOPT+=-Cp$(SUBARCH)
+endif
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 TARGETSUFFIX=$(OS_TARGET)
 SOURCESUFFIX=$(OS_SOURCE)
@@ -269,8 +287,8 @@ endif
 ifndef CROSSBINDIR
 CROSSBINDIR:=$(wildcard $(FPCDIR)/bin/$(TARGETSUFFIX))
 endif
-ifneq ($(findstring $(OS_TARGET),darwin iphonesim),)
-ifeq ($(OS_SOURCE),darwin)
+ifneq ($(findstring $(OS_TARGET),darwin iphonesim ios),)
+ifneq ($(findstring $(OS_SOURCE),darwin ios),)
 DARWIN2DARWIN=1
 endif
 endif
@@ -331,8 +349,8 @@ endif
 endif
 override PACKAGE_NAME=fpc
 override PACKAGE_VERSION=3.3.1
-REQUIREDVERSION=3.0.4
-REQUIREDVERSION2=3.0.2
+REQUIREDVERSION=3.2.0
+REQUIREDVERSION2=3.0.4
 ifndef inOS2
 override FPCDIR:=$(BASEDIR)
 export FPCDIR
@@ -395,6 +413,12 @@ endif
 ifeq ($(CPU_TARGET),riscv64)
 PPSUF=rv64
 endif
+ifeq ($(CPU_TARGET),xtensa)
+PPSUF=xtensa
+endif
+ifeq ($(CPU_TARGET),z80)
+PPSUF=z80
+endif
 ifdef CROSSCOMPILE
 ifneq ($(CPU_TARGET),jvm)
 PPPRE=ppcross
@@ -470,12 +494,17 @@ endif
 ifneq ($(OPT),)
 OPTNEW+=$(OPT)
 endif
+ifneq ($(findstring $(OS_TARGET),linux),)
+ifneq ($(findstring $(CPU_TARGET),i386 arm),)
+override OPTNEW+=-CVglobal-dynamic
+endif
+endif
 CLEANOPTS=FPC=$(PPNEW)
-BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)'
+BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)' 'FPCMAKEOPT=$(OPT)'
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 BuildOnlyBaseCPUs=jvm
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba nds msdos win16 macos $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos win16 macosclassic $(BuildOnlyBaseCPUs) freertos
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1
@@ -566,12 +595,15 @@ endif
 ifeq ($(FULL_TARGET),m68k-palmos)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
-ifeq ($(FULL_TARGET),m68k-macos)
+ifeq ($(FULL_TARGET),m68k-macosclassic)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
 ifeq ($(FULL_TARGET),m68k-embedded)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),m68k-sinclairql)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),powerpc-linux)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -581,7 +613,7 @@ endif
 ifeq ($(FULL_TARGET),powerpc-amiga)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
-ifeq ($(FULL_TARGET),powerpc-macos)
+ifeq ($(FULL_TARGET),powerpc-macosclassic)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
 ifeq ($(FULL_TARGET),powerpc-darwin)
@@ -659,9 +691,6 @@ endif
 ifeq ($(FULL_TARGET),arm-palmos)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
-ifeq ($(FULL_TARGET),arm-darwin)
-override TARGET_DIRS+=compiler rtl utils packages installer
-endif
 ifeq ($(FULL_TARGET),arm-wince)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -683,6 +712,12 @@ endif
 ifeq ($(FULL_TARGET),arm-aros)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),arm-freertos)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),arm-ios)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),powerpc64-linux)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -716,6 +751,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-android)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),mips64el-linux)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -737,9 +775,15 @@ endif
 ifeq ($(FULL_TARGET),aarch64-darwin)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),aarch64-win64)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),aarch64-android)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),aarch64-ios)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),wasm-wasm)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -758,6 +802,27 @@ endif
 ifeq ($(FULL_TARGET),riscv64-embedded)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),xtensa-linux)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),xtensa-embedded)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),xtensa-freertos)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),z80-embedded)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),z80-zxspectrum)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),z80-msxdos)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),z80-amstradcpc)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 override INSTALL_FPCPACKAGE=y
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
@@ -955,6 +1020,82 @@ endif
 else
 CROSSBINDIR=
 endif
+ifeq ($(OS_SOURCE),linux)
+ifndef GCCLIBDIR
+ifeq ($(CPU_TARGET),i386)
+ifneq ($(findstring x86_64,$(shell uname -a)),)
+ifeq ($(BINUTILSPREFIX),)
+GCCLIBDIR:=$(shell dirname `gcc -m32 -print-libgcc-file-name`)
+else
+CROSSGCCOPT=-m32
+endif
+endif
+endif
+ifeq ($(CPU_TARGET),powerpc)
+ifeq ($(BINUTILSPREFIX),)
+GCCLIBDIR:=$(shell dirname `gcc -m32 -print-libgcc-file-name`)
+else
+CROSSGCCOPT=-m32
+endif
+endif
+ifeq ($(CPU_TARGET),powerpc64)
+ifeq ($(BINUTILSPREFIX),)
+GCCLIBDIR:=$(shell dirname `gcc -m64 -print-libgcc-file-name`)
+else
+CROSSGCCOPT=-m64
+endif
+endif
+ifeq ($(CPU_TARGET),sparc)
+ifneq ($(findstring sparc64,$(shell uname -a)),)
+ifeq ($(BINUTILSPREFIX),)
+GCCLIBDIR:=$(shell dirname `gcc -m32 -print-libgcc-file-name`)
+else
+ifneq ($(findstring $(FPCFPMAKE_CPU_OPT),mips mipsel),)
+CROSSGCCOPT=-mabi=32
+else
+CROSSGCCOPT=-m32
+endif
+endif
+endif
+endif
+endif
+ifdef FPCFPMAKE
+FPCFPMAKE_CPU_TARGET=$(shell $(FPCFPMAKE) -iTP)
+ifeq ($(CPU_TARGET),$(FPCFPMAKE_CPU_TARGET))
+FPCMAKEGCCLIBDIR:=$(GCCLIBDIR)
+else
+ifneq ($(findstring $(FPCFPMAKE_CPU_TARGET),aarch64 powerpc64 riscv64 sparc64 x86_64),)
+FPCMAKE_CROSSGCCOPT=-m64
+else
+ifneq ($(findstring $(FPCFPMAKE_CPU_OPT),mips64 mips64el),)
+FPCMAKE_CROSSGCCOPT=-mabi=64
+else
+ifneq ($(findstring $(FPCFPMAKE_CPU_OPT),mips mipsel),)
+FPCMAKE_CROSSGCCOPT=-mabi=32
+else
+FPCMAKE_CROSSGCCOPT=-m32
+endif
+endif
+endif
+FPCMAKEGCCLIBDIR:=$(shell dirname `gcc $(FPCMAKE_CROSSGCCOPT) -print-libgcc-file-name`)
+endif
+endif
+ifndef FPCMAKEGCCLIBDIR
+FPCMAKEGCCLIBDIR:=$(shell dirname `gcc -print-libgcc-file-name`)
+endif
+ifndef GCCLIBDIR
+CROSSGCC=$(strip $(wildcard $(addsuffix /$(BINUTILSPREFIX)gcc$(SRCEXEEXT),$(SEARCHPATH))))
+ifneq ($(CROSSGCC),)
+GCCLIBDIR:=$(shell dirname `$(CROSSGCC) $(CROSSGCCOPT) -print-libgcc-file-name`)
+endif
+endif
+endif
+ifdef inUnix
+ifeq ($(OS_SOURCE),netbsd)
+OTHERLIBDIR:=/usr/pkg/lib
+endif
+export GCCLIBDIR FPCMAKEGCCLIBDIR OTHERLIBDIR
+endif
 BATCHEXT=.bat
 LOADEREXT=.as
 EXEEXT=.exe
@@ -1099,14 +1240,14 @@ STATICLIBPREFIX=
 SHORTSUFFIX=nwl
 IMPORTLIBPREFIX=imp
 endif
-ifeq ($(OS_TARGET),macos)
+ifeq ($(OS_TARGET),macosclassic)
 BATCHEXT=
 EXEEXT=
 DEBUGSYMEXT=.xcoff
 SHORTSUFFIX=mac
 IMPORTLIBPREFIX=imp
 endif
-ifneq ($(findstring $(OS_TARGET),darwin iphonesim),)
+ifneq ($(findstring $(OS_TARGET),darwin iphonesim ios),)
 BATCHEXT=.sh
 EXEEXT=
 HASSHAREDLIB=1
@@ -1156,6 +1297,11 @@ STATICLIBPREFIX=
 STATICLIBEXT=.a
 SHORTSUFFIX=d16
 endif
+ifeq ($(OS_TARGET),msxdos)
+STATICLIBPREFIX=
+STATICLIBEXT=.a
+SHORTSUFFIX=msd
+endif
 ifeq ($(OS_TARGET),embedded)
 ifeq ($(CPU_TARGET),i8086)
 STATICLIBPREFIX=
@@ -1163,6 +1309,9 @@ STATICLIBEXT=.a
 else
 EXEEXT=.bin
 endif
+ifeq ($(CPU_TARGET),z80)
+OEXT=.rel
+endif
 SHORTSUFFIX=emb
 endif
 ifeq ($(OS_TARGET),win16)
@@ -1171,6 +1320,9 @@ STATICLIBEXT=.a
 SHAREDLIBEXT=.dll
 SHORTSUFFIX=w16
 endif
+ifeq ($(OS_TARGET),zxspectrum)
+OEXT=.rel
+endif
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 FPCMADE=fpcmade.$(SHORTSUFFIX)
 ZIPSUFFIX=$(SHORTSUFFIX)
@@ -1442,11 +1594,11 @@ endif
 ifndef CROSSBOOTSTRAP
 ifneq ($(BINUTILSPREFIX),)
 override FPCOPT+=-XP$(BINUTILSPREFIX)
-endif
-ifneq ($(BINUTILSPREFIX),)
+ifneq ($(RLINKPATH),)
 override FPCOPT+=-Xr$(RLINKPATH)
 endif
 endif
+endif
 ifndef CROSSCOMPILE
 ifneq ($(BINUTILSPREFIX),)
 override FPCMAKEOPT+=-XP$(BINUTILSPREFIX)
@@ -1476,16 +1628,7 @@ override FPCOPT+=-gl
 override FPCOPTDEF+=DEBUG
 endif
 ifdef RELEASE
-ifneq ($(findstring 2.0.,$(FPC_VERSION)),)
-ifeq ($(CPU_TARGET),i386)
-FPCCPUOPT:=-OG2p3
-endif
-ifeq ($(CPU_TARGET),powerpc)
-FPCCPUOPT:=-O1r
-endif
-else
 FPCCPUOPT:=-O2
-endif
 override FPCOPT+=-Ur -Xs $(FPCCPUOPT) -n
 override FPCOPTDEF+=RELEASE
 endif
@@ -1547,6 +1690,17 @@ endif
 endif
 ifdef LINKSHARED
 endif
+ifdef GCCLIBDIR
+override FPCOPT+=-Fl$(GCCLIBDIR)
+ifdef FPCMAKEGCCLIBDIR
+override FPCMAKEOPT+=-Fl$(FPCMAKEGCCLIBDIR)
+else
+override FPCMAKEOPT+=-Fl$(GCCLIBDIR)
+endif
+endif
+ifdef OTHERLIBDIR
+override FPCOPT+=$(addprefix -Fl,$(OTHERLIBDIR))
+endif
 ifdef OPT
 override FPCOPT+=$(OPT)
 endif
@@ -1608,8 +1762,10 @@ else
 override INSTALLPPULINKFILES:=$(subst $(PPUEXT),$(OEXT),$(INSTALLPPUFILES)) $(subst $(PPUEXT),$(LTOEXT),$(INSTALLPPUFILES)) $(addprefix $(STATICLIBPREFIX),$(subst $(PPUEXT),$(STATICLIBEXT),$(INSTALLPPUFILES)))
 endif
 ifneq ($(UNITTARGETDIRPREFIX),)
-override INSTALLPPUFILES:=$(addprefix $(UNITTARGETDIRPREFIX),$(notdir $(INSTALLPPUFILES)))
-override INSTALLPPULINKFILES:=$(wildcard $(addprefix $(UNITTARGETDIRPREFIX),$(notdir $(INSTALLPPULINKFILES))))
+override INSTALLPPUFILENAMES:=$(notdir $(INSTALLPPUFILES))
+override INSTALLPPULINKFILENAMES:=$(notdir $(INSTALLPPULINKFILES))
+override INSTALLPPUFILES=$(addprefix $(UNITTARGETDIRPREFIX),$(INSTALLPPUFILENAMES))
+override INSTALLPPULINKFILES=$(wildcard $(addprefix $(UNITTARGETDIRPREFIX),$(INSTALLPPULINKFILENAMES)))
 endif
 override INSTALL_CREATEPACKAGEFPC=1
 endif
@@ -1768,8 +1924,10 @@ override CLEANPPULINKFILES:=$(subst $(PPUEXT),$(OEXT),$(CLEANPPUFILES)) $(subst
 ifdef DEBUGSYMEXT
 override CLEANPPULINKFILES+=$(subst $(PPUEXT),$(DEBUGSYMEXT),$(CLEANPPUFILES))
 endif
-override CLEANPPUFILES:=$(addprefix $(UNITTARGETDIRPREFIX),$(CLEANPPUFILES))
-override CLEANPPULINKFILES:=$(wildcard $(addprefix $(UNITTARGETDIRPREFIX),$(CLEANPPULINKFILES)))
+override CLEANPPUFILENAMES:=$(CLEANPPUFILES)
+override CLEANPPUFILES=$(addprefix $(UNITTARGETDIRPREFIX),$(CLEANPPUFILENAMES))
+override CLEANPPULINKFILENAMES:=$(CLEANPPULINKFILES)
+override CLEANPPULINKFILES=$(wildcard $(addprefix $(UNITTARGETDIRPREFIX),$(CLEANPPULINKFILENAMES)))
 endif
 fpc_clean: $(CLEANTARGET)
 ifdef CLEANEXEFILES
@@ -2138,7 +2296,7 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
-ifeq ($(FULL_TARGET),m68k-macos)
+ifeq ($(FULL_TARGET),m68k-macosclassic)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
 TARGET_DIRS_UTILS=1
@@ -2152,6 +2310,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),m68k-sinclairql)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),powerpc-linux)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2173,7 +2338,7 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
-ifeq ($(FULL_TARGET),powerpc-macos)
+ifeq ($(FULL_TARGET),powerpc-macosclassic)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
 TARGET_DIRS_UTILS=1
@@ -2355,13 +2520,6 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
-ifeq ($(FULL_TARGET),arm-darwin)
-TARGET_DIRS_COMPILER=1
-TARGET_DIRS_RTL=1
-TARGET_DIRS_UTILS=1
-TARGET_DIRS_PACKAGES=1
-TARGET_DIRS_INSTALLER=1
-endif
 ifeq ($(FULL_TARGET),arm-wince)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2411,6 +2569,20 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),arm-freertos)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),arm-ios)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),powerpc64-linux)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2488,6 +2660,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),mips64el-linux)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2537,6 +2716,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),aarch64-win64)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),aarch64-android)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2544,6 +2730,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),aarch64-ios)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),wasm-wasm)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2586,6 +2779,55 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),xtensa-linux)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),xtensa-embedded)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),xtensa-freertos)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),z80-embedded)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),z80-zxspectrum)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),z80-msxdos)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),z80-amstradcpc)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifdef TARGET_DIRS_COMPILER
 compiler_all:
 	$(MAKE) -C compiler all
@@ -2830,7 +3072,6 @@ makefiles: fpc_makefiles
 ifneq ($(wildcard fpcmake.loc),)
 include fpcmake.loc
 endif
-.NOTPARALLEL:
 unexport FPC_VERSION FPC_COMPILERINFO OS_SOURCE
 override TARGET_DIRS:=$(wildcard $(TARGET_DIRS))
 .PHONY: help
@@ -2926,7 +3167,11 @@ ifeq ($(findstring $(CPU_TARGET), $(BuildOnlyBaseCPUs)),)
 endif
 singlezipinstall: zipinstallall
 zipinstallall: $(BUILDSTAMP)
+ifeq ($(FULL_SOURCE),$(FULL_TARGET))
 	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+else
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX).built.on.$(SOURCESUFFIX) $(INSTALLOPTS)
+endif
 .PHONY: crossall crossinstall crosszipinstall crosssinglezipinstall
 crossall:
 	$(MAKE) all CROSSINSTALL=1
@@ -2935,4 +3180,4 @@ crossinstall:
 crosszipinstall:
 	$(MAKE) zipinstall CROSSINSTALL=1
 crosssinglezipinstall:
-	$(MAKE) fpc_zipinstall ZIPTARGET=crossinstall ZIPNAME=fpc $(INSTALLOPTS)
+	$(MAKE) fpc_zipinstall ZIPTARGET=crossinstall FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(SOURCESUFFIX).cross.$(TARGETSUFFIX) $(INSTALLOPTS)

+ 25 - 8
Makefile.fpc

@@ -20,8 +20,8 @@ fpcdir=.
 rule=help
 
 [prerules]
-REQUIREDVERSION=3.0.4
-REQUIREDVERSION2=3.0.2
+REQUIREDVERSION=3.2.0
+REQUIREDVERSION2=3.0.4
 
 
 # make versions < 3.77 (OS2 version) are buggy
@@ -91,6 +91,12 @@ endif
 ifeq ($(CPU_TARGET),riscv64)
 PPSUF=rv64
 endif
+ifeq ($(CPU_TARGET),xtensa)
+PPSUF=xtensa
+endif
+ifeq ($(CPU_TARGET),z80)
+PPSUF=z80
+endif
 
 # cross compilers uses full cpu_target, not just ppc-suffix
 # (except if the target cannot run a native compiler)
@@ -197,15 +203,24 @@ endif
 ifneq ($(OPT),)
 OPTNEW+=$(OPT)
 endif
+
+# some targets do not generate PIC by default, so we have to explicitly select
+# the general threading model when compiling the final versions of rtl and packages
+ifneq ($(findstring $(OS_TARGET),linux),)
+ifneq ($(findstring $(CPU_TARGET),i386 arm),)
+override OPTNEW+=-CVglobal-dynamic
+endif
+endif
+
 CLEANOPTS=FPC=$(PPNEW)
-BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)'
+BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)' 'FPCMAKEOPT=$(OPT)'
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 
 # CPU targets for which we only build the compiler/rtl
 BuildOnlyBaseCPUs=jvm
 
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba nds msdos win16 macos $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos win16 macosclassic $(BuildOnlyBaseCPUs) freertos
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1
@@ -214,8 +229,6 @@ endif
 endif
 
 [rules]
-.NOTPARALLEL:
-
 # These values can change
 unexport FPC_VERSION FPC_COMPILERINFO OS_SOURCE
 
@@ -363,7 +376,11 @@ endif
 
 singlezipinstall: zipinstallall
 zipinstallall: $(BUILDSTAMP)
-        $(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+ifeq ($(FULL_SOURCE),$(FULL_TARGET))
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
+else
+	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX).built.on.$(SOURCESUFFIX) $(INSTALLOPTS)
+endif
 
 
 ##########################################################################
@@ -382,4 +399,4 @@ crosszipinstall:
         $(MAKE) zipinstall CROSSINSTALL=1
 
 crosssinglezipinstall:
-        $(MAKE) fpc_zipinstall ZIPTARGET=crossinstall ZIPNAME=fpc $(INSTALLOPTS)
+	$(MAKE) fpc_zipinstall ZIPTARGET=crossinstall FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(SOURCESUFFIX).cross.$(TARGETSUFFIX) $(INSTALLOPTS)

File diff suppressed because it is too large
+ 442 - 69
compiler/Makefile


+ 115 - 77
compiler/Makefile.fpc

@@ -32,11 +32,38 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64 sparc64 riscv32 riscv64
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64 sparc64 riscv32 riscv64 xtensa z80
 
 # All supported targets used for clean
 ALLTARGETS=$(CYCLETARGETS)
 
+# All OS targets that do not support native compiler
+NO_NATIVE_COMPILER_OS_LIST=amstradcpc embedded freertos gba macosclassic msdos msxdos nds palmos symbian watcom wii win16 zxspectrum
+# All CPU targets that do not support native compiler
+NO_NATIVE_COMPILER_CPU_LIST=avr i8086 jvm z80
+
+# Don't compile a native compiler & utilities for targets
+# which do not support it
+ifneq ($(CPU_SOURCE),$(CPU_TARGET))
+ifneq ($(findstring $(CPU_TARGET),$(NO_NATIVE_COMPILER_CPU_LIST)),)
+NoNativeBinaries=1
+endif
+endif
+
+ifneq ($(OS_SOURCE),$(OS_TARGET))
+ifneq ($(findstring $(OS_TARGET),$(NO_NATIVE_COMPILER_OS_LIST)),)
+NoNativeBinaries=1
+endif
+endif
+
+ifndef FORCE_NATIVE_BINARIES
+ifeq ($(NoNativeBinaries),1)
+override EXEEXT=$(SRCEXEEXT)
+# In those cases, installation is a cross-installation
+CROSSINSTALL=1
+endif
+endif
+
 # Allow POWERPC, POWERPC64, M68K, I386, jvm defines for target cpu
 ifdef POWERPC
 PPC_TARGET=powerpc
@@ -89,6 +116,12 @@ endif
 ifdef RISCV64
 PPC_TARGET=riscv64
 endif
+ifdef XTENSA
+PPC_TARGET=xtensa
+endif
+ifdef Z80
+PPC_TARGET=z80
+endif
 
 # Default is to generate a compiler for the same
 # platform as CPU_TARGET (a native compiler)
@@ -135,6 +168,22 @@ ifndef RTLOPT
 RTLOPT:=$(OPT)
 endif
 
+DATE_FMT = +%Y/%m/%d
+ifdef SOURCE_DATE_EPOCH
+    COMPDATESTR ?= $(shell date -u -d "@$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u -r "$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u "$(DATE_FMT)")
+else
+   # does a git directory exist? ...
+   GIT_DIR = $(wildcard ../.git)
+   ifneq ($(GIT_DIR),)
+      # ... then take date from head
+      COMPDATESTR:=$(shell git log -1 --pretty=%cd --date=format:'%Y/%m/%d')
+   endif
+endif
+
+ifdef COMPDATESTR
+override OPTNEW+=-DD$(COMPDATESTR)
+endif
+
 ifdef CYCLELEVEL
 ifeq ($(CYCLELEVEL),1)
 override LOCALOPT+=$(OPTLEVEL1)
@@ -180,7 +229,6 @@ CPUSUF=386
 endif
 ifeq ($(CPC_TARGET),m68k)
 CPUSUF=68k
-ALLOW_WARNINGS=1
 endif
 ifeq ($(CPC_TARGET),powerpc)
 CPUSUF=ppc
@@ -225,6 +273,12 @@ endif
 ifeq ($(CPC_TARGET),riscv64)
 CPUSUF=rv64
 endif
+ifeq ($(CPC_TARGET),xtensa)
+CPUSUF=xtensa
+endif
+ifeq ($(CPC_TARGET),z80)
+CPUSUF=z80
+endif
 
 # Do not define the default -d$(CPU_TARGET) because that
 # will conflict with our -d$(CPC_TARGET)
@@ -283,12 +337,12 @@ endif
 
 # i386 specific
 ifeq ($(PPC_TARGET),i386)
-override LOCALOPT+=-Fux86
+override LOCALOPT+=-Fux86 -Fix86
 endif
 
 # x86_64 specific
 ifeq ($(PPC_TARGET),x86_64)
-override LOCALOPT+=-Fux86
+override LOCALOPT+=-Fux86 -Fix86
 endif
 
 # PowerPC specific
@@ -369,37 +423,6 @@ endif
 endif
 endif
 
-# Don't compile a native compiler & utilities for JVM and embedded
-# targets
-ifeq ($(CPU_TARGET),jvm)
-NoNativeBinaries=1
-endif
-ifeq ($(OS_TARGET),embedded)
-NoNativeBinaries=1
-endif
-ifeq ($(OS_TARGET),gba)
-NoNativeBinaries=1
-endif
-ifeq ($(OS_TARGET),msdos)
-NoNativeBinaries=1
-endif
-ifeq ($(OS_TARGET),nds)
-NoNativeBinaries=1
-endif
-ifeq ($(OS_TARGET),win16)
-NoNativeBinaries=1
-endif
-ifeq ($(OS_TARGET),macos)
-NoNativeBinaries=1
-endif
-
-# Allow install for jvm
-ifeq ($(NoNativeBinaries),1)
-override EXEEXT=$(SRCEXEEXT)
-# In those cases, installation in a cross-installation
-CROSSINSTALL=1
-endif
-
 [rules]
 #####################################################################
 # Setup Targets
@@ -586,23 +609,23 @@ endif
 # cpu targets
 #####################################################################
 
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64 sparc64 riscv32 riscv64
-PPC_SUFFIXES=386 68k ppc sparc arm armeb x64 ppc64 mips mipsel avr jvm 8086 a64 sparc64 rv32 rv64
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64 sparc64 riscv32 riscv64 xtensa z80
+PPC_SUFFIXES=386 68k ppc sparc arm armeb x64 ppc64 mips mipsel avr jvm 8086 a64 sparc64 rv32 rv64 xtensa z80
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 SYMLINKINSTALL_TARGETS=$(addsuffix _symlink_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS) $(SYMLINKINSTALL_TARGETS)
 
 $(PPC_TARGETS):
-        $(MAKE) PPC_TARGET=$@ CPU_UNITDIR=$@ compiler
+	$(MAKE) PPC_TARGET=$@ CPU_UNITDIR=$@ compiler
 
 $(INSTALL_TARGETS):
-        $(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) compiler
-		$(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) exeinstall
+	$(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) compiler
+	$(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) exeinstall
 
 $(SYMLINKINSTALL_TARGETS):
-        $(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) compiler
-		$(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) installsymlink
+	$(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) compiler
+	$(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) installsymlink
 
 alltargets: $(ALLTARGETS)
 
@@ -682,9 +705,9 @@ msg: msgtxt.inc
 
 insdatx86 : $(COMPILER_UNITTARGETDIR) x86/x86ins.dat
 	$(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkx86ins.pp
-        cd x86 && ..$(PATHSEP)utils$(PATHSEP)mkx86ins$(SRCEXEEXT) i8086 && mv -f *.inc ../i8086
-        cd x86 && ..$(PATHSEP)utils$(PATHSEP)mkx86ins$(SRCEXEEXT) && mv -f *.inc ../i386
-        cd x86 && ..$(PATHSEP)utils$(PATHSEP)mkx86ins$(SRCEXEEXT) x86_64 && mv -f *.inc ../x86_64
+        cd x86 && ..$(PATHSEP)utils$(PATHSEP)mkx86ins$(SRCEXEEXT) i8086 && mv -f i8086tab.inc i8086op.inc i8086nop.inc i8086att.inc i8086atts.inc i8086int.inc i8086prop.inc ../i8086
+        cd x86 && ..$(PATHSEP)utils$(PATHSEP)mkx86ins$(SRCEXEEXT) && mv -f i386tab.inc i386op.inc i386nop.inc i386att.inc i386atts.inc i386int.inc i386prop.inc ../i386
+        cd x86 && ..$(PATHSEP)utils$(PATHSEP)mkx86ins$(SRCEXEEXT) x86_64 && mv -f x8664tab.inc x8664op.inc x8664nop.inc x8664att.inc x8664ats.inc x8664int.inc x8664pro.inc ../x86_64
 
 insdatarm : arm/armins.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkarmins.pp
@@ -694,7 +717,11 @@ insdataarch64 : aarch64/a64ins.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mka64ins.pp
         cd aarch64 && ..$(PATHSEP)utils$(PATHSEP)mka64ins$(SRCEXEEXT)
 
-insdat: insdatx86 insdatarm insdataarch64
+insdatz80 : z80/z80ins.dat
+	$(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkz80ins.pp
+        cd z80 && ..$(PATHSEP)utils$(PATHSEP)mkz80ins$(SRCEXEEXT)
+
+insdat: insdatx86 insdatarm insdataarch64 insdatz80
 
 regdatx86 : x86/x86reg.dat
 	$(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkx86reg.pp
@@ -718,7 +745,7 @@ regdatsp64 : sparcgen/spreg.dat
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkspreg.pp
         cd sparcgen && ..$(PATHSEP)utils$(PATHSEP)mkspreg$(SRCEXEEXT) sparc64
 		mv -f sparcgen/rsp*.inc sparc64
-		
+
 regdatavr : avr/avrreg.dat
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkavrreg.pp
         cd avr && ..$(PATHSEP)utils$(PATHSEP)mkavrreg$(SRCEXEEXT)
@@ -731,7 +758,21 @@ regdatmips : mips/mipsreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkmpsreg.pp
         cd mips && ..$(PATHSEP)utils$(PATHSEP)mkmpsreg$(SRCEXEEXT)
 
-regdat : regdatx86 regdatarm regdatsp regdatavr regdataarch64 regdatmips regdatsp64
+regdatz80 : z80/z80reg.dat
+            $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkz80reg.pp
+        cd z80 && ..$(PATHSEP)utils$(PATHSEP)mkz80reg$(SRCEXEEXT)
+
+regdat : regdatx86 regdatarm regdatsp regdatavr regdataarch64 regdatmips regdatsp64 regdatz80
+
+intrdatx86 : x86/x86intr.dat
+		$(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkx86inl.pp
+        cd x86 && ..$(PATHSEP)utils$(PATHSEP)mkx86inl$(SRCEXEEXT)
+        cp -f x86/cpummprocs.inc ../rtl/x86_64
+		cp -f x86/cpumminnr.inc ../rtl/x86_64
+        cp -f x86/cpummprocs.inc ../rtl/i386
+		cp -f x86/cpumminnr.inc ../rtl/i386
+
+intrdat : intrdatx86
 
 # revision.inc rule
 revision.inc :
@@ -819,7 +860,7 @@ wpocycle:
         $(RM) $(EXENAME)
         $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtlclean
         $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtl
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS)) 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS))
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' compiler
         $(MOVE) $(EXENAME) $(TEMPWPONAME1)
         $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtlclean
@@ -937,13 +978,13 @@ endif
 # Clear detected compiler binary, because it can be existing crosscompiler binary, but we need native compiler here
         $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtlclean
         $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtl
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 cycleclean 
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 compiler 
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 cycleclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 compiler
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtlclean 
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtl 
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 cycleclean 
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 compiler 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 compiler
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' CYCLELEVEL=3 rtlclean
@@ -981,13 +1022,21 @@ cvstest:
 # This is also the case for other CPUs that don't support
 # 80bit real type.
 
+ifeq ($(findstring -dFPC_SOFT_FPUX80,$(LOCALOPT)),)
 ifeq ($(OS_SOURCE),win64)
   EXCLUDE_80BIT_TARGETS=1
 endif
 
-ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc sparc64 riscv32 riscv64),)
+ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc sparc64 riscv32 riscv64 xtensa),)
   EXCLUDE_80BIT_TARGETS=1
 endif
+endif
+
+ifndef EXCLUDE_80BIT_TARGETS
+FULL_TARGETS=$(filter-out $(PPC_TARGET),$(CYCLETARGETS))
+else
+FULL_TARGETS=$(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS))))
+endif
 
 full: fullcycle
 
@@ -999,11 +1048,7 @@ ifdef DOWPOCYCLE
         $(MAKE) rtlclean
         $(MAKE) rtl 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
 endif
-ifndef EXCLUDE_80BIT_TARGETS
-        $(MAKE) $(filter-out $(PPC_TARGET),$(CYCLETARGETS)) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
-else
-        $(MAKE) $(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
-endif
+        $(MAKE) $(FULL_TARGETS) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
 
 #####################################################################
 # Docs
@@ -1049,13 +1094,10 @@ endif
 endif
 
 fullinstall:
-ifndef EXCLUDE_80BIT_TARGETS
-        $(MAKE) $(addsuffix _exe_install,$(filter-out $(PPC_TARGET),$(CYCLETARGETS))) $(addsuffix _all,$(TARGET_DIRS))
-else
-        $(MAKE) $(addsuffix _exe_install,$(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS))))) $(addsuffix _all,$(TARGET_DIRS))
-endif
-        $(MAKE) $(addsuffix _install,$(TARGET_DIRS))
-        
+	$(MAKE) $(addsuffix _exe_install,$(FULL_TARGETS))
+	$(MAKE) $(addsuffix _all,$(TARGET_DIRS))
+	$(MAKE) $(addsuffix _install,$(TARGET_DIRS))
+
 auxfilesinstall:
 ifndef CROSSINSTALL
 ifdef UNIXHier
@@ -1065,9 +1107,9 @@ endif
         $(MKDIR) $(MSGINSTALLDIR)
         $(INSTALL) $(MSGFILES) $(MSGINSTALLDIR)
 endif
-	
 
-install: 
+
+install:
 # if no FPC is passed, use the one we assume we just built
 ifndef FPC
 	$(MAKE) quickinstall auxfilesinstall FPC=$(BASEDIR)/$(INSTALLEXEFILE)
@@ -1077,18 +1119,14 @@ endif
 
 # This also installs a link from bin to the actual executable.
 # The .deb does that later.
-installsymlink: install
+installsymlink: exeinstall
 ifneq ($(PPCCPULOCATION),$(INSTALL_BINDIR))
         $(MKDIR) $(INSTALL_BINDIR)
         ln -sf $(INSTALL_BASEDIR)/$(EXENAME) $(INSTALL_BINDIR)/$(EXENAME)
 endif
 
-fullinstallsymlink: fullinstall
-ifndef EXCLUDE_80BIT_TARGETS
-        $(MAKE) $(addsuffix _symlink_install,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))
-else
-        $(MAKE) $(addsuffix _symlink_install,$(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))))
-endif
+fullinstallsymlink:
+	$(MAKE) $(addsuffix _symlink_install,$(FULL_TARGETS))
 
 
 #####################################################################

+ 266 - 56
compiler/aarch64/a64att.inc

@@ -47,18 +47,6 @@
 'ldaxr',
 'stlxr',
 'stlxp',
-'ld1',
-'ld2',
-'ld3',
-'ld4',
-'st1',
-'st2',
-'st3',
-'st4',
-'ld1r',
-'ld2r',
-'ld3r',
-'ld4r',
 'prfm',
 'prfum',
 'add',
@@ -111,32 +99,11 @@
 'csneg',
 'ccmn',
 'ccmp',
-'nop',
-'yield',
-'wfe',
-'wfi',
-'sev',
-'sevl',
-'mov',
-'bfi',
-'bfxil',
-'sbfiz',
-'sbfx',
-'ubfiz',
-'ubfx',
-'asr',
-'lsl',
-'lsr',
-'ror',
-'sxt',
-'uxt',
-'neg',
 'ngc',
 'mvn',
 'mneg',
 'mul',
 'smnegl',
-'smull',
 'umnegl',
 'umull',
 'cset',
@@ -144,20 +111,102 @@
 'cinc',
 'cinv',
 'cneg',
-'fmov',
+'sxtb',
+'sxth',
+'sxtw',
+'uxtb',
+'uxth',
+'bfi',
+'bfxil',
+'sbfiz',
+'sbfx',
+'ubfiz',
+'ubfx',
+'yield',
+'wfe',
+'wfi',
+'sev',
+'sevl',
+'mov',
+'addhn',
+'addhn2',
+'addp',
+'addv',
+'aesd',
+'aese',
+'aesimc',
+'aesmc',
+'bif',
+'bit',
+'bsl',
+'cmeq',
+'cmge',
+'cmgt',
+'cmhi',
+'cmhs',
+'cmle',
+'cmlt',
+'cmtst',
+'cnt',
+'dup',
+'ext',
+'fabd',
+'facge',
+'fabs',
+'facgt',
+'fadd',
+'fccmp',
+'fccmpe',
+'fcmeq',
+'fcmge',
+'fcmgt',
+'fcmle',
+'fcmlt',
+'fcmp',
+'fcmpe',
+'fcsel',
 'fcvt',
 'fcvtas',
 'fcvtau',
+'fcvtl',
+'fcvtl2',
 'fcvtms',
 'fcvtmu',
 'fcvtns',
 'fcvtnu',
 'fcvtps',
 'fcvtpu',
+'fcvtxn',
+'fcvtxn2',
 'fcvtzs',
 'fcvtzu',
-'scvtf',
-'ucvtf',
+'fdiv',
+'fmadd',
+'fmax',
+'fmaxnm',
+'fmaxnmp',
+'fmanmv',
+'fmaxp',
+'fmaxv',
+'fmin',
+'fminnm',
+'fminnmp',
+'fminnmv',
+'fminp',
+'fminv',
+'fmla',
+'fmls',
+'fmov',
+'fmsub',
+'fmul',
+'fmulx',
+'fneg',
+'fnmadd',
+'fnmsub',
+'fnmul',
+'frecpe',
+'frecps',
+'frecpx',
 'frinta',
 'frinti',
 'frintm',
@@ -165,27 +214,188 @@
 'frintp',
 'frintx',
 'frintz',
-'fabs',
-'fneg',
+'frsqrte',
+'frsqrts',
 'fsqrt',
-'fadd',
-'fdiv',
-'fmul',
-'fnmul',
 'fsub',
-'fmax',
-'fmin',
-'fminnm',
-'fmadd',
-'fmsub',
-'fnmadd',
-'fnmsub',
-'fcmp',
-'fcmpe',
-'fccmp',
-'fcmmpe',
-'fcsel',
+'ld1',
+'ld1r',
+'ld2',
+'ld2r',
+'ld3',
+'ld3r',
+'ld4',
+'ld4r',
+'mla',
+'mls',
+'movi',
+'mvni',
+'pmul',
+'pmull',
+'pmull2',
+'raddhn',
+'raddhn2',
+'rev64',
+'rshrn',
+'rshrn2',
+'rsubhn',
+'rsubhn2',
+'saba',
+'sabal',
+'sabal2',
+'sadalp',
+'saddl',
+'saddl2',
+'saddlp',
+'saddlv',
+'saddw',
+'saddw2',
+'scvtf',
+'sha1c',
+'sha1h',
+'sha1m',
+'sha1p',
+'sha1su0',
+'sha1su1',
+'sha256h2',
+'sha256h',
+'sha256su0',
+'sha256su1',
+'shadd',
+'shl',
+'shll',
+'shll2',
+'shrn',
+'shrn2',
+'shsub',
+'sli',
+'smax',
+'smaxp',
+'smaxv',
+'smin',
+'sminp',
+'sminv',
+'smlal',
+'smlal2',
+'smlsl',
+'smlsl2',
+'smov',
+'smull',
+'smull2',
+'sqabs',
+'sqadd',
+'sqdmlal',
+'sqdmlal2',
+'sqdmlsl',
+'sqdmlsl2',
+'sqdmulh',
+'sqdmull',
+'sqdmull2',
+'sqneg',
+'sqrdmulh',
+'sqrshl',
+'sqrshrn',
+'sqrshrn2',
+'sqrshrun',
+'sqrshrun2',
+'sqshl',
+'sqshlu',
+'sqshrn',
+'sqshrn2',
+'sqshrun',
+'sqshrun2',
+'sqsub',
+'sqxtn',
+'sqxtn2',
+'sqxtun',
+'sqxtun2',
+'srhadd',
+'sri',
+'srshl',
+'srshr',
+'srsra',
+'sshl',
+'sshll',
+'sshll2',
+'sshr',
+'ssra',
+'ssubl',
+'ssubl2',
+'ssubw',
+'ssubw2',
+'st1',
+'st2',
+'st3',
+'st4',
+'suqadd',
+'sxtl',
+'tbl',
+'tbx',
+'trn1',
+'trn2',
+'uaba',
+'uabal',
+'uabal2',
+'uabd',
+'uabdl',
+'uabdl2',
+'uadalp',
+'uaddl',
+'uaddl2',
+'uaddlp',
+'uaddlv',
+'uaddw',
+'uaddw2',
+'ucvtf',
+'uhadd',
+'uhsub',
+'umax',
+'umaxp',
+'umaxv',
+'umin',
+'uminp',
+'uminv',
+'umlal',
+'umlal2',
+'umlsl',
+'umlsl2',
 'umov',
-'ins',
-'movi'
+'uqadd',
+'uqrshl',
+'uqrshrn',
+'uqrshrn2',
+'uqshl',
+'uqshrn',
+'uqsub',
+'uqxtn',
+'uqxtn2',
+'urecpe',
+'urhadd',
+'urshl',
+'urshr',
+'ursqrte',
+'ursra',
+'ushl',
+'ushll2',
+'ushr',
+'usqadd',
+'usra',
+'usubl',
+'usubl2',
+'usubw',
+'usubw2',
+'uxtl',
+'uzp1',
+'uzp2',
+'xtn',
+'xtn2',
+'zip1',
+'zip2',
+'nop',
+'asr',
+'lsl',
+'lsr',
+'ror',
+'neg',
+'ins'
 );

+ 210 - 0
compiler/aarch64/a64atts.inc

@@ -187,5 +187,215 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 498 - 76
compiler/aarch64/a64ins.dat

@@ -93,30 +93,6 @@
 
 [STLXP]
 
-[LD1]
-
-[LD2]
-
-[LD3]
-
-[LD4]
-
-[ST1]
-
-[ST2]
-
-[ST3]
-
-[ST4]
-
-[LD1R]
-
-[LD2R]
-
-[LD3R]
-
-[LD4R]
-
 [PRFM]
 
 [PRFUM]
@@ -221,21 +197,39 @@
 
 [CCMP]
 
-; Aliases
-; they are not generated by the compiler, they are only used for inline assembler
-[NOP]
+[NGC]
 
-[YIELD]
+[MVN]
 
-[WFE]
+[MNEG]
 
-[WFI]
+[MUL]
 
-[SEV]
+[SMNEGL]
 
-[SEVL]
+[UMNEGL]
 
-[MOV]
+[UMULL]
+
+[CSET]
+
+[CSETM]
+
+[CINC]
+
+[CINV]
+
+[CNEG]
+
+[SXTB]
+
+[SXTH]
+
+[SXTW]
+
+[UXTB]
+
+[UXTH]
 
 [BFI]
 
@@ -249,47 +243,92 @@
 
 [UBFX]
 
-[ASR]
+[YIELD]
 
-[LSL]
+[WFE]
 
-[LSR]
+[WFI]
 
-[ROR]
+[SEV]
 
-[SXT]
+[SEVL]
 
-[UXT]
+[MOV]
 
-[NEG]
+; Vector/float instructions
+[ADDHN]
 
-[NGC]
+[ADDHN2]
 
-[MVN]
+[ADDP]
 
-[MNEG]
+[ADDV]
 
-[MUL]
+[AESD]
 
-[SMNEGL]
+[AESE]
 
-[SMULL]
+[AESIMC]
 
-[UMNEGL]
+[EASMC]
 
-[UMULL]
+[BIF]
 
-[CSET]
+[BIT]
 
-[CSETM]
+[BSL]
 
-[CINC]
+[CMEQ]
 
-[CINV]
+[CMGE]
 
-[CNEG]
+[CMGT]
 
-[FMOV]
+[CMHI]
+
+[CMHS]
+
+[CMLE]
+
+[CMLT]
+
+[CMTST]
+
+[CNT]
+
+[DUP]
+
+[EXT]
+
+[FABD]
+
+[BACGE]
+
+[FABS]
+
+[FACGT]
+
+[FADD]
+
+[FCCMP]
+
+[FCCMPE]
+
+[FCMEQ]
+
+[FCMGE]
+
+[FCMGT]
+
+[FCMLE]
+
+[FCMLT]
+
+[FCMP]
+
+[FCMPE]
+
+[FCSEL]
 
 [FCVT]
 
@@ -297,6 +336,10 @@
 
 [FCVTAU]
 
+[FCVTL]
+
+[FCVTL2]
+
 [FCVTMS]
 
 [FCVTMU]
@@ -309,13 +352,67 @@
 
 [FCVTPU]
 
+[FCVTXN]
+
+[FCVTXN2]
+
 [FCVTZS]
 
 [FCVTZU]
 
-[SCVTF]
+[FDIV]
 
-[UCVTF]
+[FMADD]
+
+[FMAX]
+
+[FMAXNM]
+
+[FMAXNMP]
+
+[FMANMV]
+
+[FMAXP]
+
+[FMAXV]
+
+[FMIN]
+
+[FMINNM]
+
+[FMINNMP]
+
+[FMINNMV]
+
+[FMINP]
+
+[FMINV]
+
+[FMLA]
+
+[FMLS]
+
+[FMOV]
+
+[FMSUB]
+
+[FMUL]
+
+[FMULX]
+
+[FNEG]
+
+[FNMADD]
+
+[FNMSUB]
+
+[FNMUL]
+
+[FRECPE]
+
+[FRECPS]
+
+[FRECPX]
 
 [FRINTA]
 
@@ -331,48 +428,373 @@
 
 [FRINTZ]
 
-[FABS]
+[FRSQRTE]
 
-[FNEG]
+[FRSQRTS]
 
 [FSQRT]
 
-[FADD]
+[FSUB]
 
-[FDIV]
+[LD1]
 
-[FMUL]
+[LD1R]
 
-[FNMUL]
+[LD2]
 
-[FSUB]
+[LD2R]
 
-[FMAX]
+[LD3]
 
-[FMIN]
+[LD3R]
 
-[FMINNM]
+[LD4]
 
-[FMADD]
+[LD4R]
 
-[FMSUB]
+[MLA]
 
-[FNMADD]
+[MLS]
 
-[FNMSUB]
+[MOVI]
 
-[FCMP]
+[MVNI]
 
-[FCMPE]
+[PMUL]
 
-[FCCMP]
+[PMULL]
 
-[FCMMPE]
+[PMULL2]
 
-[FCSEL]
+[RADDHN]
+
+[RADDHN2]
+
+[REV64]
+
+[RSHRN]
+
+[RSHRN2]
+
+[SRUBHN]
+
+[RSUBHN2]
+
+[SABA]
+
+[SABAL]
+
+[SABAL2]
+
+[SADALP]
+
+[SADDL]
+
+[SADDL2]
+
+[SADDLP]
+
+[SADDLV]
+
+[SADDW]
+
+[SADDW2]
+
+[SCVTF]
+
+[SHAC1C]
+
+[SHA1H]
+
+[SHA1M]
+
+[SHA1P]
+
+[SHA1SU0]
+
+[SHA1SU1]
+
+[SHA256H2]
+
+[SHA256H]
+
+[SHA256SU0]
+
+[SHA256SU1]
+
+[SHADD]
+
+[SHL]
+
+[SHLL]
+
+[SHLL2]
+
+[SHRN]
+
+[SHRN2]
+
+[SHSUB]
+
+[SLI]
+
+[SMAX]
+
+[SMAXP]
+
+[SMAXC]
+
+[SMIN]
+
+[SMINP]
+
+[SMINV]
+
+[SMLAL]
+
+[SMLAL2]
+
+[SMLSL]
+
+[SMLSL2]
+
+[SMOV]
+
+[SMULL]
+
+[SMULL2]
+
+[SQABS]
+
+[SQADD]
+
+[SQDMLAL]
+
+[SQDMLAL2]
+
+[SQDMLSL]
+
+[SQDMLSL2]
+
+[SQDMULH]
+
+[SQDMULL]
+
+[SQDMULL2]
+
+[SQNEG]
+
+[SQRDMULH]
+
+[SQRSHL]
+
+[SQRSHRN]
+
+[SQRSHRN2]
+
+[SQRSHRUN]
+
+[SQRSHRUN2]
+
+[SQSHL]
+
+[SQSHLU]
+
+[SQSHRN]
+
+[SQSRHN2]
+
+[SQSHRUN]
+
+[SQSHRUN2]
+
+[SQSUB]
+
+[SQXTN]
+
+[SQXTN2]
+
+[SQXTUN]
+
+[SQXTUN2]
+
+[SRHQDD]
+
+[SRI]
+
+[SRSHL]
+
+[SRSHR]
+
+[SRSRA]
+
+[SSHL]
+
+[SSHLL]
+
+[SSHLL2]
+
+[SSHR]
+
+[SSRA]
+
+[SSUBL]
+
+[SSUBL2]
+
+[SSUBW]
+
+[SSUBW2]
+
+[ST1]
+
+[ST2]
+
+[ST3]
+
+[ST4]
+
+[SUBQADD]
+
+[SXTL]
+
+[TBL]
+
+[TBX]
+
+[TRN1]
+
+[TRN2]
+
+[UABA]
+
+[UABAL]
+
+[UABAL2]
+
+[UABD]
+
+[UABDL]
+
+[UABDL2]
+
+[UADALP]
+
+[UADDLL]
+
+[UADDLL2]
+
+[UADDLP]
+
+[UADDLV]
+
+[UADDW]
+
+[UADDW2]
+
+[UCVTF]
+
+[UHADD]
+
+[UHSUB]
+
+[UMAX]
+
+[UMAXP]
+
+[UMAXV]
+
+[UMIN]
+
+[UMINP]
+
+[UMINV]
+
+[UMLAL]
+
+[UMLAL2]
+
+[UMLSL]
+
+[UMLSL2]
 
 [UMOV]
 
+[UQADD]
+
+[UQRSHL]
+
+[UQRSHRN]
+
+[UQRSHRN2]
+
+[UQSHL]
+
+[UQSHRN]
+
+[UQSUB]
+
+[UQXTN]
+
+[UQXTN2]
+
+[URECPE]
+
+[URHADD]
+
+[URSHL]
+
+[URSHR]
+
+[URSQRTE]
+
+[URSRA]
+
+[USHL]
+
+[USHLL2]
+
+[USHR]
+
+[USQADD]
+
+[USRA]
+
+[USUBL]
+
+[USUBL2]
+
+[USUBW]
+
+[USUBW2]
+
+[UXTL]
+
+[UZP1]
+
+[UZP2]
+
+[XTN1]
+
+[XTN2]
+
+[ZIP1]
+
+[ZIP2]
+
+; Aliases
+; they are not generated by the compiler, they are only used for inline assembler
+[NOP]
+
+[ASR]
+
+[LSL]
+
+[LSR]
+
+[ROR]
+
+[NEG]
+
 [INS]
 
-[MOVI]

+ 266 - 56
compiler/aarch64/a64op.inc

@@ -47,18 +47,6 @@ A_STLR,
 A_LDAXR,
 A_STLXR,
 A_STLXP,
-A_LD1,
-A_LD2,
-A_LD3,
-A_LD4,
-A_ST1,
-A_ST2,
-A_ST3,
-A_ST4,
-A_LD1R,
-A_LD2R,
-A_LD3R,
-A_LD4R,
 A_PRFM,
 A_PRFUM,
 A_ADD,
@@ -111,32 +99,11 @@ A_CSINV,
 A_CSNEG,
 A_CCMN,
 A_CCMP,
-A_NOP,
-A_YIELD,
-A_WFE,
-A_WFI,
-A_SEV,
-A_SEVL,
-A_MOV,
-A_BFI,
-A_BFXIL,
-A_SBFIZ,
-A_SBFX,
-A_UBFIZ,
-A_UBFX,
-A_ASR,
-A_LSL,
-A_LSR,
-A_ROR,
-A_SXT,
-A_UXT,
-A_NEG,
 A_NGC,
 A_MVN,
 A_MNEG,
 A_MUL,
 A_SMNEGL,
-A_SMULL,
 A_UMNEGL,
 A_UMULL,
 A_CSET,
@@ -144,20 +111,102 @@ A_CSETM,
 A_CINC,
 A_CINV,
 A_CNEG,
-A_FMOV,
+A_SXTB,
+A_SXTH,
+A_SXTW,
+A_UXTB,
+A_UXTH,
+A_BFI,
+A_BFXIL,
+A_SBFIZ,
+A_SBFX,
+A_UBFIZ,
+A_UBFX,
+A_YIELD,
+A_WFE,
+A_WFI,
+A_SEV,
+A_SEVL,
+A_MOV,
+A_ADDHN,
+A_ADDHN2,
+A_ADDP,
+A_ADDV,
+A_AESD,
+A_AESE,
+A_AESIMC,
+A_EASMC,
+A_BIF,
+A_BIT,
+A_BSL,
+A_CMEQ,
+A_CMGE,
+A_CMGT,
+A_CMHI,
+A_CMHS,
+A_CMLE,
+A_CMLT,
+A_CMTST,
+A_CNT,
+A_DUP,
+A_EXT,
+A_FABD,
+A_BACGE,
+A_FABS,
+A_FACGT,
+A_FADD,
+A_FCCMP,
+A_FCCMPE,
+A_FCMEQ,
+A_FCMGE,
+A_FCMGT,
+A_FCMLE,
+A_FCMLT,
+A_FCMP,
+A_FCMPE,
+A_FCSEL,
 A_FCVT,
 A_FCVTAS,
 A_FCVTAU,
+A_FCVTL,
+A_FCVTL2,
 A_FCVTMS,
 A_FCVTMU,
 A_FCVTNS,
 A_FCVTNU,
 A_FCVTPS,
 A_FCVTPU,
+A_FCVTXN,
+A_FCVTXN2,
 A_FCVTZS,
 A_FCVTZU,
-A_SCVTF,
-A_UCVTF,
+A_FDIV,
+A_FMADD,
+A_FMAX,
+A_FMAXNM,
+A_FMAXNMP,
+A_FMANMV,
+A_FMAXP,
+A_FMAXV,
+A_FMIN,
+A_FMINNM,
+A_FMINNMP,
+A_FMINNMV,
+A_FMINP,
+A_FMINV,
+A_FMLA,
+A_FMLS,
+A_FMOV,
+A_FMSUB,
+A_FMUL,
+A_FMULX,
+A_FNEG,
+A_FNMADD,
+A_FNMSUB,
+A_FNMUL,
+A_FRECPE,
+A_FRECPS,
+A_FRECPX,
 A_FRINTA,
 A_FRINTI,
 A_FRINTM,
@@ -165,27 +214,188 @@ A_FRINTN,
 A_FRINTP,
 A_FRINTX,
 A_FRINTZ,
-A_FABS,
-A_FNEG,
+A_FRSQRTE,
+A_FRSQRTS,
 A_FSQRT,
-A_FADD,
-A_FDIV,
-A_FMUL,
-A_FNMUL,
 A_FSUB,
-A_FMAX,
-A_FMIN,
-A_FMINNM,
-A_FMADD,
-A_FMSUB,
-A_FNMADD,
-A_FNMSUB,
-A_FCMP,
-A_FCMPE,
-A_FCCMP,
-A_FCMMPE,
-A_FCSEL,
+A_LD1,
+A_LD1R,
+A_LD2,
+A_LD2R,
+A_LD3,
+A_LD3R,
+A_LD4,
+A_LD4R,
+A_MLA,
+A_MLS,
+A_MOVI,
+A_MVNI,
+A_PMUL,
+A_PMULL,
+A_PMULL2,
+A_RADDHN,
+A_RADDHN2,
+A_REV64,
+A_RSHRN,
+A_RSHRN2,
+A_SRUBHN,
+A_RSUBHN2,
+A_SABA,
+A_SABAL,
+A_SABAL2,
+A_SADALP,
+A_SADDL,
+A_SADDL2,
+A_SADDLP,
+A_SADDLV,
+A_SADDW,
+A_SADDW2,
+A_SCVTF,
+A_SHAC1C,
+A_SHA1H,
+A_SHA1M,
+A_SHA1P,
+A_SHA1SU0,
+A_SHA1SU1,
+A_SHA256H2,
+A_SHA256H,
+A_SHA256SU0,
+A_SHA256SU1,
+A_SHADD,
+A_SHL,
+A_SHLL,
+A_SHLL2,
+A_SHRN,
+A_SHRN2,
+A_SHSUB,
+A_SLI,
+A_SMAX,
+A_SMAXP,
+A_SMAXC,
+A_SMIN,
+A_SMINP,
+A_SMINV,
+A_SMLAL,
+A_SMLAL2,
+A_SMLSL,
+A_SMLSL2,
+A_SMOV,
+A_SMULL,
+A_SMULL2,
+A_SQABS,
+A_SQADD,
+A_SQDMLAL,
+A_SQDMLAL2,
+A_SQDMLSL,
+A_SQDMLSL2,
+A_SQDMULH,
+A_SQDMULL,
+A_SQDMULL2,
+A_SQNEG,
+A_SQRDMULH,
+A_SQRSHL,
+A_SQRSHRN,
+A_SQRSHRN2,
+A_SQRSHRUN,
+A_SQRSHRUN2,
+A_SQSHL,
+A_SQSHLU,
+A_SQSHRN,
+A_SQSRHN2,
+A_SQSHRUN,
+A_SQSHRUN2,
+A_SQSUB,
+A_SQXTN,
+A_SQXTN2,
+A_SQXTUN,
+A_SQXTUN2,
+A_SRHQDD,
+A_SRI,
+A_SRSHL,
+A_SRSHR,
+A_SRSRA,
+A_SSHL,
+A_SSHLL,
+A_SSHLL2,
+A_SSHR,
+A_SSRA,
+A_SSUBL,
+A_SSUBL2,
+A_SSUBW,
+A_SSUBW2,
+A_ST1,
+A_ST2,
+A_ST3,
+A_ST4,
+A_SUBQADD,
+A_SXTL,
+A_TBL,
+A_TBX,
+A_TRN1,
+A_TRN2,
+A_UABA,
+A_UABAL,
+A_UABAL2,
+A_UABD,
+A_UABDL,
+A_UABDL2,
+A_UADALP,
+A_UADDLL,
+A_UADDLL2,
+A_UADDLP,
+A_UADDLV,
+A_UADDW,
+A_UADDW2,
+A_UCVTF,
+A_UHADD,
+A_UHSUB,
+A_UMAX,
+A_UMAXP,
+A_UMAXV,
+A_UMIN,
+A_UMINP,
+A_UMINV,
+A_UMLAL,
+A_UMLAL2,
+A_UMLSL,
+A_UMLSL2,
 A_UMOV,
-A_INS,
-A_MOVI
+A_UQADD,
+A_UQRSHL,
+A_UQRSHRN,
+A_UQRSHRN2,
+A_UQSHL,
+A_UQSHRN,
+A_UQSUB,
+A_UQXTN,
+A_UQXTN2,
+A_URECPE,
+A_URHADD,
+A_URSHL,
+A_URSHR,
+A_URSQRTE,
+A_URSRA,
+A_USHL,
+A_USHLL2,
+A_USHR,
+A_USQADD,
+A_USRA,
+A_USUBL,
+A_USUBL2,
+A_USUBW,
+A_USUBW2,
+A_UXTL,
+A_UZP1,
+A_UZP2,
+A_XTN1,
+A_XTN2,
+A_ZIP1,
+A_ZIP2,
+A_NOP,
+A_ASR,
+A_LSL,
+A_LSR,
+A_ROR,
+A_NEG,
+A_INS
 );

+ 453 - 37
compiler/aarch64/a64reg.dat

@@ -76,170 +76,586 @@ XZR,$01,$05,$1F,xzr,31,31
 WSP,$01,$04,$20,wsp,31,31
 SP,$01,$05,$20,sp,31,31
 
+NZCV,$05,$00,$00,nzcv,0,0
+FPCR,$05,$00,$01,fpcr,0,0
+FPSR,$05,$00,$02,fpsr,0,0
+TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0
 
 ; vfp registers
+; generated by fpc/compiler/utils/gena64vfp.pp to avoid tedious typing
 B0,$04,$01,$00,b0,64,64
 H0,$04,$03,$00,h0,64,64
 S0,$04,$09,$00,s0,64,64
 D0,$04,$0a,$00,d0,64,64
-Q0,$04,$05,$00,q0,64,64
+Q0,$04,$0b,$00,q0,64,64
+V0,$04,$00,$00,v0,64,64
+V0_B,$04,$20,$00,v0.b,64,64
+V0_H,$04,$21,$00,v0.h,64,64
+V0_S,$04,$22,$00,v0.s,64,64
+V0_D,$04,$23,$00,v0.d,64,64
+V0_8B,$04,$18,$00,v0.8b,64,64
+V0_16B,$04,$19,$00,v0.16b,64,64
+V0_4H,$04,$1a,$00,v0.4h,64,64
+V0_8H,$04,$1b,$00,v0.8h,64,64
+V0_2S,$04,$1c,$00,v0.2s,64,64
+V0_4S,$04,$1d,$00,v0.4s,64,64
+V0_1D,$04,$1e,$00,v0.1d,64,64
+V0_2D,$04,$1f,$00,v0.2d,64,64
 B1,$04,$01,$01,b1,65,65
 H1,$04,$03,$01,h1,65,65
 S1,$04,$09,$01,s1,65,65
 D1,$04,$0a,$01,d1,65,65
-Q1,$04,$05,$01,q1,65,65
+Q1,$04,$0b,$01,q1,65,65
+V1,$04,$00,$01,v1,65,65
+V1_B,$04,$20,$01,v1.b,65,65
+V1_H,$04,$21,$01,v1.h,65,65
+V1_S,$04,$22,$01,v1.s,65,65
+V1_D,$04,$23,$01,v1.d,65,65
+V1_8B,$04,$18,$01,v1.8b,65,65
+V1_16B,$04,$19,$01,v1.16b,65,65
+V1_4H,$04,$1a,$01,v1.4h,65,65
+V1_8H,$04,$1b,$01,v1.8h,65,65
+V1_2S,$04,$1c,$01,v1.2s,65,65
+V1_4S,$04,$1d,$01,v1.4s,65,65
+V1_1D,$04,$1e,$01,v1.1d,65,65
+V1_2D,$04,$1f,$01,v1.2d,65,65
 B2,$04,$01,$02,b2,66,66
 H2,$04,$03,$02,h2,66,66
 S2,$04,$09,$02,s2,66,66
 D2,$04,$0a,$02,d2,66,66
-Q2,$04,$05,$02,q2,66,66
+Q2,$04,$0b,$02,q2,66,66
+V2,$04,$00,$02,v2,66,66
+V2_B,$04,$20,$02,v2.b,66,66
+V2_H,$04,$21,$02,v2.h,66,66
+V2_S,$04,$22,$02,v2.s,66,66
+V2_D,$04,$23,$02,v2.d,66,66
+V2_8B,$04,$18,$02,v2.8b,66,66
+V2_16B,$04,$19,$02,v2.16b,66,66
+V2_4H,$04,$1a,$02,v2.4h,66,66
+V2_8H,$04,$1b,$02,v2.8h,66,66
+V2_2S,$04,$1c,$02,v2.2s,66,66
+V2_4S,$04,$1d,$02,v2.4s,66,66
+V2_1D,$04,$1e,$02,v2.1d,66,66
+V2_2D,$04,$1f,$02,v2.2d,66,66
 B3,$04,$01,$03,b3,67,67
 H3,$04,$03,$03,h3,67,67
 S3,$04,$09,$03,s3,67,67
 D3,$04,$0a,$03,d3,67,67
-Q3,$04,$05,$03,q3,67,67
+Q3,$04,$0b,$03,q3,67,67
+V3,$04,$00,$03,v3,67,67
+V3_B,$04,$20,$03,v3.b,67,67
+V3_H,$04,$21,$03,v3.h,67,67
+V3_S,$04,$22,$03,v3.s,67,67
+V3_D,$04,$23,$03,v3.d,67,67
+V3_8B,$04,$18,$03,v3.8b,67,67
+V3_16B,$04,$19,$03,v3.16b,67,67
+V3_4H,$04,$1a,$03,v3.4h,67,67
+V3_8H,$04,$1b,$03,v3.8h,67,67
+V3_2S,$04,$1c,$03,v3.2s,67,67
+V3_4S,$04,$1d,$03,v3.4s,67,67
+V3_1D,$04,$1e,$03,v3.1d,67,67
+V3_2D,$04,$1f,$03,v3.2d,67,67
 B4,$04,$01,$04,b4,68,68
 H4,$04,$03,$04,h4,68,68
 S4,$04,$09,$04,s4,68,68
 D4,$04,$0a,$04,d4,68,68
-Q4,$04,$05,$04,q4,68,68
+Q4,$04,$0b,$04,q4,68,68
+V4,$04,$00,$04,v4,68,68
+V4_B,$04,$20,$04,v4.b,68,68
+V4_H,$04,$21,$04,v4.h,68,68
+V4_S,$04,$22,$04,v4.s,68,68
+V4_D,$04,$23,$04,v4.d,68,68
+V4_8B,$04,$18,$04,v4.8b,68,68
+V4_16B,$04,$19,$04,v4.16b,68,68
+V4_4H,$04,$1a,$04,v4.4h,68,68
+V4_8H,$04,$1b,$04,v4.8h,68,68
+V4_2S,$04,$1c,$04,v4.2s,68,68
+V4_4S,$04,$1d,$04,v4.4s,68,68
+V4_1D,$04,$1e,$04,v4.1d,68,68
+V4_2D,$04,$1f,$04,v4.2d,68,68
 B5,$04,$01,$05,b5,69,69
 H5,$04,$03,$05,h5,69,69
 S5,$04,$09,$05,s5,69,69
 D5,$04,$0a,$05,d5,69,69
-Q5,$04,$05,$05,q5,69,69
+Q5,$04,$0b,$05,q5,69,69
+V5,$04,$00,$05,v5,69,69
+V5_B,$04,$20,$05,v5.b,69,69
+V5_H,$04,$21,$05,v5.h,69,69
+V5_S,$04,$22,$05,v5.s,69,69
+V5_D,$04,$23,$05,v5.d,69,69
+V5_8B,$04,$18,$05,v5.8b,69,69
+V5_16B,$04,$19,$05,v5.16b,69,69
+V5_4H,$04,$1a,$05,v5.4h,69,69
+V5_8H,$04,$1b,$05,v5.8h,69,69
+V5_2S,$04,$1c,$05,v5.2s,69,69
+V5_4S,$04,$1d,$05,v5.4s,69,69
+V5_1D,$04,$1e,$05,v5.1d,69,69
+V5_2D,$04,$1f,$05,v5.2d,69,69
 B6,$04,$01,$06,b6,70,70
 H6,$04,$03,$06,h6,70,70
 S6,$04,$09,$06,s6,70,70
 D6,$04,$0a,$06,d6,70,70
-Q6,$04,$05,$06,q6,70,70
+Q6,$04,$0b,$06,q6,70,70
+V6,$04,$00,$06,v6,70,70
+V6_B,$04,$20,$06,v6.b,70,70
+V6_H,$04,$21,$06,v6.h,70,70
+V6_S,$04,$22,$06,v6.s,70,70
+V6_D,$04,$23,$06,v6.d,70,70
+V6_8B,$04,$18,$06,v6.8b,70,70
+V6_16B,$04,$19,$06,v6.16b,70,70
+V6_4H,$04,$1a,$06,v6.4h,70,70
+V6_8H,$04,$1b,$06,v6.8h,70,70
+V6_2S,$04,$1c,$06,v6.2s,70,70
+V6_4S,$04,$1d,$06,v6.4s,70,70
+V6_1D,$04,$1e,$06,v6.1d,70,70
+V6_2D,$04,$1f,$06,v6.2d,70,70
 B7,$04,$01,$07,b7,71,71
 H7,$04,$03,$07,h7,71,71
 S7,$04,$09,$07,s7,71,71
 D7,$04,$0a,$07,d7,71,71
-Q7,$04,$05,$07,q7,71,71
+Q7,$04,$0b,$07,q7,71,71
+V7,$04,$00,$07,v7,71,71
+V7_B,$04,$20,$07,v7.b,71,71
+V7_H,$04,$21,$07,v7.h,71,71
+V7_S,$04,$22,$07,v7.s,71,71
+V7_D,$04,$23,$07,v7.d,71,71
+V7_8B,$04,$18,$07,v7.8b,71,71
+V7_16B,$04,$19,$07,v7.16b,71,71
+V7_4H,$04,$1a,$07,v7.4h,71,71
+V7_8H,$04,$1b,$07,v7.8h,71,71
+V7_2S,$04,$1c,$07,v7.2s,71,71
+V7_4S,$04,$1d,$07,v7.4s,71,71
+V7_1D,$04,$1e,$07,v7.1d,71,71
+V7_2D,$04,$1f,$07,v7.2d,71,71
 B8,$04,$01,$08,b8,72,72
 H8,$04,$03,$08,h8,72,72
 S8,$04,$09,$08,s8,72,72
 D8,$04,$0a,$08,d8,72,72
-Q8,$04,$05,$08,q8,72,72
+Q8,$04,$0b,$08,q8,72,72
+V8,$04,$00,$08,v8,72,72
+V8_B,$04,$20,$08,v8.b,72,72
+V8_H,$04,$21,$08,v8.h,72,72
+V8_S,$04,$22,$08,v8.s,72,72
+V8_D,$04,$23,$08,v8.d,72,72
+V8_8B,$04,$18,$08,v8.8b,72,72
+V8_16B,$04,$19,$08,v8.16b,72,72
+V8_4H,$04,$1a,$08,v8.4h,72,72
+V8_8H,$04,$1b,$08,v8.8h,72,72
+V8_2S,$04,$1c,$08,v8.2s,72,72
+V8_4S,$04,$1d,$08,v8.4s,72,72
+V8_1D,$04,$1e,$08,v8.1d,72,72
+V8_2D,$04,$1f,$08,v8.2d,72,72
 B9,$04,$01,$09,b9,73,73
 H9,$04,$03,$09,h9,73,73
 S9,$04,$09,$09,s9,73,73
 D9,$04,$0a,$09,d9,73,73
-Q9,$04,$05,$09,q9,73,73
+Q9,$04,$0b,$09,q9,73,73
+V9,$04,$00,$09,v9,73,73
+V9_B,$04,$20,$09,v9.b,73,73
+V9_H,$04,$21,$09,v9.h,73,73
+V9_S,$04,$22,$09,v9.s,73,73
+V9_D,$04,$23,$09,v9.d,73,73
+V9_8B,$04,$18,$09,v9.8b,73,73
+V9_16B,$04,$19,$09,v9.16b,73,73
+V9_4H,$04,$1a,$09,v9.4h,73,73
+V9_8H,$04,$1b,$09,v9.8h,73,73
+V9_2S,$04,$1c,$09,v9.2s,73,73
+V9_4S,$04,$1d,$09,v9.4s,73,73
+V9_1D,$04,$1e,$09,v9.1d,73,73
+V9_2D,$04,$1f,$09,v9.2d,73,73
 B10,$04,$01,$0A,b10,74,74
 H10,$04,$03,$0A,h10,74,74
 S10,$04,$09,$0A,s10,74,74
 D10,$04,$0a,$0A,d10,74,74
-Q10,$04,$05,$0A,q10,74,74
+Q10,$04,$0b,$0A,q10,74,74
+V10,$04,$00,$0A,v10,74,74
+V10_B,$04,$20,$0A,v10.b,74,74
+V10_H,$04,$21,$0A,v10.h,74,74
+V10_S,$04,$22,$0A,v10.s,74,74
+V10_D,$04,$23,$0A,v10.d,74,74
+V10_8B,$04,$18,$0A,v10.8b,74,74
+V10_16B,$04,$19,$0A,v10.16b,74,74
+V10_4H,$04,$1a,$0A,v10.4h,74,74
+V10_8H,$04,$1b,$0A,v10.8h,74,74
+V10_2S,$04,$1c,$0A,v10.2s,74,74
+V10_4S,$04,$1d,$0A,v10.4s,74,74
+V10_1D,$04,$1e,$0A,v10.1d,74,74
+V10_2D,$04,$1f,$0A,v10.2d,74,74
 B11,$04,$01,$0B,b11,75,75
 H11,$04,$03,$0B,h11,75,75
 S11,$04,$09,$0B,s11,75,75
 D11,$04,$0a,$0B,d11,75,75
-Q11,$04,$05,$0B,q11,75,75
+Q11,$04,$0b,$0B,q11,75,75
+V11,$04,$00,$0B,v11,75,75
+V11_B,$04,$20,$0B,v11.b,75,75
+V11_H,$04,$21,$0B,v11.h,75,75
+V11_S,$04,$22,$0B,v11.s,75,75
+V11_D,$04,$23,$0B,v11.d,75,75
+V11_8B,$04,$18,$0B,v11.8b,75,75
+V11_16B,$04,$19,$0B,v11.16b,75,75
+V11_4H,$04,$1a,$0B,v11.4h,75,75
+V11_8H,$04,$1b,$0B,v11.8h,75,75
+V11_2S,$04,$1c,$0B,v11.2s,75,75
+V11_4S,$04,$1d,$0B,v11.4s,75,75
+V11_1D,$04,$1e,$0B,v11.1d,75,75
+V11_2D,$04,$1f,$0B,v11.2d,75,75
 B12,$04,$01,$0C,b12,76,76
 H12,$04,$03,$0C,h12,76,76
 S12,$04,$09,$0C,s12,76,76
 D12,$04,$0a,$0C,d12,76,76
-Q12,$04,$05,$0C,q12,76,76
+Q12,$04,$0b,$0C,q12,76,76
+V12,$04,$00,$0C,v12,76,76
+V12_B,$04,$20,$0C,v12.b,76,76
+V12_H,$04,$21,$0C,v12.h,76,76
+V12_S,$04,$22,$0C,v12.s,76,76
+V12_D,$04,$23,$0C,v12.d,76,76
+V12_8B,$04,$18,$0C,v12.8b,76,76
+V12_16B,$04,$19,$0C,v12.16b,76,76
+V12_4H,$04,$1a,$0C,v12.4h,76,76
+V12_8H,$04,$1b,$0C,v12.8h,76,76
+V12_2S,$04,$1c,$0C,v12.2s,76,76
+V12_4S,$04,$1d,$0C,v12.4s,76,76
+V12_1D,$04,$1e,$0C,v12.1d,76,76
+V12_2D,$04,$1f,$0C,v12.2d,76,76
 B13,$04,$01,$0D,b13,77,77
 H13,$04,$03,$0D,h13,77,77
 S13,$04,$09,$0D,s13,77,77
 D13,$04,$0a,$0D,d13,77,77
-Q13,$04,$05,$0D,q13,77,77
+Q13,$04,$0b,$0D,q13,77,77
+V13,$04,$00,$0D,v13,77,77
+V13_B,$04,$20,$0D,v13.b,77,77
+V13_H,$04,$21,$0D,v13.h,77,77
+V13_S,$04,$22,$0D,v13.s,77,77
+V13_D,$04,$23,$0D,v13.d,77,77
+V13_8B,$04,$18,$0D,v13.8b,77,77
+V13_16B,$04,$19,$0D,v13.16b,77,77
+V13_4H,$04,$1a,$0D,v13.4h,77,77
+V13_8H,$04,$1b,$0D,v13.8h,77,77
+V13_2S,$04,$1c,$0D,v13.2s,77,77
+V13_4S,$04,$1d,$0D,v13.4s,77,77
+V13_1D,$04,$1e,$0D,v13.1d,77,77
+V13_2D,$04,$1f,$0D,v13.2d,77,77
 B14,$04,$01,$0E,b14,78,78
 H14,$04,$03,$0E,h14,78,78
 S14,$04,$09,$0E,s14,78,78
 D14,$04,$0a,$0E,d14,78,78
-Q14,$04,$05,$0E,q14,78,78
+Q14,$04,$0b,$0E,q14,78,78
+V14,$04,$00,$0E,v14,78,78
+V14_B,$04,$20,$0E,v14.b,78,78
+V14_H,$04,$21,$0E,v14.h,78,78
+V14_S,$04,$22,$0E,v14.s,78,78
+V14_D,$04,$23,$0E,v14.d,78,78
+V14_8B,$04,$18,$0E,v14.8b,78,78
+V14_16B,$04,$19,$0E,v14.16b,78,78
+V14_4H,$04,$1a,$0E,v14.4h,78,78
+V14_8H,$04,$1b,$0E,v14.8h,78,78
+V14_2S,$04,$1c,$0E,v14.2s,78,78
+V14_4S,$04,$1d,$0E,v14.4s,78,78
+V14_1D,$04,$1e,$0E,v14.1d,78,78
+V14_2D,$04,$1f,$0E,v14.2d,78,78
 B15,$04,$01,$0F,b15,79,79
 H15,$04,$03,$0F,h15,79,79
 S15,$04,$09,$0F,s15,79,79
 D15,$04,$0a,$0F,d15,79,79
-Q15,$04,$05,$0F,q15,79,79
+Q15,$04,$0b,$0F,q15,79,79
+V15,$04,$00,$0F,v15,79,79
+V15_B,$04,$20,$0F,v15.b,79,79
+V15_H,$04,$21,$0F,v15.h,79,79
+V15_S,$04,$22,$0F,v15.s,79,79
+V15_D,$04,$23,$0F,v15.d,79,79
+V15_8B,$04,$18,$0F,v15.8b,79,79
+V15_16B,$04,$19,$0F,v15.16b,79,79
+V15_4H,$04,$1a,$0F,v15.4h,79,79
+V15_8H,$04,$1b,$0F,v15.8h,79,79
+V15_2S,$04,$1c,$0F,v15.2s,79,79
+V15_4S,$04,$1d,$0F,v15.4s,79,79
+V15_1D,$04,$1e,$0F,v15.1d,79,79
+V15_2D,$04,$1f,$0F,v15.2d,79,79
 B16,$04,$01,$10,b16,80,80
 H16,$04,$03,$10,h16,80,80
 S16,$04,$09,$10,s16,80,80
 D16,$04,$0a,$10,d16,80,80
-Q16,$04,$05,$10,q16,80,80
+Q16,$04,$0b,$10,q16,80,80
+V16,$04,$00,$10,v16,80,80
+V16_B,$04,$20,$10,v16.b,80,80
+V16_H,$04,$21,$10,v16.h,80,80
+V16_S,$04,$22,$10,v16.s,80,80
+V16_D,$04,$23,$10,v16.d,80,80
+V16_8B,$04,$18,$10,v16.8b,80,80
+V16_16B,$04,$19,$10,v16.16b,80,80
+V16_4H,$04,$1a,$10,v16.4h,80,80
+V16_8H,$04,$1b,$10,v16.8h,80,80
+V16_2S,$04,$1c,$10,v16.2s,80,80
+V16_4S,$04,$1d,$10,v16.4s,80,80
+V16_1D,$04,$1e,$10,v16.1d,80,80
+V16_2D,$04,$1f,$10,v16.2d,80,80
 B17,$04,$01,$11,b17,81,81
 H17,$04,$03,$11,h17,81,81
 S17,$04,$09,$11,s17,81,81
 D17,$04,$0a,$11,d17,81,81
-Q17,$04,$05,$11,q17,81,81
+Q17,$04,$0b,$11,q17,81,81
+V17,$04,$00,$11,v17,81,81
+V17_B,$04,$20,$11,v17.b,81,81
+V17_H,$04,$21,$11,v17.h,81,81
+V17_S,$04,$22,$11,v17.s,81,81
+V17_D,$04,$23,$11,v17.d,81,81
+V17_8B,$04,$18,$11,v17.8b,81,81
+V17_16B,$04,$19,$11,v17.16b,81,81
+V17_4H,$04,$1a,$11,v17.4h,81,81
+V17_8H,$04,$1b,$11,v17.8h,81,81
+V17_2S,$04,$1c,$11,v17.2s,81,81
+V17_4S,$04,$1d,$11,v17.4s,81,81
+V17_1D,$04,$1e,$11,v17.1d,81,81
+V17_2D,$04,$1f,$11,v17.2d,81,81
 B18,$04,$01,$12,b18,82,82
 H18,$04,$03,$12,h18,82,82
 S18,$04,$09,$12,s18,82,82
 D18,$04,$0a,$12,d18,82,82
-Q18,$04,$05,$12,q18,82,82
+Q18,$04,$0b,$12,q18,82,82
+V18,$04,$00,$12,v18,82,82
+V18_B,$04,$20,$12,v18.b,82,82
+V18_H,$04,$21,$12,v18.h,82,82
+V18_S,$04,$22,$12,v18.s,82,82
+V18_D,$04,$23,$12,v18.d,82,82
+V18_8B,$04,$18,$12,v18.8b,82,82
+V18_16B,$04,$19,$12,v18.16b,82,82
+V18_4H,$04,$1a,$12,v18.4h,82,82
+V18_8H,$04,$1b,$12,v18.8h,82,82
+V18_2S,$04,$1c,$12,v18.2s,82,82
+V18_4S,$04,$1d,$12,v18.4s,82,82
+V18_1D,$04,$1e,$12,v18.1d,82,82
+V18_2D,$04,$1f,$12,v18.2d,82,82
 B19,$04,$01,$13,b19,83,83
 H19,$04,$03,$13,h19,83,83
 S19,$04,$09,$13,s19,83,83
 D19,$04,$0a,$13,d19,83,83
-Q19,$04,$05,$13,q19,83,83
+Q19,$04,$0b,$13,q19,83,83
+V19,$04,$00,$13,v19,83,83
+V19_B,$04,$20,$13,v19.b,83,83
+V19_H,$04,$21,$13,v19.h,83,83
+V19_S,$04,$22,$13,v19.s,83,83
+V19_D,$04,$23,$13,v19.d,83,83
+V19_8B,$04,$18,$13,v19.8b,83,83
+V19_16B,$04,$19,$13,v19.16b,83,83
+V19_4H,$04,$1a,$13,v19.4h,83,83
+V19_8H,$04,$1b,$13,v19.8h,83,83
+V19_2S,$04,$1c,$13,v19.2s,83,83
+V19_4S,$04,$1d,$13,v19.4s,83,83
+V19_1D,$04,$1e,$13,v19.1d,83,83
+V19_2D,$04,$1f,$13,v19.2d,83,83
 B20,$04,$01,$14,b20,84,84
 H20,$04,$03,$14,h20,84,84
 S20,$04,$09,$14,s20,84,84
 D20,$04,$0a,$14,d20,84,84
-Q20,$04,$05,$14,q20,84,84
+Q20,$04,$0b,$14,q20,84,84
+V20,$04,$00,$14,v20,84,84
+V20_B,$04,$20,$14,v20.b,84,84
+V20_H,$04,$21,$14,v20.h,84,84
+V20_S,$04,$22,$14,v20.s,84,84
+V20_D,$04,$23,$14,v20.d,84,84
+V20_8B,$04,$18,$14,v20.8b,84,84
+V20_16B,$04,$19,$14,v20.16b,84,84
+V20_4H,$04,$1a,$14,v20.4h,84,84
+V20_8H,$04,$1b,$14,v20.8h,84,84
+V20_2S,$04,$1c,$14,v20.2s,84,84
+V20_4S,$04,$1d,$14,v20.4s,84,84
+V20_1D,$04,$1e,$14,v20.1d,84,84
+V20_2D,$04,$1f,$14,v20.2d,84,84
 B21,$04,$01,$15,b21,85,85
 H21,$04,$03,$15,h21,85,85
 S21,$04,$09,$15,s21,85,85
 D21,$04,$0a,$15,d21,85,85
-Q21,$04,$05,$15,q21,85,85
+Q21,$04,$0b,$15,q21,85,85
+V21,$04,$00,$15,v21,85,85
+V21_B,$04,$20,$15,v21.b,85,85
+V21_H,$04,$21,$15,v21.h,85,85
+V21_S,$04,$22,$15,v21.s,85,85
+V21_D,$04,$23,$15,v21.d,85,85
+V21_8B,$04,$18,$15,v21.8b,85,85
+V21_16B,$04,$19,$15,v21.16b,85,85
+V21_4H,$04,$1a,$15,v21.4h,85,85
+V21_8H,$04,$1b,$15,v21.8h,85,85
+V21_2S,$04,$1c,$15,v21.2s,85,85
+V21_4S,$04,$1d,$15,v21.4s,85,85
+V21_1D,$04,$1e,$15,v21.1d,85,85
+V21_2D,$04,$1f,$15,v21.2d,85,85
 B22,$04,$01,$16,b22,86,86
 H22,$04,$03,$16,h22,86,86
 S22,$04,$09,$16,s22,86,86
 D22,$04,$0a,$16,d22,86,86
-Q22,$04,$05,$16,q22,86,86
+Q22,$04,$0b,$16,q22,86,86
+V22,$04,$00,$16,v22,86,86
+V22_B,$04,$20,$16,v22.b,86,86
+V22_H,$04,$21,$16,v22.h,86,86
+V22_S,$04,$22,$16,v22.s,86,86
+V22_D,$04,$23,$16,v22.d,86,86
+V22_8B,$04,$18,$16,v22.8b,86,86
+V22_16B,$04,$19,$16,v22.16b,86,86
+V22_4H,$04,$1a,$16,v22.4h,86,86
+V22_8H,$04,$1b,$16,v22.8h,86,86
+V22_2S,$04,$1c,$16,v22.2s,86,86
+V22_4S,$04,$1d,$16,v22.4s,86,86
+V22_1D,$04,$1e,$16,v22.1d,86,86
+V22_2D,$04,$1f,$16,v22.2d,86,86
 B23,$04,$01,$17,b23,87,87
 H23,$04,$03,$17,h23,87,87
 S23,$04,$09,$17,s23,87,87
 D23,$04,$0a,$17,d23,87,87
-Q23,$04,$05,$17,q23,87,87
+Q23,$04,$0b,$17,q23,87,87
+V23,$04,$00,$17,v23,87,87
+V23_B,$04,$20,$17,v23.b,87,87
+V23_H,$04,$21,$17,v23.h,87,87
+V23_S,$04,$22,$17,v23.s,87,87
+V23_D,$04,$23,$17,v23.d,87,87
+V23_8B,$04,$18,$17,v23.8b,87,87
+V23_16B,$04,$19,$17,v23.16b,87,87
+V23_4H,$04,$1a,$17,v23.4h,87,87
+V23_8H,$04,$1b,$17,v23.8h,87,87
+V23_2S,$04,$1c,$17,v23.2s,87,87
+V23_4S,$04,$1d,$17,v23.4s,87,87
+V23_1D,$04,$1e,$17,v23.1d,87,87
+V23_2D,$04,$1f,$17,v23.2d,87,87
 B24,$04,$01,$18,b24,88,88
 H24,$04,$03,$18,h24,88,88
 S24,$04,$09,$18,s24,88,88
 D24,$04,$0a,$18,d24,88,88
-Q24,$04,$05,$18,q24,88,88
+Q24,$04,$0b,$18,q24,88,88
+V24,$04,$00,$18,v24,88,88
+V24_B,$04,$20,$18,v24.b,88,88
+V24_H,$04,$21,$18,v24.h,88,88
+V24_S,$04,$22,$18,v24.s,88,88
+V24_D,$04,$23,$18,v24.d,88,88
+V24_8B,$04,$18,$18,v24.8b,88,88
+V24_16B,$04,$19,$18,v24.16b,88,88
+V24_4H,$04,$1a,$18,v24.4h,88,88
+V24_8H,$04,$1b,$18,v24.8h,88,88
+V24_2S,$04,$1c,$18,v24.2s,88,88
+V24_4S,$04,$1d,$18,v24.4s,88,88
+V24_1D,$04,$1e,$18,v24.1d,88,88
+V24_2D,$04,$1f,$18,v24.2d,88,88
 B25,$04,$01,$19,b25,89,89
 H25,$04,$03,$19,h25,89,89
 S25,$04,$09,$19,s25,89,89
 D25,$04,$0a,$19,d25,89,89
-Q25,$04,$05,$19,q25,89,89
+Q25,$04,$0b,$19,q25,89,89
+V25,$04,$00,$19,v25,89,89
+V25_B,$04,$20,$19,v25.b,89,89
+V25_H,$04,$21,$19,v25.h,89,89
+V25_S,$04,$22,$19,v25.s,89,89
+V25_D,$04,$23,$19,v25.d,89,89
+V25_8B,$04,$18,$19,v25.8b,89,89
+V25_16B,$04,$19,$19,v25.16b,89,89
+V25_4H,$04,$1a,$19,v25.4h,89,89
+V25_8H,$04,$1b,$19,v25.8h,89,89
+V25_2S,$04,$1c,$19,v25.2s,89,89
+V25_4S,$04,$1d,$19,v25.4s,89,89
+V25_1D,$04,$1e,$19,v25.1d,89,89
+V25_2D,$04,$1f,$19,v25.2d,89,89
 B26,$04,$01,$1A,b26,90,90
 H26,$04,$03,$1A,h26,90,90
 S26,$04,$09,$1A,s26,90,90
 D26,$04,$0a,$1A,d26,90,90
-Q26,$04,$05,$1A,q26,90,90
+Q26,$04,$0b,$1A,q26,90,90
+V26,$04,$00,$1A,v26,90,90
+V26_B,$04,$20,$1A,v26.b,90,90
+V26_H,$04,$21,$1A,v26.h,90,90
+V26_S,$04,$22,$1A,v26.s,90,90
+V26_D,$04,$23,$1A,v26.d,90,90
+V26_8B,$04,$18,$1A,v26.8b,90,90
+V26_16B,$04,$19,$1A,v26.16b,90,90
+V26_4H,$04,$1a,$1A,v26.4h,90,90
+V26_8H,$04,$1b,$1A,v26.8h,90,90
+V26_2S,$04,$1c,$1A,v26.2s,90,90
+V26_4S,$04,$1d,$1A,v26.4s,90,90
+V26_1D,$04,$1e,$1A,v26.1d,90,90
+V26_2D,$04,$1f,$1A,v26.2d,90,90
 B27,$04,$01,$1B,b27,91,91
 H27,$04,$03,$1B,h27,91,91
 S27,$04,$09,$1B,s27,91,91
 D27,$04,$0a,$1B,d27,91,91
-Q27,$04,$05,$1B,q27,91,91
+Q27,$04,$0b,$1B,q27,91,91
+V27,$04,$00,$1B,v27,91,91
+V27_B,$04,$20,$1B,v27.b,91,91
+V27_H,$04,$21,$1B,v27.h,91,91
+V27_S,$04,$22,$1B,v27.s,91,91
+V27_D,$04,$23,$1B,v27.d,91,91
+V27_8B,$04,$18,$1B,v27.8b,91,91
+V27_16B,$04,$19,$1B,v27.16b,91,91
+V27_4H,$04,$1a,$1B,v27.4h,91,91
+V27_8H,$04,$1b,$1B,v27.8h,91,91
+V27_2S,$04,$1c,$1B,v27.2s,91,91
+V27_4S,$04,$1d,$1B,v27.4s,91,91
+V27_1D,$04,$1e,$1B,v27.1d,91,91
+V27_2D,$04,$1f,$1B,v27.2d,91,91
 B28,$04,$01,$1C,b28,92,92
 H28,$04,$03,$1C,h28,92,92
 S28,$04,$09,$1C,s28,92,92
 D28,$04,$0a,$1C,d28,92,92
-Q28,$04,$05,$1C,q28,92,92
+Q28,$04,$0b,$1C,q28,92,92
+V28,$04,$00,$1C,v28,92,92
+V28_B,$04,$20,$1C,v28.b,92,92
+V28_H,$04,$21,$1C,v28.h,92,92
+V28_S,$04,$22,$1C,v28.s,92,92
+V28_D,$04,$23,$1C,v28.d,92,92
+V28_8B,$04,$18,$1C,v28.8b,92,92
+V28_16B,$04,$19,$1C,v28.16b,92,92
+V28_4H,$04,$1a,$1C,v28.4h,92,92
+V28_8H,$04,$1b,$1C,v28.8h,92,92
+V28_2S,$04,$1c,$1C,v28.2s,92,92
+V28_4S,$04,$1d,$1C,v28.4s,92,92
+V28_1D,$04,$1e,$1C,v28.1d,92,92
+V28_2D,$04,$1f,$1C,v28.2d,92,92
 B29,$04,$01,$1D,b29,93,93
 H29,$04,$03,$1D,h29,93,93
 S29,$04,$09,$1D,s29,93,93
 D29,$04,$0a,$1D,d29,93,93
-Q29,$04,$05,$1D,q29,93,93
+Q29,$04,$0b,$1D,q29,93,93
+V29,$04,$00,$1D,v29,93,93
+V29_B,$04,$20,$1D,v29.b,93,93
+V29_H,$04,$21,$1D,v29.h,93,93
+V29_S,$04,$22,$1D,v29.s,93,93
+V29_D,$04,$23,$1D,v29.d,93,93
+V29_8B,$04,$18,$1D,v29.8b,93,93
+V29_16B,$04,$19,$1D,v29.16b,93,93
+V29_4H,$04,$1a,$1D,v29.4h,93,93
+V29_8H,$04,$1b,$1D,v29.8h,93,93
+V29_2S,$04,$1c,$1D,v29.2s,93,93
+V29_4S,$04,$1d,$1D,v29.4s,93,93
+V29_1D,$04,$1e,$1D,v29.1d,93,93
+V29_2D,$04,$1f,$1D,v29.2d,93,93
 B30,$04,$01,$1E,b30,94,94
 H30,$04,$03,$1E,h30,94,94
 S30,$04,$09,$1E,s30,94,94
 D30,$04,$0a,$1E,d30,94,94
-Q30,$04,$05,$1E,q30,94,94
+Q30,$04,$0b,$1E,q30,94,94
+V30,$04,$00,$1E,v30,94,94
+V30_B,$04,$20,$1E,v30.b,94,94
+V30_H,$04,$21,$1E,v30.h,94,94
+V30_S,$04,$22,$1E,v30.s,94,94
+V30_D,$04,$23,$1E,v30.d,94,94
+V30_8B,$04,$18,$1E,v30.8b,94,94
+V30_16B,$04,$19,$1E,v30.16b,94,94
+V30_4H,$04,$1a,$1E,v30.4h,94,94
+V30_8H,$04,$1b,$1E,v30.8h,94,94
+V30_2S,$04,$1c,$1E,v30.2s,94,94
+V30_4S,$04,$1d,$1E,v30.4s,94,94
+V30_1D,$04,$1e,$1E,v30.1d,94,94
+V30_2D,$04,$1f,$1E,v30.2d,94,94
 B31,$04,$01,$1F,b31,95,95
 H31,$04,$03,$1F,h31,95,95
 S31,$04,$09,$1F,s31,95,95
 D31,$04,$0a,$1F,d31,95,95
-Q31,$04,$05,$1F,q31,95,95
-
-NZCV,$05,$00,$00,nzcv,0,0
-FPCR,$05,$00,$01,fpcr,0,0
-FPSR,$05,$00,$02,fpsr,0,0
-TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0
+Q31,$04,$0b,$1F,q31,95,95
+V31,$04,$00,$1F,v31,95,95
+V31_B,$04,$20,$1F,v31.b,95,95
+V31_H,$04,$21,$1F,v31.h,95,95
+V31_S,$04,$22,$1F,v31.s,95,95
+V31_D,$04,$23,$1F,v31.d,95,95
+V31_8B,$04,$18,$1F,v31.8b,95,95
+V31_16B,$04,$19,$1F,v31.16b,95,95
+V31_4H,$04,$1a,$1F,v31.4h,95,95
+V31_8H,$04,$1b,$1F,v31.8h,95,95
+V31_2S,$04,$1c,$1F,v31.2s,95,95
+V31_4S,$04,$1d,$1F,v31.4s,95,95
+V31_1D,$04,$1e,$1F,v31.1d,95,95
+V31_2D,$04,$1f,$1F,v31.2d,95,95

+ 254 - 38
compiler/aarch64/aasmcpu.pas

@@ -157,6 +157,10 @@ uses
          oppostfix : TOpPostfix;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadconditioncode(opidx: longint; const c: tasmcond);
+         procedure loadrealconst(opidx: longint; const _value: bestreal);
+         procedure loadregset(opidx: longint; _basereg: tregister; _nregs: byte; _regsetindex: byte = 255);
+         procedure loadindexedreg(opidx: longint; _indexedreg: tregister; _regindex: byte);
+
          constructor op_none(op : tasmop);
 
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -168,6 +172,10 @@ uses
          constructor op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
          constructor op_reg_const_shifterop(op : tasmop;_op1: tregister; _op2: aint;_op3 : tshifterop);
+         constructor op_reg_realconst(op: tasmop; _op1: tregister; _op2: bestreal);
+
+         constructor op_indexedreg_reg(op : tasmop;_op1: tregister; _op1index: byte; _op2 : tregister);
+         constructor op_reg_indexedreg(op : tasmop;_op1: tregister; _op2 : tregister; _op2index: byte);
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
@@ -180,10 +188,14 @@ uses
          constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
          constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
 
+         constructor op_const_ref(op:tasmop; _op1: aint; _op2: treference);
 
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
 
+         { ldN(r)/stN }
+         constructor op_regset_reg_ref(op: tasmop; basereg: tregister; nregs: byte; const ref: treference);
+
          constructor op_sym(op : tasmop;_op1 : tasmsymbol);
          constructor op_sym_ofs(op : tasmop;_op1 : tasmsymbol;_op1ofs:longint);
          constructor op_reg_sym_ofs(op : tasmop;_op1 : tregister;_op2:tasmsymbol;_op2ofs : longint);
@@ -280,6 +292,48 @@ implementation
       end;
 
 
+    procedure taicpu.loadrealconst(opidx:longint;const _value:bestreal); { turn operand slot opidx into a real-constant operand holding _value }
+      begin
+        allocate_oper(opidx+1); { requested with opidx+1 - presumably a count, so that slot opidx exists (confirm in allocate_oper) }
+        with oper[opidx]^ do
+          begin
+            if typ<>top_realconst then { slot currently holds a different operand kind: clear it before reuse }
+              clearop(opidx);
+            val_real:=_value;
+            typ:=top_realconst; { tag the slot last, after its payload has been stored }
+          end;
+      end;
+
+
+    procedure taicpu.loadregset(opidx: longint; _basereg: tregister; _nregs: byte; _regsetindex: byte = 255); { turn operand slot opidx into a register-set operand: _nregs registers starting at _basereg }
+      begin
+        allocate_oper(opidx+1); { requested with opidx+1 - presumably a count, so that slot opidx exists (confirm in allocate_oper) }
+        with oper[opidx]^ do
+          begin
+            if typ<>top_regset then { slot currently holds a different operand kind: clear it before reuse }
+              clearop(opidx);
+            basereg:=_basereg;
+            nregs:=_nregs;
+            regsetindex:=_regsetindex; { default 255 presumably means "no element index" - TODO confirm against the assembler writer }
+            typ:=top_regset;
+          end;
+      end;
+
+
+    procedure taicpu.loadindexedreg(opidx: longint; _indexedreg: tregister; _regindex: byte); { turn operand slot opidx into an indexed-register operand: register _indexedreg with index _regindex }
+      begin
+        allocate_oper(opidx+1); { requested with opidx+1 - presumably a count, so that slot opidx exists (confirm in allocate_oper) }
+        with oper[opidx]^ do
+          begin
+            if typ<>top_indexedreg then { slot currently holds a different operand kind: clear it before reuse }
+              clearop(opidx);
+            indexedreg:=_indexedreg;
+            regindex:=_regindex; { presumably the vector element (lane) number - TODO confirm }
+            typ:=top_indexedreg;
+          end;
+      end;
+
+
 {*****************************************************************************
                                  taicpu Constructors
 *****************************************************************************}
@@ -382,6 +436,33 @@ implementation
       end;
 
 
+    constructor taicpu.op_reg_realconst(op : tasmop; _op1 : tregister; _op2 : bestreal); { build a two-operand instruction: register, real constant }
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1); { operand 0: the register }
+         loadrealconst(1,_op2); { operand 1: the real constant }
+      end;
+
+
+    constructor taicpu.op_indexedreg_reg(op: tasmop; _op1: tregister; _op1index: byte; _op2: tregister); { build a two-operand instruction: indexed register, plain register }
+      begin
+        inherited create(op);
+        ops:=2;
+        loadindexedreg(0,_op1,_op1index); { operand 0: _op1 together with its index _op1index }
+        loadreg(1,_op2); { operand 1: plain register }
+      end;
+
+
+    constructor taicpu.op_reg_indexedreg(op: tasmop; _op1: tregister; _op2: tregister; _op2index: byte); { build a two-operand instruction: plain register, indexed register }
+      begin
+        inherited create(op);
+        ops:=2;
+        loadreg(0,_op1); { operand 0: plain register }
+        loadindexedreg(1,_op2,_op2index); { operand 1: _op2 together with its index _op2index }
+      end;
+
+
      constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
        begin
          inherited create(op);
@@ -465,6 +546,15 @@ implementation
        end;
 
 
+     constructor taicpu.op_const_ref(op : tasmop; _op1 : aint; _op2 : treference); { build a two-operand instruction: integer constant, memory reference }
+      begin
+         inherited create(op);
+         ops:=2;
+         loadconst(0,_op1); { operand 0: the integer constant }
+         loadref(1,_op2); { operand 1: the memory reference }
+      end;
+
+
     constructor taicpu.op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
       begin
          inherited create(op);
@@ -474,6 +564,15 @@ implementation
       end;
 
 
+    constructor taicpu.op_regset_reg_ref(op: tasmop; basereg: tregister; nregs: byte; const ref: treference); { build a two-operand instruction: register set, memory reference; despite "reg" in the name no separate register operand is loaded }
+      begin
+        inherited create(op);
+        ops:=2;
+        loadregset(0,basereg,nregs); { operand 0: nregs registers starting at basereg; the set index is left at loadregset's default }
+        loadref(1, ref); { operand 1: the memory reference }
+      end;
+
+
     constructor taicpu.op_sym(op : tasmop;_op1 : tasmsymbol);
       begin
          inherited create(op);
@@ -528,7 +627,7 @@ implementation
       const
         { invalid sizes for aarch64 are 0 }
         subreg2bytesize: array[TSubRegister] of byte =
-          (0,0,0,0,4,8,0,0,0,4,8,0,0,0,0,0,0,0,0,0,0,0,0);
+          (0,0,0,0,4,8,0,0,0,4,8,0,0,0,0,0,0,0,0,0,0,0,0,8,16,0,16,16,16,16,16,16,16,16,16,16);
       var
         scalefactor: byte;
       begin
@@ -545,7 +644,7 @@ implementation
           R_MMREGISTER:
             result:=taicpu.op_reg_ref(op,r,ref);
           else
-            internalerror(200401041);
+            internalerror(2004010407);
         end;
       end;
 
@@ -554,22 +653,40 @@ implementation
       begin
         result:=sr_complex;
         if not assigned(ref.symboldata) and
-           not(ref.refaddr in [addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
+           not(ref.refaddr in [addr_pic,addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
           exit;
         { can't use pre-/post-indexed mode here (makes no sense either) }
         if ref.addressmode<>AM_OFFSET then
           exit;
         { "ldr literal" must be a 32/64 bit LDR and have a symbol }
-        if assigned(ref.symboldata) and
-           ((op<>A_LDR) or
+        if (ref.refaddr=addr_pic) and
+           (not (op in [A_LDR,A_B,A_BL]) or
             not(oppostfix in [PF_NONE,PF_W,PF_SW]) or
-            not assigned(ref.symbol)) then
+            (not assigned(ref.symbol) and
+             not assigned(ref.symboldata))) then
           exit;
         { if this is a (got) page offset load, we must have a base register and a
-          symbol }
+          symbol (except if we have an ADD with a non-got page offset load) }
         if (ref.refaddr in [addr_gotpageoffset,addr_pageoffset]) and
-           (not assigned(ref.symbol) or
-            (ref.base=NR_NO) or
+           (
+             (
+               (
+                 (op<>A_ADD) or
+                 (ref.refaddr=addr_gotpageoffset)
+               ) and
+               (
+                 not assigned(ref.symbol) or
+                 (ref.base=NR_NO)
+               )
+             ) or
+             (
+               (
+                 (op=A_ADD) and
+                 (ref.refaddr=addr_pageoffset)
+               ) and
+               not assigned(ref.symbol) and
+               (ref.base=NR_NO)
+             ) or
             (ref.index<>NR_NO) or
             (ref.offset<>0)) then
           begin
@@ -597,7 +714,9 @@ implementation
         result:=sr_internal_illegal;
         { post-indexed is only allowed for vector and immediate loads/stores }
         if (ref.addressmode=AM_POSTINDEXED) and
-           not(op in [A_LD1,A_LD2,A_LD3,A_LD4,A_ST1,A_ST2,A_ST3,A_ST4]) and
+           not((op = A_LD1) or (op = A_LD2) or (op = A_LD3) or (op = A_LD4) or
+               (op = A_LD1R) or (op = A_LD2R) or (op = A_LD3R) or (op = A_LD4R) or
+               (op = A_ST1) or (op = A_ST2) or (op = A_ST3) or (op = A_ST4)) and
            (not(op in [A_LDR,A_STR,A_LDP,A_STP]) or
             (ref.base=NR_NO) or
             (ref.index<>NR_NO)) then
@@ -640,32 +759,46 @@ implementation
             * can scale with the size of the access
             * can zero/sign extend 32 bit index register, and/or multiply by
               access size
-            * no pre/post-indexing
+            * no pre/post-indexing except for ldN(r)/stN
         }
         if (ref.base<>NR_NO) and
            (ref.index<>NR_NO) then
           begin
-            if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
-              exit;
             case op of
               { this holds for both integer and fpu/vector loads }
               A_LDR,A_STR:
-                if (ref.offset=0) and
-                   (((ref.shiftmode=SM_None) and
-                     (ref.shiftimm=0)) or
-                    ((ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
-                     (ref.shiftimm=tcgsizep2size[size]))) then
-                  result:=sr_simple
-                else
-                  result:=sr_complex;
-              { todo }
+                begin
+                  if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+                    exit;
+                  if (ref.offset=0) and
+                     (((ref.shiftmode=SM_None) and
+                       (ref.shiftimm=0)) or
+                      ((ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                       (ref.shiftimm=tcgsizep2size[size]))) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
               A_LD1,A_LD2,A_LD3,A_LD4,
+              A_LD1R,A_LD2R,A_LD3R,A_LD4R,
               A_ST1,A_ST2,A_ST3,A_ST4:
-                internalerror(2014110704);
+                begin
+                  if ref.addressmode in [AM_PREINDEXED] then
+                    exit;
+                  if (ref.offset=0) and
+                     (ref.addressmode=AM_POSTINDEXED) then
+                    result:=sr_simple
+                  else
+                   result:=sr_complex;
+                end;
               { these don't support base+index }
               A_LDUR,A_STUR,
               A_LDP,A_STP:
-                result:=sr_complex;
+                begin
+                  if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+                    exit;
+                  result:=sr_complex;
+                end
               else
                 { nothing: result is already sr_internal_illegal };
             end;
@@ -682,6 +815,8 @@ implementation
               - regular with signed 9 bit immediate
             * LDUR*/STUR*:
               - regular with signed 9 bit immediate
+            * ldN(r)/stN
+              - 0 or with postindex
         }
         if ref.base<>NR_NO then
           begin
@@ -725,17 +860,28 @@ implementation
                 end;
               A_LDUR,A_STUR:
                 begin
-                  if (ref.addressmode=AM_OFFSET) and
-                     (ref.offset>=-256) and
+                  if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+                    exit;
+                  if (ref.offset>=-256) and
                      (ref.offset<=255) then
                     result:=sr_simple
                   else
                     result:=sr_complex;
                 end;
-              { todo }
               A_LD1,A_LD2,A_LD3,A_LD4,
+              A_LD1R,A_LD2R,A_LD3R,A_LD4R,
               A_ST1,A_ST2,A_ST3,A_ST4:
-                internalerror(2014110907);
+                begin
+                  if ref.addressmode in [AM_PREINDEXED] then
+                    exit;
+                  if (ref.offset=0) or
+                     ((ref.addressmode=AM_POSTINDEXED) and
+                      { to check the validity of the offset, we'd have to analyse the regset argument }
+                      (ref.offset>0)) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
               A_LDAR,
               A_LDAXR,
               A_LDXR,
@@ -866,10 +1012,14 @@ implementation
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
       begin
         case opcode of
-          A_B,A_BL,
+          A_B,A_BL,A_BR,A_BLR,
           A_CMN,A_CMP,
           A_CCMN,A_CCMP,
-          A_TST:
+          A_TST,
+          A_FCMP,A_FCMPE,
+          A_CBZ,A_CBNZ,
+          A_PRFM,A_PRFUM,
+          A_RET:
             result:=operand_read;
           A_STR,A_STUR:
             if opnr=0 then
@@ -877,14 +1027,6 @@ implementation
             else
               { check for pre/post indexed in spilling_get_operation_type_ref }
               result:=operand_read;
-          A_STLXP,
-          A_STLXR,
-          A_STXP,
-          A_STXR:
-            if opnr=0 then
-              result:=operand_write
-            else
-              result:=operand_read;
           A_STP:
             begin
               if opnr in [0,1] then
@@ -902,11 +1044,85 @@ implementation
                  { check for pre/post indexed in spilling_get_operation_type_ref }
                  result:=operand_read;
              end;
+{$ifdef EXTDEBUG}
+           { play safe to avoid hard to find bugs, better fail at compile time }
+           A_ADD,
+           A_ADRP,
+           A_AND,
+           A_ASR,
+           A_BFI,
+           A_BFXIL,
+           A_CLZ,
+           A_CSEL,
+           A_CSET,
+           A_CSETM,
+           A_FABS,
+           A_EON,
+           A_EOR,
+           A_FADD,
+           A_FCVT,
+           A_FDIV,
+           A_FMADD,
+           A_FMOV,
+           A_FMSUB,
+           A_FMUL,
+           A_FNEG,
+           A_FNMADD,
+           A_FNMSUB,
+           A_FRINTX,
+           A_FSQRT,
+           A_FSUB,
+           A_ORR,
+           A_LSL,
+           A_LSLV,
+           A_LSR,
+           A_LSRV,
+           A_MOV,
+           A_MOVK,
+           A_MOVN,
+           A_MOVZ,
+           A_MSUB,
+           A_MUL,
+           A_MVN,
+           A_NEG,
+           A_LDR,
+           A_LDUR,
+           A_RBIT,
+           A_ROR,
+           A_RORV,
+           A_SBFX,
+           A_SCVTF,
+           A_FCVTZS,
+           A_SDIV,
+           A_SMULL,
+           A_STLXP,
+           A_STLXR,
+           A_STXP,
+           A_STXR,
+           A_SUB,
+           A_SXTB,
+           A_SXTH,
+           A_SXTW,
+           A_UBFIZ,
+           A_UBFX,
+           A_UCVTF,
+           A_UDIV,
+           A_UMULL,
+           A_UXTB,
+           A_UXTH:
+             if opnr=0 then
+               result:=operand_write
+             else
+               result:=operand_read;
+           else
+             Internalerror(2019090802);
+{$else EXTDEBUG}
            else
              if opnr=0 then
                result:=operand_write
              else
                result:=operand_read;
+{$endif EXTDEBUG}
         end;
       end;
 

+ 536 - 28
compiler/aarch64/agcpugas.pas

@@ -30,7 +30,7 @@ unit agcpugas;
 
     uses
        globtype,systems,
-       aasmtai,
+       aasmtai,aasmdata,aasmbase,
        assemble,aggas,
        cpubase,cpuinfo;
 
@@ -47,14 +47,22 @@ unit agcpugas;
         constructor CreateWithWriter(info: pasminfo; wr: TExternalAssemblerOutputFile; freewriter, smart: boolean); override;
       end;
 
+      TAArch64ClangGASAssembler=class(TAArch64Assembler)
+      private
+        procedure TransformSEHDirectives(list:TAsmList); { turns the SEH directives in list into pdata/xdata records }
+      protected
+        function sectionflags(secflags:TSectionFlags):string;override; { appends "r" for non-writable sections on aarch64-win64 }
+      public
+        procedure WriteAsmList; override; { runs TransformSEHDirectives on aarch64-win64, then the inherited writer }
+      end;
 
     const
       gas_shiftmode2str : array[tshiftmode] of string[4] = (
-        '','lsl','lsr','asr',
+        '','lsl','lsr','asr','ror',
         'uxtb','uxth','uxtw','uxtx',
         'sxtb','sxth','sxtw','sxtx');
 
-    const 
+    const
       cputype_to_gas_march : array[tcputype] of string = (
         '', // cpu_none
         'armv8'
@@ -63,7 +71,7 @@ unit agcpugas;
   implementation
 
     uses
-       cutils,globals,verbose,
+       cutils,cclasses,globals,verbose,
        aasmcpu,
        itcpugas,
        cgbase,cgutils;
@@ -90,6 +98,476 @@ unit agcpugas;
       end;
 
 
+{****************************************************************************}
+{                      Clang AArch64 Assembler writer                        }
+{****************************************************************************}
+
+    procedure TAArch64ClangGASAssembler.TransformSEHDirectives(list:TAsmList);
+      { Replaces the SEH directives found in list by explicit pdata/xdata records (list is modified in place). }
+      function convert_unwinddata(list:tasmlist):tdynamicarray;
+        { Encodes the directives of list, walked from last to first, as unwind codes; the caller frees the result. }
+        procedure check_offset(ofs,max:dword); { ofs must be a multiple of 8 and must not exceed max }
+          begin
+            if ((ofs and $7)<>0) or (ofs>max) then
+              internalerror(2020041210);
+          end;
+
+        procedure check_reg(reg:tregister;rt:TRegisterType;min:TSuperRegister); { reg must be of type rt and not below min }
+          begin
+            if (getregtype(reg)<>rt) or (getsupreg(reg)<min) then
+              internalerror(2020041211);
+          end;
+
+        procedure writebyte(b:byte); inline;
+          begin
+            result.write(b,sizeof(b));
+          end;
+
+        procedure writeword(w:word); { stored most significant byte first }
+          begin
+            w:=NtoBE(w);
+            result.write(w,sizeof(w));
+          end;
+
+        procedure writedword(dw:dword); { stored most significant byte first }
+          begin
+            dw:=NtoBE(dw);
+            result.write(dw,sizeof(dw));
+          end;
+
+        const
+          min_int_reg = 19; { the save_reg* codes store the register number relative to x19 }
+          min_mm_reg = 8; { the save_freg* codes store the register number relative to d8 }
+        var
+          hp : tai;
+          seh : tai_seh_directive absolute hp;
+        begin
+          result:=tdynamicarray.create(0);
+          hp:=tai(list.last);
+          while assigned(hp) do
+            begin
+              if hp.typ<>ait_seh_directive then
+                internalerror(2020041502);
+              case seh.kind of
+                ash_stackalloc: { alloc_s, alloc_m or alloc_l, depending on the size }
+                  begin
+                    if (seh.data.offset and $f)<>0 then
+                      internalerror(2020041207);
+                    if seh.data.offset<((1 shl 5)*16) then
+                      writebyte(byte(seh.data.offset shr 4))
+                    else if seh.data.offset<((1 shl 11)*16) then
+                      writeword($C000 or word(seh.data.offset shr 4))
+                    else if seh.data.offset<((1 shl 24)*16) then
+                      writedword($E0000000 or (seh.data.offset shr 4))
+                    else begin
+                      writeln(hexstr(seh.data.offset,8));
+                      internalerror(2020041209);
+                    end;
+                  end;
+                ash_addfp: { add_fp: 11100010'xxxxxxxx }
+                  begin
+                    check_offset(seh.data.offset,(1 shl 7)*8);
+                    writeword($E200 or (seh.data.offset shr 3));
+                  end;
+                ash_setfp: { set_fp: 11100001 }
+                  writebyte($E1);
+                ash_nop: { nop: 11100011 }
+                  writebyte($E3);
+                ash_savefplr: { save_fplr: 01zzzzzz }
+                  begin
+                    check_offset(seh.data.offset,504);
+                    writebyte($40 or (seh.data.offset shr 3));
+                  end;
+                ash_savefplr_x: { save_fplr_x: 10zzzzzz }
+                  begin
+                    check_offset(seh.data.offset,512);
+                    writebyte($80 or ((seh.data.offset shr 3)-1));
+                  end;
+                ash_savereg: { save_reg: 110100xx'xxzzzzzz; $C000 would be read back as alloc_m }
+                  begin
+                    check_offset(seh.data.offset,504);
+                    check_reg(seh.data.reg,R_INTREGISTER,min_int_reg);
+                    writeword($D000 or ((getsupreg(seh.data.reg)-min_int_reg) shl 6) or (seh.data.offset shr 3));
+                  end;
+                ash_savereg_x: { save_reg_x: 1101010x'xxxzzzzz }
+                  begin
+                    check_offset(seh.data.offset,256);
+                    check_reg(seh.data.reg,R_INTREGISTER,min_int_reg);
+                    writeword($D400 or ((getsupreg(seh.data.reg)-min_int_reg) shl 5) or ((seh.data.offset shr 3)-1));
+                  end;
+                ash_saveregp: { save_regp: 110010xx'xxzzzzzz }
+                  begin
+                    check_offset(seh.data.offset,504);
+                    check_reg(seh.data.reg,R_INTREGISTER,min_int_reg);
+                    writeword($C800 or ((getsupreg(seh.data.reg)-min_int_reg) shl 6) or (seh.data.offset shr 3));
+                  end;
+                ash_saveregp_x: { save_regp_x: 110011xx'xxzzzzzz }
+                  begin
+                    check_offset(seh.data.offset,512);
+                    check_reg(seh.data.reg,R_INTREGISTER,min_int_reg);
+                    writeword($CC00 or ((getsupreg(seh.data.reg)-min_int_reg) shl 6) or ((seh.data.offset shr 3)-1));
+                  end;
+                ash_savefreg: { save_freg: 1101110x'xxzzzzzz }
+                  begin
+                    check_offset(seh.data.offset,504);
+                    check_reg(seh.data.reg,R_MMREGISTER,min_mm_reg);
+                    writeword($DC00 or ((getsupreg(seh.data.reg)-min_mm_reg) shl 6) or (seh.data.offset shr 3));
+                  end;
+                ash_savefreg_x: { save_freg_x: 11011110'xxxzzzzz; $CE00 would be read back as save_regp_x }
+                  begin
+                    check_offset(seh.data.offset,256);
+                    check_reg(seh.data.reg,R_MMREGISTER,min_mm_reg);
+                    writeword($DE00 or ((getsupreg(seh.data.reg)-min_mm_reg) shl 5) or ((seh.data.offset shr 3)-1));
+                  end;
+                ash_savefregp: { save_fregp: 1101100x'xxzzzzzz }
+                  begin
+                    check_offset(seh.data.offset,504);
+                    check_reg(seh.data.reg,R_MMREGISTER,min_mm_reg);
+                    writeword($D800 or ((getsupreg(seh.data.reg)-min_mm_reg) shl 6) or (seh.data.offset shr 3));
+                  end;
+                ash_savefregp_x: { save_fregp_x: 1101101x'xxzzzzzz }
+                  begin
+                    check_offset(seh.data.offset,512);
+                    check_reg(seh.data.reg,R_MMREGISTER,min_mm_reg);
+                    writeword($DA00 or ((getsupreg(seh.data.reg)-min_mm_reg) shl 6) or ((seh.data.offset shr 3)-1));
+                  end;
+                else
+                  internalerror(2020041503);
+              end;
+              hp:=tai(hp.previous);
+            end;
+        end;
+
+      var
+        unwinddata : tdynamicarray;
+
+      procedure writebyte(b:byte); { appends b to unwinddata }
+        begin
+          unwinddata.write(b,sizeof(b));
+        end;
+
+      var
+        hp,hpnext,hpdata : tai;
+        seh : tai_seh_directive absolute hp;
+        lastsym : tai_symbol;
+        lastsec : tai_section;
+        inprologue,
+        deleteai : boolean;
+        totalcount,
+        instrcount,
+        datacount : sizeint;
+        handlername : tsymstr;
+        handlerflags : byte;
+        handlerdata : array of tai;
+        handlerdataidx : sizeint;
+        handlerdatacount : tai;
+        sehlist,
+        tmplist : TAsmList;
+        xdatasym : tasmsymbol;
+        unwindrec : longword;
+      begin
+        if not assigned(list) then
+          exit;
+
+        lastsym:=nil;
+        tmplist:=nil;
+        sehlist:=nil;
+        lastsec:=nil;
+        instrcount:=0;
+        datacount:=0;
+        unwinddata:=nil;
+        inprologue:=false;
+        handlerdata:=nil;
+        handlerdataidx:=0;
+        handlerdatacount:=nil;
+        handlerflags:=0;
+        handlername:='';
+
+        hp:=tai(list.first);
+        while assigned(hp) do
+          begin
+            deleteai:=false;
+            case hp.typ of
+              ait_section:
+                begin
+                  if assigned(sehlist) then
+                    begin
+                      if assigned(lastsec) and (tai_section(hp).name^=lastsec.name^) then
+                        begin
+                          { this section was only added due to the now removed SEH data }
+                          deleteai:=true;
+                          dec(list.section_count);
+                        end
+                      else
+                        internalerror(2020041214);
+                    end
+                  else
+                    begin
+                      lastsec:=tai_section(hp);
+                      { also reset the last encountered symbol }
+                      lastsym:=nil;
+                    end;
+
+                  if assigned(tmplist) then
+                    begin
+                      list.insertListBefore(hp,tmplist);
+                      tmplist.free;
+                      tmplist:=nil;
+                    end;
+
+                end;
+              ait_symbol:
+                begin
+                  if tai_symbol(hp).sym.typ=AT_FUNCTION then
+                    lastsym:=tai_symbol(hp);
+                end;
+              ait_instruction:
+                if assigned(sehlist) then
+                  inc(instrcount);
+              ait_const:
+                if assigned(sehlist) then
+                  inc(datacount,tai_const(hp).size);
+              ait_seh_directive:
+                begin
+                  if not assigned(sehlist) and (seh.kind<>ash_proc) then
+                    internalerror(2020041208);
+                  { most seh directives are removed }
+                  deleteai:=true;
+                  case seh.kind of
+                    ash_proc:
+                      begin
+                        if not assigned(lastsec) then
+                          internalerror(2020041203);
+                        datacount:=0;
+                        instrcount:=0;
+                        handlerflags:=0;
+                        handlername:='';
+                        sehlist:=tasmlist.create;
+                        inprologue:=true;
+                      end;
+                    ash_endproc:
+                      begin
+                        if not assigned(sehlist) then
+                          internalerror(2020041501);
+                        if assigned(tmplist) then
+                          internalerror(2020041302);
+                        if not assigned(lastsym) then
+                          internalerror(2020041303);
+                        if inprologue then
+                          cgmessage(asmw_e_missing_endprologue);
+
+                        unwinddata:=convert_unwinddata(sehlist);
+
+                        writebyte($E4); { end: 11100100 }
+
+                        { fill up with NOPs }
+                        while unwinddata.size mod 4<>0 do
+                          writebyte($E3);
+
+                        { note: we can pass Nil here, because in case of a LLVM
+                                backend this whole code shouldn't be required
+                                anyway }
+                        xdatasym:=current_asmdata.DefineAsmSymbol('xdata_'+lastsym.sym.name,AB_LOCAL,AT_DATA,nil);
+
+                        tmplist:=tasmlist.create;
+                        new_section(tmplist,sec_pdata,lastsec.name^,0);
+                        tmplist.concat(tai_const.Create_rva_sym(lastsym.sym));
+                        tmplist.concat(tai_const.Create_rva_sym(xdatasym));
+
+                        new_section(tmplist,sec_rodata,xdatasym.name,0);
+                        tmplist.concat(tai_symbol.Create(xdatasym,0));
+
+                        tmplist.concat(tai_comment.Create(strpnew('instr: '+tostr(instrcount)+', data: '+tostr(datacount)+', unwind: '+tostr(unwinddata.size))));
+
+                        {$ifdef EXTDEBUG}
+                        comment(V_Debug,'got section: '+lastsec.name^);
+                        comment(V_Debug,'got instructions: '+tostr(instrcount));
+                        comment(V_Debug,'got data: '+tostr(datacount));
+                        comment(V_Debug,'got unwinddata: '+tostr(unwinddata.size));
+                        {$endif EXTDEBUG}
+
+                        if datacount mod 4<>0 then
+                          cgmessage(asmw_e_seh_invalid_data_size);
+
+                        totalcount:=datacount div 4+instrcount;
+
+                        { splitting to multiple pdata/xdata sections is not yet supported; the
+                          length field only holds (1 shl 18)-1 words, so anything from 1 MB on fails }
+                        if totalcount>=(1 shl 18) then
+                          comment(V_Error,'Function is larger than 1 MB which is not supported for SEH currently');
+
+                        unwindrec:=min(totalcount,(1 shl 18)-1);
+                        if handlerflags<>0 then
+                          unwindrec:=unwindrec or (1 shl 20);
+
+                        { currently we only have one epilog, so E needs to be
+                          set to 1 and epilog scope index needs to be 0, no
+                          matter if we require the extension for the unwinddata
+                          or not }
+                        unwindrec:=unwindrec or (1 shl 21);
+
+                        if unwinddata.size div 4<=31 then
+                          unwindrec:=unwindrec or ((unwinddata.size div 4) shl 27);
+
+                        { exception record headers }
+                        tmplist.concat(tai_const.Create_32bit(longint(unwindrec)));
+                        if cs_asm_source in init_settings.globalswitches then
+                          tmplist.concat(tai_comment.create(strpnew(hexstr(unwindrec,8))));
+
+                        if unwinddata.size div 4>31 then
+                          begin
+                            { once we're able to split a .pdata entry this can be
+                              removed as well }
+                            if unwinddata.size div 4>255 then
+                              comment(V_Error,'Too many unwind codes for SEH');
+                            unwindrec:=(unwinddata.size div 4) shl 16;
+                            tmplist.concat(tai_const.create_32bit(longint(unwindrec)));
+                            if cs_asm_source in init_settings.globalswitches then
+                              tmplist.concat(tai_comment.create(strpnew(hexstr(unwindrec,8))));
+                          end;
+
+                        { unwind codes }
+                        unwinddata.seek(0);
+                        while unwinddata.pos<unwinddata.size do
+                          begin
+                            unwinddata.read(unwindrec,sizeof(longword));
+                            tmplist.concat(tai_const.Create_32bit(longint(unwindrec)));
+                            if cs_asm_source in init_settings.globalswitches then
+                              tmplist.concat(tai_comment.create(strpnew(hexstr(unwindrec,8))));
+                          end;
+                        unwinddata.free;
+
+                        if handlerflags<>0 then
+                          begin
+                            tmplist.concat(tai_const.Create_rva_sym(current_asmdata.RefAsmSymbol(handlername,AT_FUNCTION,false)));
+                            if length(handlerdata)>0 then
+                              begin
+                                tmplist.concat(handlerdatacount);
+                                for handlerdataidx:=0 to high(handlerdata) do
+                                  tmplist.concat(handlerdata[handlerdataidx]);
+                              end;
+                          end;
+
+                        handlerdata:=nil;
+
+                        sehlist.free;
+                        sehlist:=nil;
+                      end;
+                    ash_endprologue:
+                      inprologue:=false;
+                    ash_handler:
+                      begin
+                        handlername:=seh.data.name^;
+                        handlerflags:=seh.data.flags;
+                      end;
+                    ash_handlerdata:
+                      begin
+                        if handlername='' then
+                          cgmessage(asmw_e_handlerdata_no_handler);
+                        hpdata:=tai(hp.next);
+                        if not assigned(hpdata) or (hpdata.typ<>ait_const) or (tai_const(hpdata).consttype<>aitconst_32bit) then
+                          internalerror(2020041215);
+                        handlerdatacount:=hpdata;
+                        setlength(handlerdata,tai_const(hpdata).value*4);
+                        handlerdataidx:=0;
+                        hpnext:=tai(hpdata.next);
+                        list.remove(hpdata);
+                        hpdata:=hpnext;
+                        while (handlerdataidx<length(handlerdata)) and assigned(hpdata) do
+                          begin
+                            if (hpdata.typ<>ait_const) or not (tai_const(hpdata).consttype in [aitconst_32bit,aitconst_rva_symbol]) then
+                              internalerror(2020041212);
+                            handlerdata[handlerdataidx]:=hpdata;
+                            inc(handlerdataidx);
+                            hpnext:=tai(hpdata.next);
+                            list.remove(hpdata);
+                            hpdata:=hpnext;
+                          end;
+                        if handlerdataidx<length(handlerdata) then
+                          internalerror(2020041213);
+                      end;
+                    ash_stackalloc,
+                    ash_addfp,
+                    ash_setfp,
+                    ash_nop,
+                    ash_savefplr,
+                    ash_savefplr_x,
+                    ash_savereg,
+                    ash_savereg_x,
+                    ash_saveregp,
+                    ash_saveregp_x,
+                    ash_savefreg,
+                    ash_savefreg_x,
+                    ash_savefregp,
+                    ash_savefregp_x:
+                      begin
+                        if not assigned(sehlist) then
+                          internalerror(2020041504);
+                        if not inprologue then
+                          internalerror(2020041505);
+                        hpdata:=hp;
+                        hp:=tai(hp.previous);
+                        list.Remove(hpdata);
+                        sehlist.concat(hpdata);
+                        { don't delete this }
+                        deleteai:=false;
+                      end;
+                    else
+                      internalerror(2020041206);
+                  end;
+                end;
+              else
+                { ignore }
+                ;
+            end;
+
+            if deleteai then
+              begin
+                hpnext:=tai(hp.next);
+                list.remove(hp);
+                hp.free;
+                hp:=hpnext;
+              end
+            else
+              hp:=tai(hp.next);
+          end;
+
+        if assigned(sehlist) then
+          internalerror(2020041205);
+
+        if assigned(tmplist) then
+          begin
+            list.concatlist(tmplist);
+            tmplist.free;
+          end;
+      end;
+
+
+    function TAArch64ClangGASAssembler.sectionflags(secflags:TSectionFlags):string;
+      var
+        readonly_on_win64 : boolean;
+      begin
+        { on aarch64-win64 an explicit "r" is required for sections that are not writable }
+        readonly_on_win64:=(target_info.system=system_aarch64_win64) and not (SF_W in secflags);
+        Result:=inherited sectionflags(secflags);
+        if readonly_on_win64 then
+          Result:=Result+'r';
+      end;
+
+
+    procedure TAArch64ClangGASAssembler.WriteAsmList;
+      begin
+        { clang lacks some directives we need, so they are rewritten by hand into pdata/xdata records }
+        if target_info.system=system_aarch64_win64 then
+          with current_asmdata do
+            begin
+              TransformSEHDirectives(AsmLists[al_pure_assembler]);
+              TransformSEHDirectives(AsmLists[al_procedures]);
+            end;
+        inherited WriteAsmList;
+      end;
+
+
 {****************************************************************************}
 {                  Helper routines for Instruction Writer                    }
 {****************************************************************************}
@@ -119,9 +597,13 @@ unit agcpugas;
                     result:=ref.symbol.name+darwin_addrpage2str[ref.refaddr]
                   else
                     result:=linux_addrpage2str[ref.refaddr]+ref.symbol.name
-                end
+                end;
+              addr_pic,
+              { for locals replaced by temp symbols on LLVM }
+              addr_no:
+                result:=ref.symbol.name;
               else
-                internalerror(2015022301);
+                internalerror(2015022302);
             end
           end
         else
@@ -170,7 +652,7 @@ unit agcpugas;
                 else
                   begin
                     if ref.refaddr<>addr_no then
-                      internalerror(2014121506);
+                      internalerror(2014121502);
                     if (ref.offset<>0) then
                       result:=result+', #'+tostr(ref.offset);
                   end;
@@ -188,26 +670,13 @@ unit agcpugas;
 
 
     function getopstr(asminfo: pasminfo; hp: taicpu; opnr: longint; const o: toper): string;
+      var
+        i: longint;
+        reg: tregister;
       begin
         case o.typ of
           top_reg:
-            { we cannot yet represent "umov w0, v4.s[0]" or "ins v4.d[0], x1",
-              so for now we use "s4" or "d4" instead -> translate here }
-            if ((hp.opcode=A_INS) or
-                (hp.opcode=A_UMOV)) and
-               (getregtype(hp.oper[opnr]^.reg)=R_MMREGISTER) then
-              begin
-                case getsubreg(hp.oper[opnr]^.reg) of
-                  R_SUBMMS:
-                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.S[0]';
-                  R_SUBMMD:
-                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.D[0]';
-                  else
-                    internalerror(2014122907);
-                end;
-              end
-            else
-              getopstr:=gas_regname(o.reg);
+            getopstr:=gas_regname(o.reg);
           top_shifterop:
             begin
               getopstr:=gas_shiftmode2str[o.shifterop^.shiftmode];
@@ -236,6 +705,28 @@ unit agcpugas;
               end
             else
               getopstr:=getreferencestring(asminfo,o.ref^);
+          top_realconst:
+            begin
+              str(o.val_real,Result);
+              Result:='#'+Result;
+            end;
+          top_regset:
+            begin
+              reg:=o.basereg;
+              result:='{'+gas_regname(reg);
+              for i:=1 to o.nregs-1 do
+                begin
+                  setsupreg(reg,succ(getsupreg(reg)) mod 32);
+                  result:=result+', '+gas_regname(reg);
+                end;
+              result:=result+'}';
+              if o.regsetindex<>255 then
+                result:=result+'['+tostr(o.regsetindex)+']'
+            end;
+          top_indexedreg:
+            begin
+              result:=gas_regname(o.indexedreg)+'['+tostr(o.regindex)+']';
+            end;
           else
             internalerror(2014121507);
         end;
@@ -279,25 +770,42 @@ unit agcpugas;
             supported_targets : [system_aarch64_linux,system_aarch64_android];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
+            labelmaxlen : -1;
             comment : '// ';
             dollarsign: '$';
           );
 
        as_aarch64_clang_darwin_info : tasminfo =
           (
-            id     : as_clang;
+            id     : as_clang_asdarwin;
             idtxt  : 'CLANG';
             asmbin : 'clang';
-            asmcmd : '-c -o $OBJ $EXTRAOPT -arch arm64 $DARWINVERSION -x assembler $ASM';
-            supported_targets : [system_aarch64_darwin];
-            flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
+            asmcmd : '-x assembler -c -target $TRIPLET -o $OBJ $EXTRAOPT -x assembler $ASM';
+            supported_targets : [system_aarch64_ios,system_aarch64_darwin];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_llvm];
             labelprefix : 'L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: '$';
           );
 
+       as_aarch64_clang_gas_info : tasminfo =
+          (
+            id     : as_clang_gas;
+            idtxt  : 'CLANG';
+            asmbin : 'clang';
+            asmcmd : '-x assembler -c -target $TRIPLET -o $OBJ $EXTRAOPT -x assembler $ASM';
+            supported_targets : [system_aarch64_win64];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_llvm];
+            labelprefix : '.L';
+            labelmaxlen : -1;
+            comment : '// ';
+            dollarsign: '$';
+          );
+
 
 begin
   RegisterAssembler(as_aarch64_gas_info,TAArch64Assembler);
   RegisterAssembler(as_aarch64_clang_darwin_info,TAArch64AppleAssembler);
+  RegisterAssembler(as_aarch64_clang_gas_info,TAArch64ClangGASAssembler);
 end.

+ 728 - 57
compiler/aarch64/aoptcpu.pas

@@ -32,32 +32,47 @@ Interface
     uses
       globtype, globals,
       cutils,
-      cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub;
+      cgbase, cpubase, aasmtai, aasmcpu,
+      aopt, aoptcpub, aoptarm;
 
     Type
-      TCpuAsmOptimizer = class(TAsmOptimizer)
+      TCpuAsmOptimizer = class(TARMAsmOptimizer)
         { uses the same constructor as TAopObj }
         function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
-        procedure PeepHoleOptPass2;override;
-        function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean;
-        function LookForPostindexedPattern(p : taicpu) : boolean;
-        procedure DebugMsg(const s : string; p : tai);
+        function PeepHoleOptPass2Cpu(var p: tai): boolean; override;
+        function PostPeepHoleOptsCpu(var p: tai): boolean; override;
+        function RegLoadedWithNewValue(reg: tregister; hp: tai): boolean;override;
+        function InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean;override;
+        function LookForPostindexedPattern(var p : tai) : boolean;
+      private
+        function RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string): boolean;
+        function OptPass1Shift(var p: tai): boolean;
+        function OptPostCMP(var p: tai): boolean;
+        function OptPass1Data(var p: tai): boolean;
+        function OptPass1FData(var p: tai): Boolean;
+        function OptPass1STP(var p: tai): boolean;
+        function OptPass1Mov(var p: tai): boolean;
+        function OptPass1FMov(var p: tai): Boolean;
+
+        function OptPass2LDRSTR(var p: tai): boolean;
       End;
 
 Implementation
 
   uses
-    aasmbase;
+    aasmbase,
+    aoptutils,
+    cgutils,
+    verbose;
 
 {$ifdef DEBUG_AOPTCPU}
-  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
-    begin
-      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
-    end;
+    const
+      SPeepholeOptimization: shortstring = 'Peephole Optimization: ';
 {$else DEBUG_AOPTCPU}
-  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
-    begin
-    end;
+    { Empty strings help the optimizer to remove string concatenations that won't
+      ever appear to the user on release builds. [Kit] }
+    const
+      SPeepholeOptimization = '';
 {$endif DEBUG_AOPTCPU}
 
   function CanBeCond(p : tai) : boolean;
@@ -66,35 +81,86 @@ Implementation
     end;
 
 
-  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
+  function TCpuAsmOptimizer.RegLoadedWithNewValue(reg: tregister; hp: tai): boolean; { True if instruction hp has reg as oper[0], or a pre-/post-indexed oper[0] reference based on reg }
+    var
+      p: taicpu;
     begin
-      result :=
-        (instr.typ = ait_instruction) and
-        ((op = []) or (taicpu(instr).opcode in op)) and
-        ((postfix = []) or (taicpu(instr).oppostfix in postfix));
-    end;
+      Result := false;
+      if not ((assigned(hp)) and (hp.typ = ait_instruction)) then
+        exit;
 
+      p := taicpu(hp);
+      case p.opcode of
+        { These opcodes do not write into a register at all }
+        A_CMP, A_CMN, A_TST, A_B, A_BL, A_MSR, A_FCMP:
+          exit;
+        {Take care of post/preincremented store and loads, they will change their base register}
+        A_STR, A_LDR:
+          begin
+            Result := false;
+            { actually, this does not apply here because post-/preindexed does not mean that a register
+              is loaded with a new value, it is only modified
+              (taicpu(p).oper[1]^.typ=top_ref) and
+              (taicpu(p).oper[1]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+              (taicpu(p).oper[1]^.ref^.base = reg);
+            }
+            { STR does not load into its first register }
+            if p.opcode = A_STR then
+              exit;
+          end;
+        else
+          ;
+      end;
 
-  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
-    begin
-      result :=
-        (instr.typ = ait_instruction) and
-        (taicpu(instr).opcode = op) and
-        ((postfix = []) or (taicpu(instr).oppostfix in postfix));
+      if Result then
+        exit;
+
+      case p.oper[0]^.typ of
+        top_reg:
+          Result := (p.oper[0]^.reg = reg);
+        top_ref:
+          Result :=
+            (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+            (taicpu(p).oper[0]^.ref^.base = reg);
+        else
+          ;
+      end;
     end;
 
 
-  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
-    Out Next: tai; reg: TRegister): Boolean;
+  function TCpuAsmOptimizer.InstructionLoadsFromReg(const reg: TRegister; const hp: tai): boolean; { True if a scanned operand of instruction hp is reg, or a reference using reg as base or index }
+    var
+      p: taicpu;
+      i: longint;
     begin
-      Next:=Current;
-      repeat
-        Result:=GetNextInstruction(Next,Next);
-      until not (Result) or
-            not(cs_opt_level3 in current_settings.optimizerswitches) or
-            (Next.typ<>ait_instruction) or
-            RegInInstruction(reg,Next) or
-            is_calljmp(taicpu(Next).opcode);
+      instructionLoadsFromReg := false;
+      if not (assigned(hp) and (hp.typ = ait_instruction)) then
+        exit;
+      p:=taicpu(hp);
+
+      i:=1;
+
+      { Scanning starts at oper[1]; oper[0] is included only when the instruction reads it (read or read/write operand) }
+      if taicpu(hp).spilling_get_operation_type(0) in [operand_read, operand_readwrite] then
+        i:=0;
+
+      while(i<p.ops) do
+        begin
+          case p.oper[I]^.typ of
+            top_reg:
+              Result := (p.oper[I]^.reg = reg);
+            top_ref:
+              Result :=
+                (p.oper[I]^.ref^.base = reg) or
+                (p.oper[I]^.ref^.index = reg);
+            else
+              ;
+          end;
+          { Bailout if we found something }
+          if Result then
+            exit;
+          Inc(I);
+        end;
     end;
 
   {
@@ -107,20 +173,20 @@ Implementation
 
       ldr/str regX,[reg1], regY/const
   }
-  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
+  function TCpuAsmOptimizer.LookForPostindexedPattern(var p: tai) : boolean;
     var
       hp1 : tai;
     begin
       Result:=false;
-      if (p.oper[1]^.typ = top_ref) and
-        (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
-        (p.oper[1]^.ref^.index=NR_NO) and
-        (p.oper[1]^.ref^.offset=0) and
-        GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
+      if (taicpu(p).oper[1]^.typ = top_ref) and
+        (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
+        (taicpu(p).oper[1]^.ref^.index=NR_NO) and
+        (taicpu(p).oper[1]^.ref^.offset=0) and
+        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[1]^.ref^.base) and
         { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
         MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
-        (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
-        (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
+        (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[1]^.ref^.base) and
+        (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[1]^.ref^.base) and
         (
          { valid offset? }
          (taicpu(hp1).oper[2]^.typ=top_const) and
@@ -128,16 +194,20 @@ Implementation
          (abs(taicpu(hp1).oper[2]^.val)<256)
         ) and
         { don't apply the optimization if the base register is loaded }
-        (getsupreg(p.oper[0]^.reg)<>getsupreg(p.oper[1]^.ref^.base)) and
+        (getsupreg(taicpu(p).oper[0]^.reg)<>getsupreg(taicpu(p).oper[1]^.ref^.base)) and
         not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
         not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
         begin
-          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
-          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
+          if taicpu(p).opcode = A_LDR then
+            DebugMsg('Peephole LdrAdd/Sub2Ldr Postindex done', p)
+          else
+            DebugMsg('Peephole StrAdd/Sub2Str Postindex done', p);
+
+          taicpu(p).oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
           if taicpu(hp1).opcode=A_ADD then
-            p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
+            taicpu(p).oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
           else
-            p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
+            taicpu(p).oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
           asml.Remove(hp1);
           hp1.Free;
           Result:=true;
@@ -145,20 +215,611 @@ Implementation
     end;
 
 
+  function TCpuAsmOptimizer.RemoveSuperfluousFMov(const p: tai; movp: tai; const optimizer: string):boolean; { Makes p write directly to the target of the following fmov (movp) and removes movp; True if movp was removed }
+    var
+      alloc,
+      dealloc : tai_regalloc;
+      hp1 : tai;
+    begin
+      Result:=false;
+      if ((MatchInstruction(movp, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+           ((getregtype(taicpu(movp).oper[0]^.reg)=R_MMREGISTER) { or (taicpu(p).opcode in [A_LDUR])})
+          ) { or
+          (((taicpu(p).oppostfix in [PF_F64F32,PF_F64S16,PF_F64S32,PF_F64U16,PF_F64U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFD)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F64])) or
+          (((taicpu(p).oppostfix in [PF_F32F64,PF_F32S16,PF_F32S32,PF_F32U16,PF_F32U32]) or (getsubreg(taicpu(p).oper[0]^.reg)=R_SUBFS)) and MatchInstruction(movp, A_VMOV, [taicpu(p).condition], [PF_F32])) }
+         ) and
+         (taicpu(movp).ops=2) and
+         MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
+         { the destination register of the mov must not be used between p and movp }
+         not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
+         { Take care to only do this for instructions which REALLY load to the first register.
+           Otherwise
+             str reg0, [reg1]
+             fmov reg2, reg0
+           will be optimized to
+             str reg2, [reg1]
+         }
+         RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
+        begin
+          { only proceed when the intermediate register is deallocated after movp }
+          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
+          if assigned(dealloc) then
+            begin
+              DebugMsg('Peephole '+optimizer+' removed superfluous vmov', movp);
+              result:=true;
+
+              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
+                and remove it if possible }
+              asml.Remove(dealloc);
+              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  alloc.free;
+                  dealloc.free;
+                end
+              else
+                asml.InsertAfter(dealloc,p);
+
+              { try to move the allocation of the target register }
+              GetLastInstruction(movp,hp1);
+              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  asml.InsertBefore(alloc,p);
+                  { adjust used regs }
+                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
+                end;
+
+              { change
+                  vldr reg0,[reg1]
+                  vmov reg2,reg0
+                into
+                  ldr reg2,[reg1]
+
+                if reg2 is an int register
+              if (taicpu(p).opcode=A_VLDR) and (getregtype(taicpu(movp).oper[0]^.reg)=R_INTREGISTER) then
+                taicpu(p).opcode:=A_LDR;
+              }
+
+              { finally get rid of the mov }
+              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              asml.remove(movp);
+              movp.free;
+            end;
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1Shift(var p : tai): boolean;
+    var
+      hp1,hp2: tai;
+      I2, I: Integer;
+      shifterop: tshifterop;
+    begin
+      Result:=false;
+      { This folds shifterops into following instructions
+        <shiftop> r0, r1, #imm
+        <op> r2, r3, r0
+
+        to
+
+        <op> r2, r3, r1, <shiftop> #imm
+
+        On success the shift and the consuming instruction are freed, p is
+        set to the instruction that followed the shift and True is returned.
+        If no fold is possible, removing a superfluous move after the shift
+        is tried instead.
+      }
+      { do not handle ROR yet, only part of the instructions below support ROR as shifter operand }
+      if MatchInstruction(p,[A_LSL, A_LSR, A_ASR{, A_ROR}],[PF_None]) and
+         MatchOpType(taicpu(p),top_reg,top_reg,top_const) and
+         GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+         MatchInstruction(hp1, [A_ADD, A_AND, A_BIC, A_CMP, A_CMN,
+                                A_EON, A_EOR, A_NEG, A_ORN, A_ORR,
+                                A_SUB, A_TST], [PF_None]) and
+         RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
+         (taicpu(hp1).ops >= 2) and
+         { Currently we can't fold into another shifterop }
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
+         { SP does not work completely with shifted registers, as I didn't find the exact rules,
+           we do not operate on SP }
+         (taicpu(hp1).oper[0]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[1]^.reg<>NR_SP) and
+         (taicpu(hp1).oper[taicpu(hp1).ops-1]^.reg<>NR_SP) and
+         { reg1 must not be modified in between }
+         not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
+         (
+           { Only ONE of the two src operands is allowed to match }
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-2]^) xor
+           MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)
+         ) and
+         { SUB, BIC and ORN are not commutative and AArch64 has no reversed forms (e.g. RSB): the last operand must match }
+         (((taicpu(hp1).opcode<>A_SUB) and (taicpu(hp1).opcode<>A_BIC) and (taicpu(hp1).opcode<>A_ORN)) or
+          MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[taicpu(hp1).ops-1]^)) then
+        begin
+          { for the two operand instructions, start also at the second operand as they are not always commutative
+            (depends on the flags tested later on) and thus the operands cannot be swapped }
+          I2:=1;
+          for I:=I2 to taicpu(hp1).ops-1 do
+            if MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg) then
+              begin
+                { If the parameter matched on the second op from the RIGHT
+                  we have to switch the parameters, this will not happen for CMP
+                  where we're only evaluating the rightmost parameter
+                }
+                shifterop_reset(shifterop);
+                case taicpu(p).opcode of
+                  A_LSL:
+                    shifterop.shiftmode:=SM_LSL;
+                  A_ROR:
+                    shifterop.shiftmode:=SM_ROR;
+                  A_LSR:
+                    shifterop.shiftmode:=SM_LSR;
+                  A_ASR:
+                    shifterop.shiftmode:=SM_ASR;
+                  else
+                    InternalError(2019090401);
+                end;
+                shifterop.shiftimm:=taicpu(p).oper[2]^.val;
+
+                if I <> taicpu(hp1).ops-1 then
+                  begin
+                    { shifted value was the first source: swap the sources so it becomes the shifter operand }
+                    if taicpu(hp1).ops = 3 then
+                      hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[2]^.reg,
+                           taicpu(p).oper[1]^.reg, shifterop)
+                    else
+                      hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                           taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                           shifterop);
+                  end
+                else
+                  if taicpu(hp1).ops = 3 then
+                    hp2:=taicpu.op_reg_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(hp1).oper[1]^.reg,
+                         taicpu(p).oper[1]^.reg,shifterop)
+                  else
+                    hp2:=taicpu.op_reg_reg_shifterop(taicpu(hp1).opcode,
+                         taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
+                         shifterop);
+
+                taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
+                asml.insertbefore(hp2, hp1);
+                { hp2 is reused: it now receives the instruction following the shift, which becomes the new p }
+                GetNextInstruction(p, hp2);
+                asml.remove(p);
+                asml.remove(hp1);
+                p.free;
+                hp1.free;
+                p:=hp2;
+                DebugMsg('Peephole FoldShiftProcess done', p);
+                Result:=true;
+                break;
+              end;
+        end
+      else if MatchInstruction(p,[A_LSL, A_LSR, A_ASR,A_ROR],[PF_None]) and
+        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+        RemoveSuperfluousMove(p, hp1, 'ShiftMov2Shift') then
+        Result:=true;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1Data(var p : tai): boolean; { Tries RemoveSuperfluousMove on the next instruction that uses p's first operand register }
+    var
+      hp1: tai;
+    begin
+      Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+        RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1FData(var p: tai): Boolean; { Tries RemoveSuperfluousFMov on the next instruction that uses p's first operand register }
+    var
+      hp1: tai;
+    begin
+      Result := GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+        RemoveSuperfluousFMov(p, hp1, 'FOpFMov2FOp');
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1STP(var p : tai): boolean; { Replaces an stp/mov/bl/ldp/ret sequence by a single b to the called symbol; p then points to that branch }
+    var
+      hp1, hp2, hp3, hp4: tai;
+    begin
+      Result:=false;
+      {
+        change
+
+          stp x29,x30,[sp, #-16]!
+          mov x29,sp
+          bl  abc
+          ldp x29,x30,[sp], #16
+          ret
+
+        into
+
+          b   abc
+      }
+      if MatchInstruction(p, A_STP, [C_None], [PF_None]) and
+        MatchOpType(taicpu(p),top_reg,top_reg,top_ref) and
+        (taicpu(p).oper[0]^.reg = NR_X29) and
+        (taicpu(p).oper[1]^.reg = NR_X30) and
+        (taicpu(p).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
+        (taicpu(p).oper[2]^.ref^.index=NR_NO) and
+        (taicpu(p).oper[2]^.ref^.offset=-16) and
+        (taicpu(p).oper[2]^.ref^.addressmode=AM_PREINDEXED) and
+
+        GetNextInstruction(p, hp1) and
+        MatchInstruction(hp1, A_MOV, [C_None], [PF_NONE]) and
+        MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^) and
+        (taicpu(hp1).oper[1]^.typ = top_reg) and
+        (taicpu(hp1).oper[1]^.reg = NR_STACK_POINTER_REG) and
+
+        GetNextInstruction(hp1, hp2) and
+        SkipEntryExitMarker(hp2, hp2) and
+        MatchInstruction(hp2, A_BL, [C_None], [PF_NONE]) and
+        (taicpu(hp2).oper[0]^.typ = top_ref) and
+
+        GetNextInstruction(hp2, hp3) and
+        SkipEntryExitMarker(hp3, hp3) and
+        MatchInstruction(hp3, A_LDP, [C_None], [PF_NONE]) and
+        MatchOpType(taicpu(hp3),top_reg,top_reg,top_ref) and
+        (taicpu(hp3).oper[0]^.reg = NR_X29) and
+        (taicpu(hp3).oper[1]^.reg = NR_X30) and
+        (taicpu(hp3).oper[2]^.ref^.base=NR_STACK_POINTER_REG) and
+        (taicpu(hp3).oper[2]^.ref^.index=NR_NO) and
+        (taicpu(hp3).oper[2]^.ref^.offset=16) and
+        (taicpu(hp3).oper[2]^.ref^.addressmode=AM_POSTINDEXED) and
+
+        GetNextInstruction(hp3, hp4) and
+        MatchInstruction(hp4, A_RET, [C_None], [PF_None]) and
+        (taicpu(hp4).ops = 0) then
+        begin
+          asml.Remove(p);
+          asml.Remove(hp1);
+          asml.Remove(hp3);
+          asml.Remove(hp4);
+          taicpu(hp2).opcode:=A_B; { the call itself is kept and only its opcode changes }
+          p.free;
+          hp1.free;
+          hp3.free;
+          hp4.free;
+          p:=hp2;
+          DebugMsg('Peephole Bl2B done', p);
+          Result:=true;
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1Mov(var p : tai): boolean; { Removes a mov whose two operands match, otherwise tries to merge it with the next instruction using its target register }
+    var
+      hp1: tai;
+    begin
+     Result:=false;
+     { mov with matching source and destination operand and no postfix: drop it }
+     if MatchOperand(taicpu(p).oper[0]^,taicpu(p).oper[1]^) and
+       (taicpu(p).oppostfix=PF_None) then
+       begin
+         RemoveCurrentP(p);
+         DebugMsg('Peephole Mov2None done', p);
+         Result:=true;
+       end
+
+     {
+       optimize
+       mov rX, yyyy
+       .... (next instruction using rX): try RemoveSuperfluousMove first, then RedundantMovProcess for two-operand moves
+     }
+     else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
+       begin
+         if RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
+           Result:=true
+         else if (taicpu(p).ops = 2) and
+           (tai(hp1).typ = ait_instruction) and
+           RedundantMovProcess(p,hp1) then
+           Result:=true;
+       end;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass1FMov(var p: tai): Boolean; { Removes every directly following fmov that merely copies p's destination back into p's source; True if any was removed }
+    var
+      hp1: tai;
+    begin
+      {
+        change
+        fmov reg0,reg1
+        fmov reg1,reg0
+        into
+        fmov reg0,reg1
+      }
+      Result := False;
+      while GetNextInstruction(p, hp1) and
+        MatchInstruction(hp1, A_FMOV, [taicpu(p).condition], [taicpu(p).oppostfix]) and
+        MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^) and
+        MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[0]^) do
+        begin
+          asml.Remove(hp1);
+          hp1.free;
+          DebugMsg(SPeepholeOptimization + 'FMovFMov2FMov done', p);
+          Result:=true;
+        end;
+      { not enabled as apparently not happening
+      if MatchOpType(taicpu(p),top_reg,top_reg) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, [A_FSUB,A_FADD,A_FNEG,A_FMUL,A_FSQRT,A_FDIV,A_FABS], [PF_None]) and
+        (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) or
+         ((taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^))
+        ) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        not(RegUsedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
+        begin
+          DebugMsg(SPeepholeOptimization + 'FMovFOp2FOp done', hp1);
+          AllocRegBetween(taicpu(hp1).oper[1]^.reg,p,hp1,UsedRegs);
+          if MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^) then
+            taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
+          if (taicpu(hp1).ops=3) and MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[2]^) then
+            taicpu(hp1).oper[2]^.reg:=taicpu(p).oper[1]^.reg;
+          RemoveCurrentP(p);
+          Result:=true;
+          exit;
+        end;
+      }
+    end;
+
+
+  function TCpuAsmOptimizer.OptPass2LDRSTR(var p: tai): boolean; { Merges p with a following LDR/STR on the adjacent offset of the same base into one LDP/STP; True if merged }
+    var
+      hp1, hp1_last: tai;
+      ThisRegister: TRegister;
+      OffsetVal, ValidOffset, MinOffset, MaxOffset: asizeint;
+      TargetOpcode: TAsmOp;
+    begin
+      Result := False;
+      ThisRegister := taicpu(p).oper[0]^.reg;
+
+      case taicpu(p).opcode of
+        A_LDR:
+          TargetOpcode := A_LDP;
+        A_STR:
+          TargetOpcode := A_STP;
+        else
+          InternalError(2020081501);
+      end;
+
+      { reg appearing in ref invalidates these optimisations }
+      if (TargetOpcode = A_STP) or not RegInRef(ThisRegister, taicpu(p).oper[1]^.ref^) then
+        begin
+          { LDP/STP has a smaller permitted offset range than LDR/STR.
+
+            TODO: For a group of out-of-range LDR/STR instructions, can
+            we declare a temporary register equal to the offset base
+            address, modify the STR instructions to use that register
+            and then convert them to STP instructions?  Note that STR
+            generally takes 2 cycles (on top of the memory latency),
+            while LDP/STP takes 3.
+          }
+
+          if (getsubreg(ThisRegister) = R_SUBQ) then
+            begin
+              ValidOffset := 8; { required byte distance between the two accesses }
+              MinOffset := -512; { lowest offset accepted for the pair }
+              MaxOffset := 504; { highest offset accepted for the pair }
+            end
+          else
+            begin
+              ValidOffset := 4;
+              MinOffset := -256;
+              MaxOffset := 252;
+            end;
+
+          hp1_last := p;
+
+          { Look for nearby LDR/STR instructions }
+          if (taicpu(p).oppostfix = PF_NONE) and
+            (taicpu(p).oper[1]^.ref^.addressmode = AM_OFFSET) then
+            { Scan forward from p; hp1_last is the last instruction examined so far }
+            while GetNextInstruction(hp1_last, hp1) do
+              begin
+                if (hp1.typ <> ait_instruction) then
+                  Break;
+
+                if (taicpu(hp1).opcode = taicpu(p).opcode) then
+                  begin
+                    if (taicpu(hp1).oppostfix = PF_NONE) and
+                      { Registers need to be the same size }
+                      (getsubreg(ThisRegister) = getsubreg(taicpu(hp1).oper[0]^.reg)) and
+                      (
+                        (TargetOpcode = A_STP) or
+                        { LDP x0, x0, [sp, #imm] is undefined behaviour, even
+                          though such an LDR pair should have been optimised
+                          out by now. STP is okay }
+                        (ThisRegister <> taicpu(hp1).oper[0]^.reg)
+                      ) and
+                      (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
+                      (taicpu(p).oper[1]^.ref^.base = taicpu(hp1).oper[1]^.ref^.base) and
+                      (taicpu(p).oper[1]^.ref^.index = taicpu(hp1).oper[1]^.ref^.index) and
+                      { Make sure the address registers haven't changed }
+                      not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.base, p, hp1) and
+                      (
+                        (taicpu(hp1).oper[1]^.ref^.index = NR_NO) or
+                        not RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1)
+                      ) and
+                      { Don't need to check "RegInRef" because the base registers are identical,
+                        and the first one was checked already. [Kit] }
+                      (((TargetOpcode=A_LDP) and not RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) or
+                       ((TargetOpcode=A_STP) and not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1))) then
+                      begin
+                        { Can we convert these two LDR/STR instructions into a
+                          single LDP/STP? }
+
+                        OffsetVal := taicpu(hp1).oper[1]^.ref^.offset - taicpu(p).oper[1]^.ref^.offset;
+                        if (OffsetVal = ValidOffset) then
+                          begin
+                            if  (taicpu(p).oper[1]^.ref^.offset >= MinOffset) and (taicpu(hp1).oper[1]^.ref^.offset <= MaxOffset) then
+                              begin
+                                { Convert:
+                                    LDR/STR reg0, [reg2, #ofs]
+                                    ...
+                                    LDR/STR reg1, [reg2, #ofs + 8] // 4 if registers are 32-bit
+                                  To:
+                                    LDP/STP reg0, reg1, [reg2, #ofs]
+                                }
+                                taicpu(p).opcode := TargetOpcode;
+                                if TargetOpcode = A_STP then
+                                  DebugMsg('Peephole Optimization: StrStr2Stp', p)
+                                else
+                                  DebugMsg('Peephole Optimization: LdrLdr2Ldp', p);
+                                taicpu(p).ops := 3;
+                                { copy the reference to oper[2] before oper[1] is overwritten with the second register }
+                                taicpu(p).loadref(2, taicpu(p).oper[1]^.ref^);
+                                taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
+
+                                asml.Remove(hp1);
+                                hp1.Free;
+                                Result := True;
+                                Exit;
+                              end;
+                          end
+                        else if (OffsetVal = -ValidOffset) then
+                          begin
+                            if (taicpu(hp1).oper[1]^.ref^.offset >= MinOffset) and (taicpu(p).oper[1]^.ref^.offset <= MaxOffset) then
+                              begin
+                                { Convert:
+                                    LDR/STR reg0, [reg2, #ofs + 8] // 4 if registers are 32-bit
+                                    ...
+                                    LDR/STR reg1, [reg2, #ofs]
+                                  To:
+                                    LDP/STP reg1, reg0, [reg2, #ofs]
+                                }
+                                taicpu(p).opcode := TargetOpcode;
+                                if TargetOpcode = A_STP then
+                                  DebugMsg('Peephole Optimization: StrStr2Stp (reverse)', p)
+                                else
+                                  DebugMsg('Peephole Optimization: LdrLdr2Ldp (reverse)', p);
+                                taicpu(p).ops := 3;
+                                { the pair uses the lower-offset reference (from hp1) and the registers in swapped order }
+                                taicpu(p).loadref(2, taicpu(hp1).oper[1]^.ref^);
+                                taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
+                                taicpu(p).loadreg(0, taicpu(hp1).oper[0]^.reg);
+
+                                asml.Remove(hp1);
+                                hp1.Free;
+                                Result := True;
+                                Exit;
+                              end;
+                          end;
+                      end;
+                  end
+                else
+                  Break;
+
+                { Don't continue looking for LDR/STR pairs if the address register
+                  gets modified }
+                if RegModifiedByInstruction(taicpu(p).oper[1]^.ref^.base, hp1) then
+                  Break;
+
+                hp1_last := hp1;
+              end;
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.OptPostCMP(var p : tai): boolean; { Replaces 'cmp reg,#0' directly followed by b.eq/b.ne with a single cbz/cbnz on reg; p then points to the new instruction }
+    var
+     hp1,hp2: tai;
+    begin
+      Result:=false;
+      if MatchOpType(taicpu(p),top_reg,top_const) and
+        (taicpu(p).oper[1]^.val=0) and
+        GetNextInstruction(p,hp1) and
+        MatchInstruction(hp1,A_B,[PF_None]) and
+        (taicpu(hp1).condition in [C_EQ,C_NE]) then
+        begin
+          { the new instruction reuses the branch's target symbol and offset }
+          case taicpu(hp1).condition of
+            C_NE:
+              hp2:=taicpu.op_reg_sym_ofs(A_CBNZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
+            C_EQ:
+              hp2:=taicpu.op_reg_sym_ofs(A_CBZ,taicpu(p).oper[0]^.reg,taicpu(hp1).oper[0]^.ref^.symbol,taicpu(hp1).oper[0]^.ref^.offset);
+            else
+              Internalerror(2019090801);
+          end;
+          taicpu(hp2).fileinfo:=taicpu(hp1).fileinfo;
+          asml.insertbefore(hp2, hp1);
+
+          asml.remove(p);
+          asml.remove(hp1);
+          p.free;
+          hp1.free;
+          p:=hp2;
+          DebugMsg('Peephole CMPB.E/NE2CBNZ/CBZ done', p);
+          Result:=true;
+        end;
+    end;
+
+
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean; { Pass 1 dispatcher: routes instruction p to the optimisation routine for its opcode; False for anything else }
     begin
       result := false;
       if p.typ=ait_instruction then
         begin
           case taicpu(p).opcode of
-            A_LDR:
-              begin
-                Result:=LookForPostindexedPattern(taicpu(p));
-              end;
+            A_LDR,
             A_STR:
-              begin
-                Result:=LookForPostindexedPattern(taicpu(p));
-              end;
+              Result:=LookForPostindexedPattern(p);
+            A_MOV:
+              Result:=OptPass1Mov(p);
+            A_STP:
+              Result:=OptPass1STP(p);
+            A_LSR,
+            A_ROR,
+            A_ASR,
+            A_LSL:
+              Result:=OptPass1Shift(p);
+            A_AND:
+              Result:=OptPass1And(p);
+            A_ADD,
+            A_ADC,
+            A_SUB,
+            A_SBC,
+            A_BIC,
+            A_EOR,
+            A_ORR,
+            A_MUL:
+              Result:=OptPass1Data(p);
+            A_UXTB:
+              Result:=OptPass1UXTB(p);
+            A_UXTH:
+              Result:=OptPass1UXTH(p);
+            A_SXTB:
+              Result:=OptPass1SXTB(p);
+            A_SXTH:
+              Result:=OptPass1SXTH(p);
+//            A_VLDR,
+            A_FMADD,
+            A_FMSUB,
+            A_FNMADD,
+            A_FNMSUB,
+            A_FNMUL,
+            A_FADD,
+            A_FMUL,
+            A_FDIV,
+            A_FSUB,
+            A_FSQRT,
+            A_FNEG,
+            A_FCVT,
+            A_FABS:
+              Result:=OptPass1FData(p);
+            A_FMOV:
+              Result:=OptPass1FMov(p);
+            else
+              ;
+          end;
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.PeepHoleOptPass2Cpu(var p: tai): boolean;
+    begin
+      result := false;
+      if p.typ=ait_instruction then
+        begin
+          case taicpu(p).opcode of
+            A_LDR,
+            A_STR:
+              Result:=OptPass2LDRSTR(p);
             else
               ;
           end;
@@ -166,8 +827,18 @@ Implementation
     end;
 
 
-  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
+  function TCpuAsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean; { Post-peephole dispatcher: only CMP instructions are handled, via OptPostCMP }
     begin
+      result := false;
+      if p.typ=ait_instruction then
+        begin
+          case taicpu(p).opcode of
+            A_CMP:
+              Result:=OptPostCMP(p);
+            else
+              ;
+          end;
+        end;
     end;
 
 begin

+ 483 - 208
compiler/aarch64/cgcpu.pas

@@ -101,6 +101,7 @@ interface
         procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
         procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
         procedure g_check_for_fpu_exception(list: TAsmList; force, clear: boolean);override;
+        procedure g_profilecode(list: TAsmList);override;
        private
         function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
         procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
@@ -123,10 +124,11 @@ interface
 implementation
 
   uses
-    globals,verbose,systems,cutils,
+    globals,verbose,systems,cutils,cclasses,
     paramgr,fmodule,
     symtable,symsym,
     tgobj,
+    ncgutil,
     procinfo,cpupi;
 
 
@@ -180,7 +182,8 @@ implementation
             if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
                 (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
                ((ref.symbol.typ=AT_DATA) and
-                (ref.symbol.bind=AB_LOCAL)) then
+                (ref.symbol.bind=AB_LOCAL)) or
+               (target_info.system=system_aarch64_win64) then
               href.refaddr:=addr_page
             else
               href.refaddr:=addr_gotpage;
@@ -193,7 +196,8 @@ implementation
             if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
                 (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
                ((ref.symbol.typ=AT_DATA) and
-                (ref.symbol.bind=AB_LOCAL)) then
+                (ref.symbol.bind=AB_LOCAL)) or
+               (target_info.system=system_aarch64_win64) then
               begin
                 href.base:=NR_NO;
                 href.refaddr:=addr_pageoffset;
@@ -270,7 +274,7 @@ implementation
               { todo }
               A_LD1,A_LD2,A_LD3,A_LD4,
               A_ST1,A_ST2,A_ST3,A_ST4:
-                internalerror(2014110704);
+                internalerror(2014110702);
               { these don't support base+index }
               A_LDUR,A_STUR,
               A_LDP,A_STP:
@@ -297,7 +301,7 @@ implementation
                     offset may still be too big }
                 end;
               else
-                internalerror(2014110901);
+                internalerror(2014110903);
             end;
           end;
 
@@ -576,102 +580,168 @@ implementation
 
     procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
       var
-        preva: tcgint;
         opc: tasmop;
-        shift,maxshift: byte;
+        shift: byte;
         so: tshifterop;
-        reginited: boolean;
-        mask: tcgint;
+        reginited,doinverted: boolean;
+        manipulated_a: tcgint;
+        leftover_a: word;
       begin
-        { if we load a value into a 32 bit register, it is automatically
-          zero-extended to 64 bit }
-        if (hi(a)=0) and
-           (size in [OS_64,OS_S64]) then
-          begin
-            size:=OS_32;
-            reg:=makeregsize(reg,size);
-          end;
-        { values <= 32 bit are stored in a 32 bit register }
-        if not(size in [OS_64,OS_S64]) then
-          a:=cardinal(a);
-
-        if size in [OS_64,OS_S64] then
-          begin
-            mask:=-1;
-            maxshift:=64;
-          end
-        else
-          begin
-            mask:=$ffffffff;
-            maxshift:=32;
-          end;
-        { single movn enough? (to be extended) }
-        shift:=16;
-        preva:=a;
-        repeat
-          if (a shr shift)=(mask shr shift) then
+{$ifdef extdebug}
+        list.concat(tai_comment.Create(strpnew('Generating constant ' + tostr(a))));
+{$endif extdebug}
+        case a of
+          { Small positive number }
+          $0..$FFFF:
+            begin
+              list.concat(taicpu.op_reg_const(A_MOVZ, reg, a));
+              Exit;
+            end;
+          { Small negative number }
+          -65536..-1:
             begin
-              if shift=16 then
-                list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
+              list.concat(taicpu.op_reg_const(A_MOVN, reg, Word(not a)));
+              Exit;
+            end;
+          { Can be represented as a negative number more compactly }
+          $FFFF0000..$FFFFFFFF:
+            begin
+              { if we load a value into a 32 bit register, it is automatically
+                zero-extended to 64 bit }
+              list.concat(taicpu.op_reg_const(A_MOVN, makeregsize(reg,OS_32), Word(not a)));
+              Exit;
+            end;
+          else
+            begin
+
+              if size in [OS_64,OS_S64] then
+                begin
+                  { Check to see if a is a valid shifter constant that can be encoded in ORR as is }
+                  if is_shifter_const(a,size) then
+                    begin
+                      list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a));
+                      Exit;
+                    end;
+
+                  { This determines whether this write can be performed with an ORR followed by MOVK
+                    by copying the 2nd word to the 4th word for the ORR constant, then overwriting
+                    the 4th word with its real value.  The alternative would require 3 instructions }
+                  leftover_a := word(a shr 48);
+                  manipulated_a := (a and $0000FFFFFFFFFFFF);
+
+                  if manipulated_a = $0000FFFFFFFFFFFF then
+                    begin
+                      { This is even better, as we can just use a single MOVN on the last word }
+                      shifterop_reset(so);
+                      so.shiftmode := SM_LSL;
+                      so.shiftimm := 48;
+                      list.concat(taicpu.op_reg_const_shifterop(A_MOVN, reg, word(not leftover_a), so));
+                      Exit;
+                    end;
+
+                  manipulated_a := manipulated_a or (((a shr 16) and $FFFF) shl 48);
+                  { if manipulated_a = a, don't check, because is_shifter_const was already
+                    called for a and it returned False.  Reduces processing time. [Kit] }
+                  if (manipulated_a <> a) and is_shifter_const(manipulated_a, size) then
+                    begin
+                      list.concat(taicpu.op_reg_reg_const(A_ORR, reg, makeregsize(NR_XZR, size), manipulated_a));
+                      { The ORR left a copy of the 2nd word in the 4th word, and that copy
+                        differs from leftover_a here (manipulated_a <> a), so the MOVK is
+                        always required, even when leftover_a is zero }
+                      shifterop_reset(so);
+                      so.shiftmode := SM_LSL;
+                      so.shiftimm := 48;
+                      list.concat(taicpu.op_reg_const_shifterop(A_MOVK, reg, leftover_a, so));
+                      Exit;
+                    end;
+
+                  case a of
+                    { If a is in the given negative range, it can be stored
+                      more efficiently if it is inverted.  }
+                    TCgInt($FFFF000000000000)..-65537:
+                      begin
+                        { NOTE: This excluded range can be more efficiently
+                          stored as the first 16 bits followed by a shifter constant }
+                        case a of
+                          TCgInt($FFFF0000FFFF0000)..TCgInt($FFFF0000FFFFFFFF):
+                            doinverted := False
+                          else
+                            begin
+                              doinverted := True;
+                              a := not a;
+                            end;
+                        end;
+                      end;
+
+                    else
+                      doinverted := False;
+                  end;
+                end
               else
                 begin
-                  shifterop_reset(so);
-                  so.shiftmode:=SM_LSL;
-                  so.shiftimm:=shift-16;
-                  list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
+                  a:=cardinal(a);
+                  doinverted:=False;
                 end;
-              exit;
             end;
-          { only try the next 16 bits if the current one is all 1 bits, since
-            the movn will set all lower bits to 1 }
-          if word(a shr (shift-16))<>$ffff then
-            break;
-          inc(shift,16);
-        until shift=maxshift;
+        end;
+
         reginited:=false;
         shift:=0;
-        { can be optimized later to use more movn }
+
+        if doinverted then
+          opc:=A_MOVN
+        else
+          opc:=A_MOVZ;
+
         repeat
           { leftover is shifterconst? (don't check if we can represent it just
             as effectively with movz/movk, as this check is expensive) }
-          if ((shift<tcgsize2size[size]*(8 div 2)) and
-              (word(a)<>0) and
-              ((a shr 16)<>0)) and
-             is_shifter_const(a shl shift,size) then
+          if (word(a)<>0) then
             begin
-              if reginited then
-                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
+
+              if not doinverted and
+                ((shift<tcgsize2size[size]*(8 div 2)) and
+                  ((a shr 16)<>0)) and
+                 is_shifter_const(a shl shift,size) then
+                begin
+                  if reginited then
+                    list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
+                  else
+                    list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
+
+                  exit;
+                end;
+
+              { set all 16 bit parts <> 0 }
+              if shift=0 then
+                begin
+                  list.concat(taicpu.op_reg_const(opc,reg,word(a)));
+                  reginited:=true;
+                end
               else
-                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
-              exit;
+                begin
+                  shifterop_reset(so);
+                  so.shiftmode:=SM_LSL;
+                  so.shiftimm:=shift;
+                  if not reginited then
+                    begin
+                      list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
+                      reginited:=true;
+                    end
+                  else
+                    begin
+                      if doinverted then
+                        list.concat(taicpu.op_reg_const_shifterop(A_MOVK,reg,word(not a),so))
+                      else
+                        list.concat(taicpu.op_reg_const_shifterop(A_MOVK,reg,word(a),so));
+                    end;
+                end;
             end;
-          { set all 16 bit parts <> 0 }
-          if (word(a)<>0) or
-             ((shift=0) and
-              (a=0)) then
-            if shift=0 then
-              begin
-                list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
-                reginited:=true;
-              end
-            else
-              begin
-                shifterop_reset(so);
-                so.shiftmode:=SM_LSL;
-                so.shiftimm:=shift;
-                if not reginited then
-                  begin
-                    opc:=A_MOVZ;
-                    reginited:=true;
-                  end
-                else
-                  opc:=A_MOVK;
-                list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
-              end;
-            preva:=a;
-            a:=a shr 16;
-           inc(shift,16);
-        until word(preva)=preva;
+
+          a:=a shr 16;
+          inc(shift,16);
+        until a = 0;
+
         if not reginited then
           internalerror(2014102702);
       end;
@@ -680,15 +750,32 @@ implementation
     procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
       var
         reg: tregister;
+        href: treference;
+        i: Integer;
       begin
         { use the zero register if possible }
         if a=0 then
           begin
-            if size in [OS_64,OS_S64] then
-              reg:=NR_XZR
+            href:=ref;
+            inc(href.offset,tcgsize2size[size]-1);
+            if (tcgsize2size[size]>1) and (ref.alignment=1) and (simple_ref_type(A_STUR,OS_8,PF_None,ref)=sr_simple) and
+              (simple_ref_type(A_STUR,OS_8,PF_None,href)=sr_simple) then
+              begin
+                href:=ref;
+                for i:=0 to tcgsize2size[size]-1 do
+                  begin
+                    a_load_const_ref(list,OS_8,0,href);
+                    inc(href.offset);
+                  end;
+              end
             else
-              reg:=NR_WZR;
-            a_load_reg_ref(list,size,size,reg,ref);
+              begin
+                if size in [OS_64,OS_S64] then
+                  reg:=NR_XZR
+                else
+                  reg:=NR_WZR;
+                a_load_reg_ref(list,size,size,reg,ref);
+              end;
           end
         else
           inherited;
@@ -906,13 +993,13 @@ implementation
           begin
             case tosize of
               OS_8:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+                list.concat(taicpu.op_reg_reg(A_UXTB,reg2,makeregsize(reg1,OS_32)));
               OS_16:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+                list.concat(taicpu.op_reg_reg(A_UXTH,reg2,makeregsize(reg1,OS_32)));
               OS_S8:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+                list.concat(taicpu.op_reg_reg(A_SXTB,reg2,makeregsize(reg1,OS_32)));
               OS_S16:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+                list.concat(taicpu.op_reg_reg(A_SXTH,reg2,makeregsize(reg1,OS_32)));
               { while "mov wN, wM" automatically inserts a zero-extension and
                 hence we could encode a 64->32 bit move like that, the problem
                 is that we then can't distinguish 64->32 from 32->32 moves, and
@@ -927,7 +1014,7 @@ implementation
                 list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
               OS_64,
               OS_S64:
-                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
+                list.concat(taicpu.op_reg_reg(A_SXTW,reg2,makeregsize(reg1,OS_32)));
               else
                 internalerror(2002090901);
             end;
@@ -1024,6 +1111,7 @@ implementation
             { Notify the register allocator that we have written a move
               instruction so it can try to eliminate it. }
             add_move_instruction(instr);
+            { FMOV cannot generate a floating point exception }
           end
         else
           begin
@@ -1031,9 +1119,9 @@ implementation
                (reg_cgsize(reg2)<>tosize) then
               internalerror(2014110913);
             instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
+            maybe_check_for_fpu_exception(list);
           end;
         list.Concat(instr);
-        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -1077,10 +1165,17 @@ implementation
        begin
          if not shufflescalar(shuffle) then
            internalerror(2014122801);
-         if not(tcgsize2size[fromsize] in [4,8]) or
-            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+         if tcgsize2size[fromsize]<>tcgsize2size[tosize] then
            internalerror(2014122803);
-         list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
+         case tcgsize2size[tosize] of
+           4:
+             setsubreg(mmreg,R_SUBMMS);
+           8:
+             setsubreg(mmreg,R_SUBMMD);
+           else
+             internalerror(2020101310);
+         end;
+         list.concat(taicpu.op_indexedreg_reg(A_INS,mmreg,0,intreg));
        end;
 
 
@@ -1090,14 +1185,21 @@ implementation
        begin
          if not shufflescalar(shuffle) then
            internalerror(2014122802);
-         if not(tcgsize2size[fromsize] in [4,8]) or
-            (tcgsize2size[fromsize]>tcgsize2size[tosize]) then
+         if tcgsize2size[fromsize]>tcgsize2size[tosize] then
            internalerror(2014122804);
+         case tcgsize2size[fromsize] of
+           4:
+             setsubreg(mmreg,R_SUBMMS);
+           8:
+             setsubreg(mmreg,R_SUBMMD);
+           else
+             internalerror(2020101311);
+           end;
          if tcgsize2size[fromsize]<tcgsize2size[tosize] then
            r:=makeregsize(intreg,fromsize)
          else
            r:=intreg;
-         list.concat(taicpu.op_reg_reg(A_UMOV,r,mmreg));
+         list.concat(taicpu.op_reg_indexedreg(A_UMOV,r,mmreg,0));
        end;
 
 
@@ -1107,17 +1209,24 @@ implementation
           { "xor Vx,Vx" is used to initialize global regvars to 0 }
           OP_XOR:
             begin
-              if (src<>dst) or
+              if shuffle=nil then
+                begin
+                  dst:=newreg(R_MMREGISTER,getsupreg(dst),R_SUBMM16B);
+                  src:=newreg(R_MMREGISTER,getsupreg(src),R_SUBMM16B);
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,dst,dst,src))
+                end
+              else if (src<>dst) or
                  (reg_cgsize(src)<>size) or
                  assigned(shuffle) then
-                internalerror(2015011401);
-              case size of
-                OS_F32,
-                OS_F64:
-                  list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
-                else
-                  internalerror(2015011402);
-              end;
+                internalerror(2015011401)
+              else
+                case size of
+                  OS_F32,
+                  OS_F64:
+                    list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
+                  else
+                    internalerror(2015011402);
+                end;
             end
           else
             internalerror(2015011403);
@@ -1155,7 +1264,7 @@ implementation
         list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
         { mask the -1 to 255 if src was 0 (anyone find a two-instruction
           branch-free version? All of mine are 3...) }
-        list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,makeregsize(dst,OS_32),makeregsize(dst,OS_32)),PF_B));
+        list.Concat(taicpu.op_reg_reg(A_UXTB,makeregsize(dst,OS_32),makeregsize(dst,OS_32)));
       end;
 
 
@@ -1575,27 +1684,74 @@ implementation
         ref: treference;
         sr: tsuperregister;
         pairreg: tregister;
+        sehreg,sehregp : TAsmSehDirective;
       begin
         result:=0;
         reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_PREINDEXED;
         pairreg:=NR_NO;
-        { store all used registers pairwise }
-        for sr:=lowsr to highsr do
-          if sr in rg[rt].used_in_proc then
-            if pairreg=NR_NO then
-              pairreg:=newreg(rt,sr,sub)
+        { for SEH on Win64 we can only store consecutive register pairs, others
+          need to be stored with STR }
+        if target_info.system=system_aarch64_win64 then
+          begin
+            if rt=R_INTREGISTER then
+              begin
+                sehreg:=ash_savereg_x;
+                sehregp:=ash_saveregp_x;
+              end
+            else if rt=R_MMREGISTER then
+              begin
+                sehreg:=ash_savefreg_x;
+                sehregp:=ash_savefregp_x;
+              end
             else
+              internalerror(2020041304);
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                if pairreg=NR_NO then
+                  pairreg:=newreg(rt,sr,sub)
+                else
+                  begin
+                    inc(result,16);
+                    if getsupreg(pairreg)=sr-1 then
+                      begin
+                        list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                        list.concat(cai_seh_directive.create_reg_offset(sehregp,pairreg,16));
+                        pairreg:=NR_NO;
+                      end
+                    else
+                      begin
+                        list.concat(taicpu.op_reg_ref(A_STR,pairreg,ref));
+                        list.concat(cai_seh_directive.create_reg_offset(sehreg,pairreg,16));
+                        pairreg:=newreg(rt,sr,sub);
+                      end;
+                  end;
+            if pairreg<>NR_NO then
               begin
                 inc(result,16);
-                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
-                pairreg:=NR_NO
+                list.concat(taicpu.op_reg_ref(A_STR,pairreg,ref));
+                list.concat(cai_seh_directive.create_reg_offset(sehreg,pairreg,16));
               end;
-        { one left -> store twice (stack must be 16 bytes aligned) }
-        if pairreg<>NR_NO then
+          end
+        else
           begin
-            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
-            inc(result,16);
+            { store all used registers pairwise }
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                if pairreg=NR_NO then
+                  pairreg:=newreg(rt,sr,sub)
+                else
+                  begin
+                    inc(result,16);
+                    list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                    pairreg:=NR_NO
+                  end;
+            { one left -> store twice (stack must be 16 bytes aligned) }
+            if pairreg<>NR_NO then
+              begin
+                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
+                inc(result,16);
+              end;
           end;
       end;
 
@@ -1616,69 +1772,124 @@ implementation
 
     procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
       var
+        hitem: tlinkedlistitem;
+        seh_proc: tai_seh_directive;
+        templist: TAsmList;
+        suppress_endprologue: boolean;
         ref: treference;
         totalstackframesize: longint;
       begin
-        if nostackframe then
-          exit;
-        { stack pointer has to be aligned to 16 bytes at all times }
-        localsize:=align(localsize,16);
+        hitem:=list.last;
+        { pi_has_unwind_info may already be set at this point if there are
+          SEH directives in assembler body. In this case, .seh_endprologue
+          is expected to be one of those directives, and not generated here. }
+        suppress_endprologue:=(pi_has_unwind_info in current_procinfo.flags);
 
-        { save stack pointer and return address }
-        reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
-        ref.addressmode:=AM_PREINDEXED;
-        list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
-        { initialise frame pointer }
-        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
-
-        totalstackframesize:=localsize;
-        { save modified integer registers }
-        inc(totalstackframesize,
-          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
-        { only the lower 64 bits of the modified vector registers need to be
-          saved; if the caller needs the upper 64 bits, it has to save them
-          itself }
-        inc(totalstackframesize,
-          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
-
-        { allocate stack space }
-        if localsize<>0 then
+        if not nostackframe then
           begin
+            { stack pointer has to be aligned to 16 bytes at all times }
             localsize:=align(localsize,16);
-            current_procinfo.final_localsize:=localsize;
-            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+
+            if target_info.system=system_aarch64_win64 then
+              include(current_procinfo.flags,pi_has_unwind_info);
+
+            { save stack pointer and return address }
+            reference_reset_base(ref,NR_SP,-16,ctempposinvalid,16,[]);
+            ref.addressmode:=AM_PREINDEXED;
+            list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
+            if target_info.system=system_aarch64_win64 then
+              list.concat(cai_seh_directive.create_offset(ash_savefplr_x,16));
+            { initialise frame pointer }
+            if current_procinfo.procdef.proctypeoption<>potype_exceptfilter then
+              begin
+                a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
+                if target_info.system=system_aarch64_win64 then
+                  list.concat(cai_seh_directive.create(ash_setfp));
+              end
+            else
+              begin
+                gen_load_frame_for_exceptfilter(list);
+                localsize:=current_procinfo.maxpushedparasize;
+              end;
+
+            totalstackframesize:=localsize;
+            { save modified integer registers }
+            inc(totalstackframesize,
+              save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
+            { only the lower 64 bits of the modified vector registers need to be
+              saved; if the caller needs the upper 64 bits, it has to save them
+              itself }
+            inc(totalstackframesize,
+              save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
+
+            { allocate stack space }
+            if localsize<>0 then
+              begin
+                localsize:=align(localsize,16);
+                current_procinfo.final_localsize:=localsize;
+                handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+                if target_info.system=system_aarch64_win64 then
+                  list.concat(cai_seh_directive.create_offset(ash_stackalloc,localsize));
+              end;
+            { By default, we use the frame pointer to access parameters passed via
+              the stack and the stack pointer to address local variables and temps
+              because
+               a) we can use bigger positive than negative offsets (so accessing
+                  locals via negative offsets from the frame pointer would be less
+                  efficient)
+               b) we don't know the local size while generating the code, so
+                  accessing the parameters via the stack pointer is not possible
+                  without copying them
+              The problem with this is the get_frame() intrinsic:
+               a) it must return the same value as what we pass as parentfp
+                  parameter, since that's how it's used in the TP-style objects unit
+               b) its return value must be usable to access all local data from a
+                  routine (locals and parameters), since it's all the nested
+                  routines have access to
+               c) its return value must be usable to construct a backtrace, as it's
+                  also used by the exception handling routines
+
+              The solution we use here, based on something similar that's done in
+              the MIPS port, is to generate all accesses to locals in the routine
+              itself SP-relative, and then after the code is generated and the local
+              size is known (namely, here), we change all SP-relative variables/
+              parameters into FP-relative ones. This means that they'll be accessed
+              less efficiently from nested routines, but those accesses are indirect
+              anyway and at least this way they can be accessed at all
+            }
+            if current_procinfo.has_nestedprocs or
+               (
+                 (target_info.system=system_aarch64_win64) and
+                 (current_procinfo.flags*[pi_has_implicit_finally,pi_needs_implicit_finally,pi_uses_exceptions]<>[])
+               ) then
+              begin
+                current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+                current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+              end;
           end;
-        { By default, we use the frame pointer to access parameters passed via
-          the stack and the stack pointer to address local variables and temps
-          because
-           a) we can use bigger positive than negative offsets (so accessing
-              locals via negative offsets from the frame pointer would be less
-              efficient)
-           b) we don't know the local size while generating the code, so
-              accessing the parameters via the stack pointer is not possible
-              without copying them
-          The problem with this is the get_frame() intrinsic:
-           a) it must return the same value as what we pass as parentfp
-              parameter, since that's how it's used in the TP-style objects unit
-           b) its return value must usable to access all local data from a
-              routine (locals and parameters), since it's all the nested
-              routines have access to
-           c) its return value must be usable to construct a backtrace, as it's
-              also used by the exception handling routines
-
-          The solution we use here, based on something similar that's done in
-          the MIPS port, is to generate all accesses to locals in the routine
-          itself SP-relative, and then after the code is generated and the local
-          size is known (namely, here), we change all SP-relative variables/
-          parameters into FP-relative ones. This means that they'll be accessed
-          less efficiently from nested routines, but those accesses are indirect
-          anyway and at least this way they can be accessed at all
-        }
-        if current_procinfo.has_nestedprocs then
+
+        if not (pi_has_unwind_info in current_procinfo.flags) then
+          exit;
+
+        { Generate unwind data for aarch64-win64 }
+        seh_proc:=cai_seh_directive.create_name(ash_proc,current_procinfo.procdef.mangledname);
+        if assigned(hitem) then
+          list.insertafter(seh_proc,hitem)
+        else
+          list.insert(seh_proc);
+        { the directive creates another section }
+        inc(list.section_count);
+        templist:=TAsmList.Create;
+
+        if not suppress_endprologue then
           begin
-            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
-            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+            templist.concat(cai_seh_directive.create(ash_endprologue));
           end;
+        if assigned(current_procinfo.endprologue_ai) then
+          current_procinfo.aktproccode.insertlistafter(current_procinfo.endprologue_ai,templist)
+        else
+          list.concatlist(templist);
+        templist.free;
       end;
 
 
@@ -1699,35 +1910,76 @@ implementation
         ref: treference;
         sr, highestsetsr: tsuperregister;
         pairreg: tregister;
+        i,
         regcount: longint;
+        aiarr : array of tai;
       begin
         reference_reset_base(ref,NR_SP,16,ctempposinvalid,16,[]);
         ref.addressmode:=AM_POSTINDEXED;
-        { highest reg stored twice? }
         regcount:=0;
-        highestsetsr:=RS_NO;
-        for sr:=lowsr to highsr do
-          if sr in rg[rt].used_in_proc then
-            begin
-              inc(regcount);
-              highestsetsr:=sr;
-            end;
-        if odd(regcount) then
+        { due to SEH on Win64 we can only load consecutive registers and single
+          ones are done using LDR, so we need to handle this differently there }
+        if target_info.system=system_aarch64_win64 then
           begin
-            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
-            highestsetsr:=pred(highestsetsr);
-          end;
-        { load all (other) used registers pairwise }
-        pairreg:=NR_NO;
-        for sr:=highestsetsr downto lowsr do
-          if sr in rg[rt].used_in_proc then
-            if pairreg=NR_NO then
-              pairreg:=newreg(rt,sr,sub)
-            else
+            setlength(aiarr,highsr-lowsr+1);
+            pairreg:=NR_NO;
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                begin
+                  if pairreg=NR_NO then
+                    pairreg:=newreg(rt,sr,sub)
+                  else
+                    begin
+                      if getsupreg(pairreg)=sr-1 then
+                        begin
+                          aiarr[regcount]:=taicpu.op_reg_reg_ref(A_LDP,pairreg,newreg(rt,sr,sub),ref);
+                          inc(regcount);
+                          pairreg:=NR_NO;
+                        end
+                      else
+                        begin
+                          aiarr[regcount]:=taicpu.op_reg_ref(A_LDR,pairreg,ref);
+                          inc(regcount);
+                          pairreg:=newreg(rt,sr,sub);
+                        end;
+                    end;
+                end;
+            if pairreg<>NR_NO then
               begin
-                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
-                pairreg:=NR_NO
+                aiarr[regcount]:=taicpu.op_reg_ref(A_LDR,pairreg,ref);
+                inc(regcount);
+                pairreg:=NR_NO;
               end;
+            for i:=regcount-1 downto 0 do
+              list.concat(aiarr[i]);
+          end
+        else
+          begin
+            { highest reg stored twice? }
+            highestsetsr:=RS_NO;
+            for sr:=lowsr to highsr do
+              if sr in rg[rt].used_in_proc then
+                begin
+                  inc(regcount);
+                  highestsetsr:=sr;
+                end;
+            if odd(regcount) then
+              begin
+                list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
+                highestsetsr:=pred(highestsetsr);
+              end;
+            { load all (other) used registers pairwise }
+            pairreg:=NR_NO;
+            for sr:=highestsetsr downto lowsr do
+              if sr in rg[rt].used_in_proc then
+                if pairreg=NR_NO then
+                  pairreg:=newreg(rt,sr,sub)
+                else
+                  begin
+                    list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
+                    pairreg:=NR_NO
+                  end;
+          end;
         { There can't be any register left }
         if pairreg<>NR_NO then
           internalerror(2014112602);
@@ -1741,7 +1993,14 @@ implementation
         regsstored: boolean;
         sr: tsuperregister;
       begin
-        if not nostackframe then
+        if not(nostackframe) and
+          { we do not need an exit stack frame when we never return
+
+            * the final ret is left so the peephole optimizer can easily do call/ret -> jmp or call conversions
+            * the entry stack frame must be normally generated because the subroutine could be still left by
+              an exception and then the unwinding code might need to restore the registers stored by the entry code
+          }
+          not(po_noreturn in current_procinfo.procdef.procoptions) then
           begin
             { if no registers have been stored, we don't have to subtract the
               allocated temp space from the stack pointer }
@@ -1779,6 +2038,11 @@ implementation
 
         { return }
         list.concat(taicpu.op_none(A_RET));
+        if (pi_has_unwind_info in current_procinfo.flags) then
+          begin
+            tcpuprocinfo(current_procinfo).dump_scopes(list);
+            list.concat(cai_seh_directive.create(ash_endproc));
+          end;
       end;
 
 
@@ -1799,9 +2063,9 @@ implementation
         paraloc1.init;
         paraloc2.init;
         paraloc3.init;
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -2260,7 +2524,7 @@ implementation
 
     procedure tcgaarch64.g_check_for_fpu_exception(list: TAsmList;force,clear : boolean);
       var
-        r : TRegister;
+        r, tmpreg: TRegister;
         ai: taicpu;
         l1,l2: TAsmLabel;
       begin
@@ -2269,18 +2533,17 @@ implementation
             (force or current_procinfo.FPUExceptionCheckNeeded)) then
           begin
             r:=getintregister(list,OS_INT);
+            tmpreg:=getintregister(list,OS_INT);
             list.concat(taicpu.op_reg_reg(A_MRS,r,NR_FPSR));
-            list.concat(taicpu.op_reg_const(A_TST,r,$1f));
+            list.concat(taicpu.op_reg_reg_const(A_AND,tmpreg,r,$1f));
             current_asmdata.getjumplabel(l1);
             current_asmdata.getjumplabel(l2);
-            ai:=taicpu.op_sym(A_B,l1);
+            ai:=taicpu.op_reg_sym_ofs(A_CBNZ,tmpreg,l1,0);
             ai.is_jmp:=true;
-            ai.condition:=C_NE;
             list.concat(ai);
-            list.concat(taicpu.op_reg_const(A_TST,r,$80));
-            ai:=taicpu.op_sym(A_B,l2);
+            list.concat(taicpu.op_reg_reg_const(A_AND,tmpreg,r,$80));
+            ai:=taicpu.op_reg_sym_ofs(A_CBZ,tmpreg,l2,0);
             ai.is_jmp:=true;
-            ai.condition:=C_EQ;
             list.concat(ai);
             a_label(list,l1);
             alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
@@ -2293,6 +2556,18 @@ implementation
       end;
 
 
+    procedure tcgaarch64.g_profilecode(list : TAsmList); { Appends the profiling hook (a call to _mcount) to list; only aarch64-linux is handled }
+      begin
+        if target_info.system = system_aarch64_linux then
+          begin
+            list.concat(taicpu.op_reg_reg(A_MOV,NR_X0,NR_X30)); { x0:=x30; NOTE(review): x30 is the link register, so presumably _mcount receives the return address as its first argument - confirm }
+            a_call_name(list,'_mcount',false);
+          end
+        else
+          internalerror(2020021901); { every other target is rejected with an internal error }
+      end;
+
+
     procedure create_codegen;
       begin
         cg:=tcgaarch64.Create;

+ 82 - 7
compiler/aarch64/cpubase.pas

@@ -48,8 +48,6 @@ unit cpubase;
     type
       TAsmOp= {$i a64op.inc}
 
-      TAsmOps = set of TAsmOp;
-
       { This should define the array of instructions as string }
       op2strtable=array[tasmop] of string[11];
 
@@ -58,6 +56,13 @@ unit cpubase;
       firstop = low(tasmop);
       { Last value of opcode enumeration  }
       lastop  = high(tasmop);
+      { Last value of opcode for TCommonAsmOps set below  }
+      LastCommonAsmOp = A_MOV;
+
+    type
+      { See comment for this type in arm/cpubase.pas }
+      TCommonAsmOps = Set of A_None .. LastCommonAsmOp;
+
 
 {*****************************************************************************
                                   Registers
@@ -73,6 +78,7 @@ unit cpubase;
 
       RS_IP0 = RS_X16;
       RS_IP1 = RS_X17;
+      RS_XR = RS_X8;
 
       R_SUBWHOLE = R_SUBQ;
 
@@ -81,6 +87,7 @@ unit cpubase;
 
       NR_IP0 = NR_X16;
       NR_IP1 = NR_X17;
+      NR_XR = NR_X8;
 
       { Integer Super registers first and last }
       first_int_supreg = RS_X0;
@@ -106,7 +113,7 @@ unit cpubase;
       std_param_align = 8;
 
       { TODO: Calculate bsstart}
-      regnumber_count_bsstart = 128;
+      regnumber_count_bsstart = 512;
 
       regnumber_table : array[tregisterindex] of tregister = (
         {$i ra64num.inc}
@@ -123,9 +130,6 @@ unit cpubase;
       VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X30];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
                           Instruction post fixes
 *****************************************************************************}
@@ -202,7 +206,7 @@ unit cpubase;
       tshiftmode = (SM_None,
                     { shifted register instructions. LSL can also be used for
                       the index register of certain loads/stores }
-                    SM_LSL,SM_LSR,SM_ASR,
+                    SM_LSL,SM_LSR,SM_ASR,SM_ROR,
                     { extended register instructions: zero/sign extension +
                         optional shift (interpreted as LSL after extension)
                        -- the index register of certain loads/stores can be
@@ -324,6 +328,9 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
     function dwarf_reg(r:tregister):shortint;
@@ -331,6 +338,7 @@ unit cpubase;
     function eh_return_data_regno(nr: longint): longint;
 
     function is_shifter_const(d: aint; size: tcgsize): boolean;
+    function IsFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
 
 
   implementation
@@ -403,6 +411,23 @@ unit cpubase;
                   result:=OS_F64;
                 R_SUBMMS:
                   result:=OS_F32;
+                { always use OS_M128, because these could be the top or bottom bytes (or middle in some cases) }
+                R_SUBMM8B:
+                  result:=OS_M128;
+                R_SUBMM16B:
+                  result:=OS_M128;
+                R_SUBMM4H:
+                  result:=OS_M128;
+                R_SUBMM8H:
+                  result:=OS_M128;
+                R_SUBMM2S:
+                  result:=OS_M128;
+                R_SUBMM4S:
+                  result:=OS_M128;
+                R_SUBMM1D:
+                  result:=OS_M128;
+                R_SUBMM2D:
+                  result:=OS_M128;
                 R_SUBMMWHOLE:
                   result:=OS_M128;
                 else
@@ -490,6 +515,26 @@ unit cpubase;
       end;
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal"). Returns True when c is C_None, when both conditions are equal, or for one of the pairs listed in the case statement below }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c); { C_None as c accepts any Subset; identical conditions match trivially }
+
+        { Only the pairs below are recognised so far; every other combination yields False. Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE, C_LE]); { "equal" is covered by both "greater or equal" and "less or equal" }
+            C_LT:
+              Result := (c in [C_LE]); { "less than" is covered by "less or equal" }
+            C_GT:
+              Result := (c in [C_GE]); { "greater than" is covered by "greater or equal" }
+            else
+              Result := False;
+          end;
+      end;
+
+
     function dwarf_reg(r:tregister):shortint;
       begin
         result:=regdwarf_table[findreg_by_number(r)];
@@ -619,4 +664,34 @@ unit cpubase;
         result:=-1;
     end;
 
+
+  function IsFloatImmediate(ft : tfloattype;value : bestreal) : boolean; { True when value, converted to ft, has the restricted bit pattern tested below; NOTE(review): this looks like the 8-bit FMOV immediate form - confirm against the Arm ARM }
+    var
+      singlerec : tcompsinglerec;
+      doublerec : tcompdoublerec;
+    begin
+      Result:=false; { float types other than s32real/s64real are never accepted }
+      case ft of
+        s32real:
+          begin
+            singlerec.value:=value;
+            singlerec:=tcompsinglerec(NtoLE(DWord(singlerec))); { normalise to little endian so that bytes[0] is the least significant byte on every host }
+            Result:=(singlerec.bytes[0]=0) and (singlerec.bytes[1]=0) and ((singlerec.bytes[2] and 7)=0)  and { the low 19 bits of the pattern must be clear }
+              (((singlerec.bytes[3] and $7e)=$40) or ((singlerec.bytes[3] and $7e)=$3e)); { bits 6..1 of the top byte must be 100000 or 011111; bit 7 and bit 0 are not constrained }
+          end;
+        s64real:
+          begin
+            doublerec.value:=value;
+            doublerec:=tcompdoublerec(NtoLE(QWord(doublerec))); { same little-endian normalisation as for s32real }
+            Result:=(doublerec.bytes[0]=0) and (doublerec.bytes[1]=0) and (doublerec.bytes[2]=0) and { the low 48 bits of the pattern must be clear ... }
+                    (doublerec.bytes[3]=0) and (doublerec.bytes[4]=0) and (doublerec.bytes[5]=0) and
+                    ((((doublerec.bytes[6] and $c0)=$0) and ((doublerec.bytes[7] and $7f)=$40)) or { ... and the 9 bits below bit 63 must be 100000000 ... }
+                     (((doublerec.bytes[6] and $c0)=$c0) and ((doublerec.bytes[7] and $7f)=$3f))); { ... or 011111111; bit 63 is not constrained }
+          end;
+        else
+          ;
+      end;
+    end;
+
+
 end.

+ 2 - 2
compiler/aarch64/cpuinfo.pas

@@ -112,12 +112,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
 				  cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 

+ 1 - 1
compiler/aarch64/cpunode.pas

@@ -35,7 +35,7 @@ implementation
     symcpu,
     aasmdef,
 {$ifndef llvm}
-    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset
+    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,ncpucon,ncpuflw
 {$else llvm}
     llvmnode
 {$endif llvm}

+ 50 - 37
compiler/aarch64/cpupara.pas

@@ -266,7 +266,7 @@ unit cpupara;
                     size:=OS_ADDR;
                     def:=hp.paraloc[side].def;
                     loc:=LOC_REGISTER;
-                    register:=NR_X8;
+                    register:=NR_XR;
                   end
               end
             else
@@ -280,6 +280,7 @@ unit cpupara;
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       var
         retcgsize: tcgsize;
+        otherside: tcallercallee;
       begin
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
            exit;
@@ -287,11 +288,18 @@ unit cpupara;
          { in this case, it must be returned in registers as if it were passed
            as the first parameter }
          init_para_alloc_values;
-         alloc_para(result,p,vs_value,side,result.def,false,false);
+         { if we're on the callee side, filling the result location is actually the "callerside"
+          as far as passing it as a parameter value is concerned }
+         if side=callerside then
+           otherside:=calleeside
+         else
+           otherside:=callerside;
+         alloc_para(result,p,vs_value,otherside,result.def,false,false);
          { sanity check (LOC_VOID for empty records) }
          if not assigned(result.location) or
             not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
            internalerror(2014113001);
+{$ifndef llvm}
          {
            According to ARM64 ABI: "If the size of the argument is less than 8 bytes then
            the size of the argument is set to 8 bytes. The effect is as if the argument
@@ -310,6 +318,7 @@ unit cpupara;
              result.location^.size:=OS_64;
              result.location^.def:=u64inttype;
            end;
+{$endif}
       end;
 
 
@@ -340,7 +349,7 @@ unit cpupara;
         paracgsize, locsize: tcgsize;
         firstparaloc: boolean;
       begin
-        result.reset;
+        result.init;
 
         { currently only support C-style array of const,
           there should be no location assigned to the vararg array itself }
@@ -374,7 +383,7 @@ unit cpupara;
             else
               paralen:=tcgsize2size[def_cgsize(paradef)];
             loc:=getparaloc(p.proccalloption,paradef);
-            if (paradef.typ in [objectdef,arraydef,recorddef]) and
+            if (paradef.typ in [objectdef,arraydef,recorddef,setdef]) and
                not is_special_array(paradef) and
                (varspez in [vs_value,vs_const]) then
               paracgsize:=int_cgsize(paralen)
@@ -486,47 +495,45 @@ unit cpupara;
              end
            else
              begin
-{$ifndef llvm}
                paraloc^.size:=locsize;
                paraloc^.def:=locdef;
-{$else llvm}
-               case locsize of
-                 OS_8,OS_16,OS_32:
-                   begin
-                     paraloc^.size:=OS_64;
-                     paraloc^.def:=u64inttype;
-                   end;
-                 OS_S8,OS_S16,OS_S32:
-                   begin
-                     paraloc^.size:=OS_S64;
-                     paraloc^.def:=s64inttype;
-                   end;
-                 OS_F32:
-                   begin
-                     paraloc^.size:=OS_F32;
-                     paraloc^.def:=s32floattype;
-                   end;
-                 OS_F64:
-                   begin
-                     paraloc^.size:=OS_F64;
-                     paraloc^.def:=s64floattype;
-                   end;
-                 else
-                   begin
-                     if is_record(locdef) or
-                        ((locdef.typ=arraydef) and
-                         not is_special_array(locdef)) then
+{$ifdef llvm}
+               if not is_ordinal(paradef) then
+                 begin
+                   case locsize of
+                     OS_8,OS_16,OS_32:
                        begin
                          paraloc^.size:=OS_64;
                          paraloc^.def:=u64inttype;
-                       end
+                       end;
+                     OS_S8,OS_S16,OS_S32:
+                       begin
+                         paraloc^.size:=OS_S64;
+                         paraloc^.def:=s64inttype;
+                       end;
+                     OS_F32:
+                       begin
+                         paraloc^.size:=OS_F32;
+                         paraloc^.def:=s32floattype;
+                       end;
+                     OS_F64:
+                       begin
+                         paraloc^.size:=OS_F64;
+                         paraloc^.def:=s64floattype;
+                       end;
                      else
                        begin
-                         paraloc^.size:=locsize;
-                         paraloc^.def:=locdef;
+                         if is_record(locdef) or
+                            is_set(locdef) or
+                            ((locdef.typ=arraydef) and
+                             not is_special_array(locdef)) then
+                           begin
+                             paraloc^.size:=OS_64;
+                             paraloc^.def:=u64inttype;
+                           end
                        end;
                    end;
-               end;
+                 end;
 {$endif llvm}
              end;
 
@@ -559,6 +566,7 @@ unit cpupara;
                              paraloc^.def:=u32inttype;
                            end;
                        end
+{$ifndef llvm}
                      else
                        begin
                          if side=calleeside then
@@ -567,6 +575,7 @@ unit cpupara;
                              paraloc^.def:=u32inttype;
                            end;
                        end;
+{$endif llvm}
                    end;
 
                  { in case it's a composite, "The argument is passed as though
@@ -590,6 +599,10 @@ unit cpupara;
                begin
                   paraloc^.size:=paracgsize;
                   paraloc^.loc:=LOC_REFERENCE;
+                  if assigned(hfabasedef) then
+                    paraloc^.def:=carraydef.getreusable_no_free(hfabasedef,paralen div hfabasedef.size)
+                  else
+                    paraloc^.def:=paradef;
 
                   { the current stack offset may not be properly aligned in
                     case we're on Darwin and have allocated a non-variadic argument
@@ -669,7 +682,7 @@ unit cpupara;
             result:=curstackoffset;
           end
         else
-          internalerror(200410231);
+          internalerror(2004102303);
 
         create_funcretloc_info(p,side);
       end;

+ 81 - 2
compiler/aarch64/cpupi.pas

@@ -27,19 +27,38 @@ interface
 
   uses
     procinfo,
-    psub;
+    psub,
+    aasmdata,aasmbase;
 
   type
     tcpuprocinfo=class(tcgprocinfo)
+    private
+      scopes: TAsmList; { scope table entries collected by add_finally_scope/add_except_scope; created on first use, nil until then }
+      scopecount: longint; { number of scopes added so far; written in front of the table by dump_scopes }
+      unwindflags: byte; { bit mask accumulated by the add_*_scope methods and copied into the ash_handler directive by dump_scopes }
+    public
       constructor create(aparent: tprocinfo); override;
+      destructor destroy; override;
       procedure set_first_temp_offset; override;
+      procedure add_finally_scope(startlabel,endlabel,handler:TAsmSymbol;implicit:Boolean); { registers a finally scope (SCOPE_IMPLICIT when implicit, else SCOPE_FINALLY) }
+      procedure add_except_scope(trylabel,exceptlabel,endlabel,filter:TAsmSymbol); { registers an except scope; a nil filter is stored as SCOPE_CATCHALL }
+      procedure dump_scopes(list:tasmlist); { appends the handler directives and the collected scope table to list; does nothing when no scope was added }
     end;
 
 implementation
 
   uses
+    cutils,
+    fmodule,
+    symtable,
     tgobj,
-    cpubase;
+    cpubase,
+    aasmtai;
+
+  const
+    SCOPE_FINALLY=0;
+    SCOPE_CATCHALL=1;
+    SCOPE_IMPLICIT=2;
 
   constructor tcpuprocinfo.create(aparent: tprocinfo);
     begin
@@ -56,12 +75,72 @@ implementation
       framepointer:=NR_STACK_POINTER_REG;
     end;
 
+  destructor tcpuprocinfo.destroy; { Releases the scope list before running the inherited destructor }
+    begin
+      scopes.free; { scopes is still nil when no scope was ever added; presumably safe because Free checks for nil - confirm TAsmList descends from TObject }
+      inherited destroy;
+    end;
+
   procedure tcpuprocinfo.set_first_temp_offset;
     begin
      { leave room for allocated parameters: the first temp offset is maxpushedparasize aligned to 16 }
      tg.setfirsttemp(align(maxpushedparasize,16));
     end;
 
+  procedure tcpuprocinfo.add_finally_scope(startlabel,endlabel,handler:TAsmSymbol;implicit:Boolean); { Records one finally scope as four table entries: kind, then the RVAs of startlabel, endlabel and handler }
+    begin
+      unwindflags:=unwindflags or 2; { bit 1 is set for every finally scope; NOTE(review): presumably the termination-handler flag of the handler directive - confirm }
+      if implicit then  { also needs catch functionality }
+        unwindflags:=unwindflags or 1;
+      inc(scopecount);
+      if scopes=nil then
+        scopes:=TAsmList.Create; { the list is created lazily on the first scope }
+
+      if implicit then
+        scopes.concat(tai_const.create_32bit(SCOPE_IMPLICIT))
+      else
+        scopes.concat(tai_const.create_32bit(SCOPE_FINALLY));
+      scopes.concat(tai_const.create_rva_sym(startlabel));
+      scopes.concat(tai_const.create_rva_sym(endlabel));
+      scopes.concat(tai_const.create_rva_sym(handler));
+    end;
+
+  procedure tcpuprocinfo.add_except_scope(trylabel,exceptlabel,endlabel,filter:TAsmSymbol); { Records one except scope as four table entries: filter (or SCOPE_CATCHALL), then the RVAs of trylabel, exceptlabel and endlabel }
+    begin
+      unwindflags:=unwindflags or 3; { sets both low bits, i.e. the same bits an implicit finally scope sets }
+      inc(scopecount);
+      if scopes=nil then
+        scopes:=TAsmList.Create; { the list is created lazily on the first scope }
+
+      if Assigned(filter) then
+        scopes.concat(tai_const.create_rva_sym(filter))
+      else
+        scopes.concat(tai_const.create_32bit(SCOPE_CATCHALL)); { no filter symbol: the first entry is the constant SCOPE_CATCHALL instead of an RVA }
+      scopes.concat(tai_const.create_rva_sym(trylabel));
+      scopes.concat(tai_const.create_rva_sym(exceptlabel));
+      scopes.concat(tai_const.create_rva_sym(endlabel));
+    end;
+
+  procedure tcpuprocinfo.dump_scopes(list: tasmlist); { Appends the handler directive, the handler data directive, the scope count and all recorded scopes to list, then switches back to a code section }
+    var
+      hdir: tai_seh_directive;
+    begin
+      if (scopecount=0) then
+        exit; { nothing was registered, so nothing is emitted }
+      hdir:=cai_seh_directive.create_name(ash_handler,'__FPC_specific_handler');
+      if not systemunit.iscurrentunit then
+        current_module.add_extern_asmsym('__FPC_specific_handler',AB_EXTERNAL,AT_FUNCTION); { outside the system unit the handler symbol is registered as an external function }
+      hdir.data.flags:=unwindflags;
+      list.concat(hdir);
+      list.concat(cai_seh_directive.create(ash_handlerdata));
+      inc(list.section_count);
+      list.concat(tai_const.create_32bit(scopecount)); { the table starts with the number of scopes }
+      list.concatlist(scopes); { NOTE(review): presumably moves the entries out of scopes and leaves it empty - confirm concatlist semantics }
+      { return to text, required for GAS compatibility }
+      { This creates a tai_align which is redundant here (although harmless) }
+      new_section(list,sec_code,lower(procdef.mangledname),0);
+    end;
+
 
 begin
   cprocinfo:=tcpuprocinfo;

+ 5 - 2
compiler/aarch64/cputarg.pas

@@ -38,12 +38,15 @@ implementation
     {$ifndef NOTARGETLINUX}
       ,t_linux
     {$endif}
-    {$ifndef NOTARGETBSD}
-      ,t_bsd
+    {$ifndef NOTARGETDARWIN}
+      ,t_darwin
     {$endif}
     {$ifndef NOTARGETANDROID}
       ,t_android
     {$endif}
+    {$ifndef NOTARGETWIN64}
+      ,t_win
+    {$endif}
 
 {**************************************
              Assemblers

+ 1 - 1
compiler/aarch64/hlcgcpu.pas

@@ -184,7 +184,7 @@ implementation
           not is_objectpascal_helper(procdef.struct) then
         begin
           if (procdef.extnumber=$ffff) then
-            Internalerror(200006139);
+            Internalerror(2000061302);
           { mov  0(%rdi),%rax ; load vmt}
           reference_reset_base(href,voidpointertype,paraloc^.register,0,ctempposinvalid,sizeof(pint),[]);
           getcpuregister(list,NR_IP0);

+ 90 - 0
compiler/aarch64/ncpucon.pas

@@ -0,0 +1,90 @@
+{
+    Copyright (c) 2005 by Florian Klaempfl
+
+    Code generation for const nodes on the AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpucon;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ncgcon,cpubase;
+
+    type
+      taarch64realconstnode = class(tcgrealconstnode) { AArch64 real constant node: overrides both passes so that suitable constants end up in an MM register }
+        function pass_1 : tnode;override;
+        procedure pass_generate_code;override;
+      end;
+
+  implementation
+
+    uses
+      verbose,
+      globtype,globals,
+      cpuinfo,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      symdef,
+      defutil,
+      cgbase,cgutils,cgobj,
+      procinfo,
+      ncon;
+
+{*****************************************************************************
+                           TAARCH64REALCONSTNODE
+*****************************************************************************}
+
+    function taarch64realconstnode.pass_1 : tnode; { First pass: predicts an MM register location for constants accepted by IsFloatImmediate, otherwise defers to the inherited pass_1 }
+      begin
+        result:=nil;
+        if IsFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+           expectloc:=LOC_MMREGISTER { NOTE(review): pass_generate_code also yields LOC_MMREGISTER for the all-zero bit pattern, which is not predicted here - confirm this is intended }
+         else
+           result:=Inherited pass_1;
+      end;
+
+
+    procedure taarch64realconstnode.pass_generate_code; { Code generation: FMOV with a real constant for IsFloatImmediate values, EOR of a register with itself for the all-zero pattern, inherited behaviour for everything else }
+      var
+        hreg : TRegister;
+      begin
+        if IsFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+          begin
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_realconst(A_FMOV,
+              location.register,value_real));
+          end
+        { cast and compare the bit pattern as we cannot handle -0.0: only a value whose bits are all zero takes this branch }
+        else if bestrealrec(value_real).Data=0 then
+          begin
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            hreg:=newreg(R_MMREGISTER,getsupreg(location.register),R_SUBMM16B); { same super register, addressed with the R_SUBMM16B subregister }
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_EOR,
+              hreg,hreg,hreg)); { exclusive-or of the register with itself clears it }
+          end
+        else
+          Inherited pass_generate_code;
+      end;
+
+begin
+  crealconstnode:=taarch64realconstnode; { unit initialisation: installs the AArch64 class as the real constant node class }
+end.

+ 564 - 0
compiler/aarch64/ncpuflw.pas

@@ -0,0 +1,564 @@
+{
+    Copyright (c) 2011-2020 by Free Pascal development team
+
+    Generate Win64-specific exception handling code (based on x86_64 code)
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuflw;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    node,nflw,ncgflw,psub;
+
+  type
+    { raise node; pass_1 treats a re-raise on aarch64-win64 specially and
+      defers to the inherited implementation otherwise }
+    taarch64raisenode=class(tcgraisenode)
+      function pass_1 : tnode;override;
+    end;
+
+    { "on" handler node; has its own code generation on aarch64-win64 }
+    taarch64onnode=class(tcgonnode)
+      procedure pass_generate_code;override;
+    end;
+
+    { try..except node; has its own code generation on aarch64-win64 }
+    taarch64tryexceptnode=class(tcgtryexceptnode)
+      procedure pass_generate_code;override;
+    end;
+
+    { try..finally node. On aarch64-win64 the finally block is additionally
+      associated with an outlined '$fin$' procedure described by finalizepi. }
+    taarch64tryfinallynode=class(tcgtryfinallynode)
+      { procinfo of the outlined finalizer; stays unassigned when create is
+        called inside a generic routine or on other targets }
+      finalizepi: tcgprocinfo;
+      constructor create(l,r:TNode);override;
+      constructor create_implicit(l,r:TNode);override;
+      function simplify(forinline: boolean): tnode;override;
+      procedure pass_generate_code;override;
+      function dogetcopy:tnode;override;
+    end;
+
+implementation
+
+  uses
+    globtype,globals,verbose,systems,fmodule,
+    nbas,ncal,nutils,
+    symconst,symsym,symdef,
+    cgbase,cgobj,cgutils,tgobj,
+    cpubase,htypechk,
+    pass_1,pass_2,
+    aasmbase,aasmtai,aasmdata,aasmcpu,procinfo,cpupi;
+
+  var
+    { label the "on" handlers jump to when they are done; set up, and
+      restored on exit, by taarch64tryexceptnode.pass_generate_code }
+    endexceptlabel: tasmlabel;
+
+
+{ taarch64raisenode }
+
+{ First pass of a raise node. A re-raise (no exception object in left) on
+  aarch64-win64 is turned into a call of fpc_reraise; every other case is
+  handled by the inherited implementation. }
+function taarch64raisenode.pass_1 : tnode;
+  var
+    stmts : tstatementnode;
+    reraisecall : tcallnode;
+  begin
+    if (target_info.system=system_aarch64_win64) and not assigned(left) then
+      begin
+        { difference from generic code is that address stack is not popped on reraise }
+        result:=internalstatements(stmts);
+        reraisecall:=ccallnode.createintern('fpc_reraise',nil);
+        include(reraisecall.callnodeflags,cnf_call_never_returns);
+        addstatement(stmts,reraisecall);
+      end
+    else
+      result:=inherited pass_1;
+end;
+
+{ taarch64onnode }
+
+{ Generates the code of a single "on" handler. Other targets use the
+  inherited implementation. On aarch64-win64 the exception object arriving
+  in the function result register is stored into the exception variable (if
+  there is one), the handler body is generated, FPC_DONEEXCEPTION is called
+  and control jumps to endexceptlabel. }
+procedure taarch64onnode.pass_generate_code;
+  var
+    exceptvarsym : tlocalvarsym;
+  begin
+    if (target_info.system<>system_aarch64_win64) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+
+    location_reset(location,LOC_VOID,OS_NO);
+
+    { RTL will put exceptobject into X0 when jumping here }
+    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
+
+    { Retrieve exception variable }
+    if assigned(excepTSymtable) then
+      exceptvarsym:=tlocalvarsym(excepTSymtable.SymList[0])
+    else
+      exceptvarsym:=nil;
+
+    if assigned(exceptvarsym) then
+      begin
+        { give the variable a pointer-sized local and store the exception
+          object from the result register into it }
+        exceptvarsym.localloc.loc:=LOC_REFERENCE;
+        exceptvarsym.localloc.size:=OS_ADDR;
+        tg.GetLocal(current_asmdata.CurrAsmList,sizeof(pint),voidpointertype,exceptvarsym.localloc.reference);
+        cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_FUNCTION_RESULT_REG,exceptvarsym.localloc.reference);
+      end;
+    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
+
+    { handler body }
+    if assigned(right) then
+      secondpass(right);
+
+    { deallocate exception symbol }
+    if assigned(exceptvarsym) then
+      begin
+        tg.UngetLocal(current_asmdata.CurrAsmList,exceptvarsym.localloc.reference);
+        exceptvarsym.localloc.loc:=LOC_INVALID;
+      end;
+    cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+    { endexceptlabel is the unit-level variable maintained by the enclosing
+      try..except node }
+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+  end;
+
+{ taarch64tryfinallynode }
+
+{ Node-walk callback used on the finally block: temp references are made
+  non-regable and every call node sets pi_do_call on the procinfo passed
+  in arg. Always returns fen_true. }
+function reset_regvars(var n: tnode; arg: pointer): foreachnoderesult;
+  begin
+    result:=fen_true;
+    if n.nodetype=temprefn then
+      make_not_regable(n,[])
+    else if n.nodetype=calln then
+      include(tprocinfo(arg).flags,pi_do_call);
+  end;
+
+{ Node-walk callback: forwards the pushed_parasize of every call node to
+  allocate_push_parasize of the tcgprocinfo passed in arg. Always returns
+  fen_true. }
+function copy_parasize(var n: tnode; arg: pointer): foreachnoderesult;
+  begin
+    result:=fen_true;
+    if n.nodetype=calln then
+      tcgprocinfo(arg).allocate_push_parasize(tcallnode(n).pushed_parasize);
+  end;
+
+{ Creates an explicit try..finally node (l = try block, r = finally block).
+  On aarch64-win64, and outside of generic routines, a '$fin$' procinfo for
+  the outlined finalizer is created and stored in finalizepi; in all other
+  cases finalizepi is not assigned here. }
+constructor taarch64tryfinallynode.create(l, r: TNode);
+  begin
+    inherited create(l,r);
+    if (target_info.system=system_aarch64_win64) and
+      { Don't create child procedures for generic methods, their nested-like
+        behavior causes compilation errors because real nested procedures
+        aren't allowed for generics. Not creating them doesn't harm because
+        generic node tree is discarded without generating code. }
+       not (df_generic in current_procinfo.procdef.defoptions) then
+      begin
+        finalizepi:=tcgprocinfo(current_procinfo.create_for_outlining('$fin$',current_procinfo.procdef.struct,potype_exceptfilter,voidtype,r));
+        { the init/final code is messing with asm nodes, so inform the compiler about this }
+        include(finalizepi.flags,pi_has_assembler_block);
+        { Regvar optimization for symbols is suppressed when using exceptions, but
+          temps may be still placed into registers. This must be fixed. }
+        foreachnodestatic(r,@reset_regvars,finalizepi);
+      end;
+  end;
+
+{ Creates an implicit try..finally node. On aarch64-win64 a '$fin$' procinfo
+  is created for the finalizer, flagged with pi_do_call and
+  pi_has_assembler_block, and 32 bytes of push parasize are allocated for it.
+  Being inside a generic routine at this point is an internal error. }
+constructor taarch64tryfinallynode.create_implicit(l, r: TNode);
+  begin
+    inherited create_implicit(l, r);
+    { nothing more to set up on other targets }
+    if (target_info.system<>system_aarch64_win64) then
+      exit;
+
+    if df_generic in current_procinfo.procdef.defoptions then
+      InternalError(2020033101);
+
+    finalizepi:=tcgprocinfo(current_procinfo.create_for_outlining('$fin$',current_procinfo.procdef.struct,potype_exceptfilter,voidtype,r));
+    { pi_has_assembler_block: the init/final code is messing with asm nodes,
+      so inform the compiler about this }
+    finalizepi.flags:=finalizepi.flags+[pi_do_call,pi_has_assembler_block];
+    finalizepi.allocate_push_parasize(32);
+  end;
+
+{ Simplifies the node through the inherited implementation. On aarch64-win64,
+  when the inherited call produced no replacement node, a copy of the finally
+  block becomes the code of the outlined finalizer (finalizepi) and the
+  parameter sizes of the calls in the finally block are propagated to it.
+
+  forinline is passed through to the inherited implementation unchanged.
+  Returns whatever the inherited implementation returned. }
+function taarch64tryfinallynode.simplify(forinline: boolean): tnode;
+  begin
+    result:=inherited simplify(forinline);
+    if (target_info.system<>system_aarch64_win64) then
+      exit;
+    { create leaves finalizepi unassigned for generic routines, so it must
+      not be dereferenced unconditionally here }
+    if (result=nil) and assigned(finalizepi) then
+      begin
+        { generate a copy of the code }
+        finalizepi.code:=right.getcopy;
+        foreachnodestatic(right,@copy_parasize,finalizepi);
+        { For implicit frames, no actual code is available at this time,
+          it is added later in assembler form. So store the nested procinfo
+          for later use. }
+        if implicitframe then
+          begin
+            current_procinfo.finalize_procinfo:=finalizepi;
+          end;
+      end;
+  end;
+
+{ Appends a jump label followed by a NOP instruction to the current
+  assembler list. }
+procedure emit_nop;
+  var
+    list: TAsmList;
+    anchor: TAsmLabel;
+  begin
+    list:=current_asmdata.CurrAsmList;
+    { To avoid optimizing away the whole thing, the NOP is preceded by a
+      jump label whose reference count has been increased }
+    current_asmdata.getjumplabel(anchor);
+    anchor.increfs;
+    cg.a_label(list,anchor);
+    list.concat(Taicpu.op_none(A_NOP));
+  end;
+
+{ Generates code for a try..finally statement. Other targets use the
+  inherited implementation. On aarch64-win64 the layout emitted here is:
+
+    NOP, trylabel, <try block>, finallylabel/endtrylabel (with the safecall
+    exception handler in between when catch_frame is set), <finally block
+    inline>, endfinallylabel
+
+  and a finally scope referencing the outlined finalizer (finalizepi) is
+  registered with the procinfo via add_finally_scope. }
+procedure taarch64tryfinallynode.pass_generate_code;
+  var
+    trylabel,
+    endtrylabel,
+    finallylabel,
+    endfinallylabel,
+    templabel,
+    oldexitlabel: tasmlabel;
+    oldflowcontrol: tflowcontrol;
+    catch_frame: boolean;
+  begin
+    if (target_info.system<>system_aarch64_win64) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+
+    location_reset(location,LOC_VOID,OS_NO);
+
+    { Do not generate a frame that catches exceptions if the only action
+      would be reraising it. Doing so is extremely inefficient with SEH
+      (in contrast with setjmp/longjmp exception handling) }
+    catch_frame:=implicitframe and
+      (current_procinfo.procdef.proccalloption=pocall_safecall);
+
+    oldflowcontrol:=flowcontrol;
+    flowcontrol:=[fc_inflowcontrol];
+
+    templabel:=nil;
+    current_asmdata.getjumplabel(trylabel);
+    current_asmdata.getjumplabel(endtrylabel);
+    current_asmdata.getjumplabel(finallylabel);
+    current_asmdata.getjumplabel(endfinallylabel);
+    { in an implicit frame the exit label is redirected to finallylabel;
+      the previous value is restored at the end of this routine }
+    oldexitlabel:=current_procinfo.CurrExitLabel;
+    if implicitframe then
+      current_procinfo.CurrExitLabel:=finallylabel;
+
+    { Start of scope }
+    { Padding with NOP is necessary here because exceptions in called
+      procedures are seen at the next instruction, while CPU/OS exceptions
+      like AV are seen at the current instruction.
+
+      So in the following code
+
+      raise_some_exception;        //(a)
+      try
+        pchar(nil)^:='0';          //(b)
+        ...
+
+      without NOP, exceptions (a) and (b) will be seen at the same address
+      and fall into the same scope. However they should be seen in different scopes.
+    }
+
+    emit_nop;
+    cg.a_label(current_asmdata.CurrAsmList,trylabel);
+
+    { try code }
+    if assigned(left) then
+      begin
+        { fc_unwind_xx tells exit/continue/break statements to emit special
+          unwind code instead of just JMP }
+        if not implicitframe then
+          flowcontrol:=flowcontrol+[fc_catching_exceptions,fc_unwind_exit,fc_unwind_loop];
+        secondpass(left);
+        flowcontrol:=flowcontrol-[fc_catching_exceptions,fc_unwind_exit,fc_unwind_loop];
+        if codegenerror then
+          exit;
+      end;
+
+    { finallylabel is only used in implicit frames as an exit point from nested try..finally
+      statements, if any. To prevent finalizer from being executed twice, it must come before
+      endtrylabel (bug #34772) }
+    if catch_frame then
+      begin
+        current_asmdata.getjumplabel(templabel);
+        cg.a_label(current_asmdata.CurrAsmList, finallylabel);
+        { jump over exception handler }
+        cg.a_jmp_always(current_asmdata.CurrAsmList,templabel);
+        { Handle the except block first, so endtrylabel serves both
+          as end of scope and as unwind target. This way it is possible to
+          encode everything into a single scope record. }
+        cg.a_label(current_asmdata.CurrAsmList,endtrylabel);
+        { catch_frame already implies safecall (see its assignment above),
+          so the else branch is a consistency check }
+        if (current_procinfo.procdef.proccalloption=pocall_safecall) then
+          begin
+            handle_safecall_exception;
+            cg.a_jmp_always(current_asmdata.CurrAsmList,endfinallylabel);
+          end
+        else
+          InternalError(2014031601);
+        cg.a_label(current_asmdata.CurrAsmList,templabel);
+      end
+    else
+      begin
+        { same as emit_nop but using finallylabel instead of dummy }
+        cg.a_label(current_asmdata.CurrAsmList,finallylabel);
+        finallylabel.increfs;
+        current_asmdata.CurrAsmList.concat(Taicpu.op_none(A_NOP));
+        cg.a_label(current_asmdata.CurrAsmList,endtrylabel);
+      end;
+
+    flowcontrol:=[fc_inflowcontrol];
+    { store the tempflags so that we can generate a copy of the finally handler
+      later on }
+    if not implicitframe then
+      finalizepi.store_tempflags;
+    { generate the inline finalizer code }
+    secondpass(right);
+
+    if codegenerror then
+      exit;
+
+    { normal exit from safecall proc must zero the result register }
+    if implicitframe and (current_procinfo.procdef.proccalloption=pocall_safecall) then
+      cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_INT,0,NR_FUNCTION_RESULT_REG);
+
+    cg.a_label(current_asmdata.CurrAsmList,endfinallylabel);
+
+    { generate the scope record in .xdata }
+    tcpuprocinfo(current_procinfo).add_finally_scope(trylabel,endtrylabel,
+      current_asmdata.RefAsmSymbol(finalizepi.procdef.mangledname,AT_FUNCTION),catch_frame);
+
+    if implicitframe then
+      current_procinfo.CurrExitLabel:=oldexitlabel;
+    flowcontrol:=oldflowcontrol;
+  end;
+
+{ Copies the node through the inherited implementation. On aarch64-win64 the
+  copy receives its own '$fin$' procinfo, created for the copy's finally
+  block; pi_do_call is carried over from this node's finalizepi and implicit
+  frames allocate 32 bytes of push parasize. Copying inside a generic
+  routine is an internal error. }
+function taarch64tryfinallynode.dogetcopy: tnode;
+  var
+    { typed view of result: both names denote the same variable }
+    p : taarch64tryfinallynode absolute result;
+  begin
+    result:=inherited dogetcopy;
+    if (target_info.system=system_aarch64_win64) then
+      begin
+        if df_generic in current_procinfo.procdef.defoptions then
+          InternalError(2020033104);
+
+        p.finalizepi:=tcgprocinfo(current_procinfo.create_for_outlining('$fin$',current_procinfo.procdef.struct,potype_exceptfilter,voidtype,p.right));
+        { NOTE(review): this node's own finalizepi is read without a nil
+          check; create does not assign it for generic routines - confirm
+          such a node can never be copied from a non-generic routine }
+        if pi_do_call in finalizepi.flags then
+          include(p.finalizepi.flags,pi_do_call);
+        { the init/final code is messing with asm nodes, so inform the compiler about this }
+        include(p.finalizepi.flags,pi_has_assembler_block);
+        if implicitframe then
+          p.finalizepi.allocate_push_parasize(32);
+      end;
+  end;
+
+{ taarch64tryexceptnode }
+
+{ Generates code for a try..except statement. Other targets use the
+  inherited implementation. On aarch64-win64 the emitted layout is:
+
+    NOP, trylabel, <try block>, jump to endexceptlabel, exceptlabel,
+    <on handlers, each behind its own label>, lastonlabel, <else block>,
+    <exit/break/continue trampolines as needed>, NOP, endexceptlabel
+
+  A filter table (count followed by one entry per handler) is emitted to the
+  procedure's local data, and the scope is registered with the procinfo via
+  add_except_scope. }
+procedure taarch64tryexceptnode.pass_generate_code;
+  var
+    trylabel,
+    exceptlabel,oldendexceptlabel,
+    lastonlabel,
+    exitexceptlabel,
+    continueexceptlabel,
+    breakexceptlabel,
+    oldCurrExitLabel,
+    oldContinueLabel,
+    oldBreakLabel : tasmlabel;
+    onlabel,
+    filterlabel: tasmlabel;
+    oldflowcontrol,tryflowcontrol,
+    exceptflowcontrol : tflowcontrol;
+    hnode : tnode;
+    hlist : tasmlist;
+    onnodecount : tai_const;
+    sym : tasmsymbol;
+  label
+    errorexit;
+  begin
+    if (target_info.system<>system_aarch64_win64) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+    location_reset(location,LOC_VOID,OS_NO);
+
+    oldflowcontrol:=flowcontrol;
+    exceptflowcontrol:=[];
+    continueexceptlabel:=nil;
+    breakexceptlabel:=nil;
+
+    include(flowcontrol,fc_inflowcontrol);
+    { this can be called recursively, so the unit-level endexceptlabel is
+      saved here and restored at errorexit }
+    oldBreakLabel:=nil;
+    oldContinueLabel:=nil;
+    oldendexceptlabel:=endexceptlabel;
+
+    { save the old labels for control flow statements }
+    oldCurrExitLabel:=current_procinfo.CurrExitLabel;
+    current_asmdata.getjumplabel(exitexceptlabel);
+    { break/continue labels are only handled when a break label is
+      currently assigned }
+    if assigned(current_procinfo.CurrBreakLabel) then
+      begin
+        oldContinueLabel:=current_procinfo.CurrContinueLabel;
+        oldBreakLabel:=current_procinfo.CurrBreakLabel;
+        current_asmdata.getjumplabel(breakexceptlabel);
+        current_asmdata.getjumplabel(continueexceptlabel);
+      end;
+
+    current_asmdata.getjumplabel(exceptlabel);
+    current_asmdata.getjumplabel(endexceptlabel);
+    current_asmdata.getjumplabel(lastonlabel);
+    filterlabel:=nil;
+
+    { start of scope }
+    current_asmdata.getjumplabel(trylabel);
+    emit_nop;
+    cg.a_label(current_asmdata.CurrAsmList,trylabel);
+
+    { control flow in try block needs no special handling,
+      just make sure that target labels are outside the scope }
+    secondpass(left);
+    tryflowcontrol:=flowcontrol;
+    if codegenerror then
+      goto errorexit;
+
+    { jump over except handlers }
+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+
+    { end of scope }
+    cg.a_label(current_asmdata.CurrAsmList,exceptlabel);
+
+    { set control flow labels for the except block }
+    { and the on statements                        }
+    current_procinfo.CurrExitLabel:=exitexceptlabel;
+    if assigned(oldBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=continueexceptlabel;
+        current_procinfo.CurrBreakLabel:=breakexceptlabel;
+      end;
+
+    flowcontrol:=[fc_inflowcontrol];
+    { on statements }
+    if assigned(right) then
+      begin
+        { emit filter table to a temporary asmlist }
+        hlist:=TAsmList.Create;
+        current_asmdata.getaddrlabel(filterlabel);
+        new_section(hlist,sec_rodata_norel,filterlabel.name,4);
+        cg.a_label(hlist,filterlabel);
+        { the table starts with the number of entries; the constant is
+          created as 0 and incremented for every entry added below }
+        onnodecount:=tai_const.create_32bit(0);
+        hlist.concat(onnodecount);
+
+        hnode:=right;
+        while assigned(hnode) do
+          begin
+            if hnode.nodetype<>onn then
+              InternalError(2011103101);
+            { one table entry per handler: RVA of the exception class VMT
+              symbol followed by the RVA of the handler label }
+            current_asmdata.getjumplabel(onlabel);
+            sym:=current_asmdata.RefAsmSymbol(tonnode(hnode).excepttype.vmt_mangledname,AT_DATA,true);
+            hlist.concat(tai_const.create_rva_sym(sym));
+            hlist.concat(tai_const.create_rva_sym(onlabel));
+            current_module.add_extern_asmsym(sym);
+            cg.a_label(current_asmdata.CurrAsmList,onlabel);
+            secondpass(hnode);
+            inc(onnodecount.value);
+            hnode:=tonnode(hnode).left;
+          end;
+        { add 'else' node to the filter list, too }
+        if assigned(t1) then
+          begin
+            hlist.concat(tai_const.create_32bit(-1));
+            hlist.concat(tai_const.create_rva_sym(lastonlabel));
+            inc(onnodecount.value);
+          end;
+        { now move filter table to permanent list all at once }
+        current_procinfo.aktlocaldata.concatlist(hlist);
+        hlist.free;
+      end;
+
+    cg.a_label(current_asmdata.CurrAsmList,lastonlabel);
+    if assigned(t1) then
+      begin
+        { here we don't have to reset flowcontrol           }
+        { the default and on flowcontrols are handled equal }
+        secondpass(t1);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        { a jump is only emitted when at least one of the exit/break/continue
+          blocks below will be generated }
+        if (flowcontrol*[fc_exit,fc_break,fc_continue]<>[]) then
+          cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+      end;
+    exceptflowcontrol:=flowcontrol;
+
+    if fc_exit in exceptflowcontrol then
+      begin
+        { do some magic for exit in the try block }
+        cg.a_label(current_asmdata.CurrAsmList,exitexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (fc_unwind_exit in oldflowcontrol) then
+          cg.g_local_unwind(current_asmdata.CurrAsmList,oldCurrExitLabel)
+        else
+          cg.a_jmp_always(current_asmdata.CurrAsmList,oldCurrExitLabel);
+      end;
+
+    if fc_break in exceptflowcontrol then
+      begin
+        cg.a_label(current_asmdata.CurrAsmList,breakexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (fc_unwind_loop in oldflowcontrol) then
+          cg.g_local_unwind(current_asmdata.CurrAsmList,oldBreakLabel)
+        else
+          cg.a_jmp_always(current_asmdata.CurrAsmList,oldBreakLabel);
+      end;
+
+    if fc_continue in exceptflowcontrol then
+      begin
+        cg.a_label(current_asmdata.CurrAsmList,continueexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (fc_unwind_loop in oldflowcontrol) then
+          cg.g_local_unwind(current_asmdata.CurrAsmList,oldContinueLabel)
+        else
+          cg.a_jmp_always(current_asmdata.CurrAsmList,oldContinueLabel);
+      end;
+
+    emit_nop;
+    cg.a_label(current_asmdata.CurrAsmList,endexceptlabel);
+    tcpuprocinfo(current_procinfo).add_except_scope(trylabel,exceptlabel,endexceptlabel,filterlabel);
+
+errorexit:
+    { restore all saved labels }
+    endexceptlabel:=oldendexceptlabel;
+
+    { restore the control flow labels }
+    current_procinfo.CurrExitLabel:=oldCurrExitLabel;
+    if assigned(oldBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=oldContinueLabel;
+        current_procinfo.CurrBreakLabel:=oldBreakLabel;
+      end;
+
+    { return all used control flow statements }
+    flowcontrol:=oldflowcontrol+(exceptflowcontrol +
+      tryflowcontrol - [fc_inflowcontrol]);
+  end;
+
+initialization
+  { register the AArch64 classes for the raise, on, try..except and
+    try..finally nodes }
+  craisenode:=taarch64raisenode;
+  connode:=taarch64onnode;
+  ctryexceptnode:=taarch64tryexceptnode;
+  ctryfinallynode:=taarch64tryfinallynode;
+end.
+

+ 30 - 1
compiler/aarch64/ncpuinl.pas

@@ -44,6 +44,7 @@ interface
         procedure second_trunc_real; override;
         procedure second_get_frame; override;
         procedure second_fma; override;
+        procedure second_prefetch; override;
       private
         procedure load_fpu_location;
       end;
@@ -55,7 +56,7 @@ implementation
       globtype,verbose,globals,
       cpuinfo, defutil,symdef,aasmdata,aasmcpu,
       cgbase,cgutils,pass_1,pass_2,
-      ncal,
+      ncal,nutils,
       cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
 
 {*****************************************************************************
@@ -272,6 +273,34 @@ implementation
       end;
 
 
+    { Code generation for the prefetch intrinsic. The operand (left) is
+      evaluated with pointer checking disabled; if it ends up in memory, its
+      address is loaded into a register and a PRFM instruction with operation
+      constant 0 is emitted for it. Operands in any other location emit
+      nothing. }
+    procedure taarch64inlinenode.second_prefetch;
+      var
+        ref : treference;
+        r : tregister;
+        checkpointer_used : boolean;
+      begin
+        { do not call Checkpointer for left node }
+        checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
+        if checkpointer_used then
+          node_change_local_switch(left,cs_checkpointer,false);
+        secondpass(left);
+        { re-enable the switch that was cleared above; passing false a second
+          time would leave it permanently disabled on the operand }
+        if checkpointer_used then
+          node_change_local_switch(left,cs_checkpointer,true);
+       case left.location.loc of
+         LOC_CREFERENCE,
+         LOC_REFERENCE:
+           begin
+             r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
+             cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
+             { temppos, alignment and volatility all come from the operand's
+               reference: this node's own location is never set up here }
+             reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
+             current_asmdata.CurrAsmList.concat(taicpu.op_const_ref(A_PRFM,0,ref));
+           end;
+         else
+           { nothing to prefetch };
+       end;
+      end;
+
+
 begin
   cinlinenode:=taarch64inlinenode;
 end.

+ 77 - 10
compiler/aarch64/ncpumat.pas

@@ -76,9 +76,60 @@ implementation
          resultreg  : tregister;
          hl : tasmlabel;
          overflowloc: tlocation;
+         power: longint;
+
+       { Emits the division of numerator by the ordinal constant in right
+         into resultreg without a divide instruction where possible:
+           0            -> internal error
+           1            -> plain register move
+           -1           -> NEG (NEGS when overflow checking is enabled)
+           power of two -> shift sequence
+           other values -> cg.g_div_const_reg_reg }
+       procedure genOrdConstNodeDiv;
+         var
+           helper1, helper2: TRegister;
+           so: tshifterop;
+           opsize: TCgSize;
+         begin
+           opsize:=def_cgsize(resultdef);
+           if tordconstnode(right).value=0 then
+             internalerror(2020021601)
+           else if tordconstnode(right).value=1 then
+             cg.a_load_reg_reg(current_asmdata.CurrAsmList, opsize, opsize, numerator, resultreg)
+           else if (tordconstnode(right).value = int64(-1)) then
+             begin
+               // note: only in the signed case possible..., may overflow
+               if cs_check_overflow in current_settings.localswitches then
+                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
+               { the postfix evaluates to PF_S when overflow checking is on
+                 and to the postfix with ordinal value 0 otherwise }
+               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,
+                 resultreg,numerator),toppostfix(ord(cs_check_overflow in current_settings.localswitches)*ord(PF_S))));
+             end
+           else if ispowerof2(tordconstnode(right).value,power) then
+             begin
+               if (is_signed(right.resultdef)) then
+                 begin
+                    { signed case: add 2^power-1 to negative numerators before
+                      the arithmetic shift. helper1 holds the sign replicated
+                      into every bit (for power=1 the numerator itself is
+                      enough, only its top bit survives the LSR below) }
+                    helper2:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
+                    if power = 1 then
+                      helper1:=numerator
+                    else
+                      begin
+                        helper1:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
+                        cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,resultdef.size*8-1,numerator,helper1);
+                      end;
+                    { helper2 := numerator + (helper1 LSR (bits-power)) }
+                    shifterop_reset(so);
+                    so.shiftmode:=SM_LSR;
+                    so.shiftimm:=resultdef.size*8-power;
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,helper2,numerator,helper1,so));
+                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,def_cgsize(resultdef),power,helper2,resultreg);
+                  end
+               else
+                 { unsigned case: a logical shift right is sufficient }
+                 cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,opsize,power,numerator,resultreg)
+             end
+           else
+             { Everything else is handled in the generic code }
+             cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,opsize,
+               tordconstnode(right).value.svalue,numerator,resultreg);
+         end;
+
       begin
        secondpass(left);
        secondpass(right);
+       { avoid warning }
+       divider:=NR_NO;
 
        { set result location }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
@@ -89,16 +140,32 @@ implementation
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
        numerator:=left.location.register;
 
-       { load divider in a register }
-       hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
-       divider:=right.location.register;
-
-       { start division }
-       if is_signed(left.resultdef) then
-         op:=A_SDIV
+       if (right.nodetype=ordconstn) and
+          ((tordconstnode(right).value=1) or
+           (tordconstnode(right).value=int64(-1)) or
+           (tordconstnode(right).value=0) or
+           ispowerof2(tordconstnode(right).value,power)) then
+         begin
+           genOrdConstNodeDiv;
+           if nodetype=modn then
+             begin
+               divider:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+               cg.a_load_const_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),int64(tordconstnode(right).value),divider);
+             end;
+         end
        else
-         op:=A_UDIV;
-       current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+         begin
+           { load divider in a register }
+           hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+           divider:=right.location.register;
+
+           { start division }
+           if is_signed(left.resultdef) then
+             op:=A_SDIV
+           else
+             op:=A_UDIV;
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+         end;
 
        { no divide-by-zero detection available in hardware, emulate (if it's a
          constant, this will have been detected earlier already) }
@@ -149,9 +216,9 @@ implementation
 
     procedure taarch64notnode.second_boolean;
       begin
+        secondpass(left);
         if not handle_locjump then
           begin
-            secondpass(left);
             case left.location.loc of
               LOC_FLAGS :
                 begin

+ 144 - 16
compiler/aarch64/ncpuset.pas

@@ -34,6 +34,7 @@ interface
            procedure optimizevalues(var max_linear_list: int64; var max_dist: qword);override;
            function  has_jumptable: boolean;override;
            procedure genjumptable(hp: pcaselabel ;min_, max_: int64);override;
+           procedure genlinearlist(hp: pcaselabel);override;
        end;
 
 
@@ -41,7 +42,7 @@ implementation
 
     uses
       systems,
-      verbose,globals,constexp,
+      verbose,globals,constexp,cutils,
       symconst,symdef,defutil,
       paramgr,
       cpuinfo,
@@ -68,6 +69,120 @@ implementation
       end;
 
 
+    { Generates the linear test sequence for the case labels in hp. The
+      selector in hregister is successively decremented by the distance to
+      the next label (or range bound) and the flags of that subtraction
+      decide the branch. When the selector is signed and the smallest label
+      is negative, genlinearcmplist is used instead. }
+    procedure taarch64casenode.genlinearlist(hp : pcaselabel);
+      var
+        first : boolean;
+        lastrange : boolean;
+        last : TConstExprInt;
+        cond_lt,cond_le : tresflags;
+        opcgsize, unsigned_opcgsize: tcgsize;
+
+        { emits the tests for label t; its "less" subtree is handled first
+          and its "greater" subtree last }
+        procedure genitem(t : pcaselabel);
+          var
+           ovloc: tlocation;
+          begin
+            if assigned(t^.less) then
+              genitem(t^.less);
+            { do we need to test the first value? }
+            if first and (t^._low>get_min_value(left.resultdef)) then
+              begin
+                cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,jmp_lt,aint(t^._low.svalue),hregister,elselabel);
+              end;
+            if t^._low=t^._high then
+              begin
+                 { single value label }
+                 if t^._low-last=0 then
+                   cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList, opcgsize, OC_EQ,0,hregister,blocklabel(t^.blockid))
+                 else
+                   begin
+                     { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                       then genlinearlist wouldn't be used }
+                     cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue-last.svalue), hregister, hregister,
+                       true,ovloc);
+                     cg.a_jmp_flags(current_asmdata.CurrAsmList,F_EQ,blocklabel(t^.blockid));
+                   end;
+                 last:=t^._low;
+                 lastrange:=false;
+              end
+            else
+              begin
+                 { it begins with the smallest label, if the value }
+                 { is even smaller then jump immediately to the    }
+                 { ELSE-label                                }
+                 if first then
+                   begin
+                      { do we have to adjust the first value? }
+                      if (t^._low>get_min_value(left.resultdef)) or (get_min_value(left.resultdef)<>0) then
+                        begin
+                          { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                            then genlinearlist wouldn't be used }
+                          cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue), hregister, hregister,
+                            true,ovloc);
+                        end;
+                   end
+                 else
+                   begin
+                     { if there is no unused label between the last and the }
+                     { present label then the lower limit can be checked    }
+                     { immediately. else check the range in between:       }
+
+                     { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                       then genlinearlist wouldn't be used }
+                     cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList, OP_SUB, unsigned_opcgsize, aint(t^._low.svalue - last.svalue), hregister, hregister,
+                       true,ovloc);
+                     { no jump necessary here if the new range starts }
+                     { at the value following the previous one        }
+                     if (aint(t^._low.svalue - last.svalue) <> 1) or
+                        (not lastrange) then
+                       cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_lt,elselabel);
+                   end;
+                 { use unsigned_opcgsize here to avoid unnecessary sign extensions, at this place hregister will never be negative, because
+                   then genlinearlist wouldn't be used }
+                 cg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,unsigned_opcgsize,aint(t^._high.svalue - t^._low.svalue), hregister, hregister,
+                   true,ovloc);
+                 cg.a_jmp_flags(current_asmdata.CurrAsmList,cond_le,blocklabel(t^.blockid));
+
+                 last:=t^._high;
+                 lastrange:=true;
+              end;
+            first:=false;
+            if assigned(t^.greater) then
+              genitem(t^.greater);
+          end;
+
+        begin
+           opcgsize:=def_cgsize(opsize);
+           { the subtractions are done with OS_32 for all sizes up to 32 bit
+             and with OS_64 for the 64 bit sizes }
+           case opcgsize of
+             OS_8,OS_16,OS_32,OS_S8,OS_S16,OS_S32:
+               unsigned_opcgsize:=OS_32;
+             OS_64,OS_S64:
+               unsigned_opcgsize:=OS_64;
+             else
+               Internalerror(2019090902);
+           end;
+           { flag conditions for "less than" and "less or equal", depending
+             on the signedness of the selector }
+           if with_sign then
+             begin
+                cond_lt:=F_LT;
+                cond_le:=F_LE;
+             end
+           else
+              begin
+                cond_lt:=F_CC;
+                cond_le:=F_LS;
+             end;
+           { do we need to generate cmps? }
+           if (with_sign and (min_label<0)) then
+             genlinearcmplist(hp)
+           else
+             begin
+                last:=0;
+                lastrange:=false;
+                first:=true;
+                genitem(hp);
+                { no label matched }
+                cg.a_jmp_always(current_asmdata.CurrAsmList,elselabel);
+             end;
+        end;
+
+
     procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: int64);
       var
         last: TConstExprInt;
@@ -139,24 +254,37 @@ implementation
         { and finally jump }
         current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,jumpreg));
         { generate jump table }
-        if not(target_info.system in systems_darwin) then
-          sectype:=sec_rodata
-        else
+        if target_info.system=system_aarch64_win64 then
           begin
-            { on Mac OS X, dead code stripping ("smart linking") happens based on
-              global symbols: every global/static symbol (symbols that do not
-              start with "L") marks the start of a new "subsection" that is
-              discarded by the linker if there are no references to this symbol.
-              This means that if you put the jump table in the rodata section, it
-              will become part of the block of data associated with the previous
-              non-L-label in the rodata section and stay or be thrown away
-              depending on whether that block of data is referenced. Therefore,
-              jump tables must be added in the code section and since aktlocaldata
-              is inserted right after the routine, it will become part of the
-              same subsection that contains the routine's code }
+            { for Windows we need to make sure that the jump table is located in the
+              same section as the corresponding code: for one, clang generates an
+              ABSOLUTE32 relocation that cannot be handled correctly, and armasm64
+              rejects the difference entries due to the symbols being located in
+              different sections }
             sectype:=sec_code;
+            new_section(current_procinfo.aktlocaldata,sectype,lower(current_procinfo.procdef.mangledname),getprocalign);
+          end
+        else
+          begin
+            if not(target_info.system in systems_darwin) then
+              sectype:=sec_rodata
+            else
+              begin
+                { on Mac OS X, dead code stripping ("smart linking") happens based on
+                  global symbols: every global/static symbol (symbols that do not
+                  start with "L") marks the start of a new "subsection" that is
+                  discarded by the linker if there are no references to this symbol.
+                  This means that if you put the jump table in the rodata section, it
+                  will become part of the block of data associated with the previous
+                  non-L-label in the rodata section and stay or be thrown away
+                  depending on whether that block of data is referenced. Therefore,
+                  jump tables must be added in the code section and since aktlocaldata
+                  is inserted right after the routine, it will become part of the
+                  same subsection that contains the routine's code }
+                sectype:=sec_code;
+              end;
+            new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
           end;
-        new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
         if target_info.system in systems_darwin then
           begin
             { additionally, these tables are now marked via ".data_region jt32"

+ 452 - 36
compiler/aarch64/ra64con.inc

@@ -66,167 +66,583 @@ NR_WZR = tregister($0104001F);
 NR_XZR = tregister($0105001F);
 NR_WSP = tregister($01040020);
 NR_SP = tregister($01050020);
+NR_NZCV = tregister($05000000);
+NR_FPCR = tregister($05000001);
+NR_FPSR = tregister($05000002);
+NR_TPIDR_EL0 = tregister($05000003);
 NR_B0 = tregister($04010000);
 NR_H0 = tregister($04030000);
 NR_S0 = tregister($04090000);
 NR_D0 = tregister($040a0000);
-NR_Q0 = tregister($04050000);
+NR_Q0 = tregister($040b0000);
+NR_V0 = tregister($04000000);
+NR_V0_B = tregister($04200000);
+NR_V0_H = tregister($04210000);
+NR_V0_S = tregister($04220000);
+NR_V0_D = tregister($04230000);
+NR_V0_8B = tregister($04180000);
+NR_V0_16B = tregister($04190000);
+NR_V0_4H = tregister($041a0000);
+NR_V0_8H = tregister($041b0000);
+NR_V0_2S = tregister($041c0000);
+NR_V0_4S = tregister($041d0000);
+NR_V0_1D = tregister($041e0000);
+NR_V0_2D = tregister($041f0000);
 NR_B1 = tregister($04010001);
 NR_H1 = tregister($04030001);
 NR_S1 = tregister($04090001);
 NR_D1 = tregister($040a0001);
-NR_Q1 = tregister($04050001);
+NR_Q1 = tregister($040b0001);
+NR_V1 = tregister($04000001);
+NR_V1_B = tregister($04200001);
+NR_V1_H = tregister($04210001);
+NR_V1_S = tregister($04220001);
+NR_V1_D = tregister($04230001);
+NR_V1_8B = tregister($04180001);
+NR_V1_16B = tregister($04190001);
+NR_V1_4H = tregister($041a0001);
+NR_V1_8H = tregister($041b0001);
+NR_V1_2S = tregister($041c0001);
+NR_V1_4S = tregister($041d0001);
+NR_V1_1D = tregister($041e0001);
+NR_V1_2D = tregister($041f0001);
 NR_B2 = tregister($04010002);
 NR_H2 = tregister($04030002);
 NR_S2 = tregister($04090002);
 NR_D2 = tregister($040a0002);
-NR_Q2 = tregister($04050002);
+NR_Q2 = tregister($040b0002);
+NR_V2 = tregister($04000002);
+NR_V2_B = tregister($04200002);
+NR_V2_H = tregister($04210002);
+NR_V2_S = tregister($04220002);
+NR_V2_D = tregister($04230002);
+NR_V2_8B = tregister($04180002);
+NR_V2_16B = tregister($04190002);
+NR_V2_4H = tregister($041a0002);
+NR_V2_8H = tregister($041b0002);
+NR_V2_2S = tregister($041c0002);
+NR_V2_4S = tregister($041d0002);
+NR_V2_1D = tregister($041e0002);
+NR_V2_2D = tregister($041f0002);
 NR_B3 = tregister($04010003);
 NR_H3 = tregister($04030003);
 NR_S3 = tregister($04090003);
 NR_D3 = tregister($040a0003);
-NR_Q3 = tregister($04050003);
+NR_Q3 = tregister($040b0003);
+NR_V3 = tregister($04000003);
+NR_V3_B = tregister($04200003);
+NR_V3_H = tregister($04210003);
+NR_V3_S = tregister($04220003);
+NR_V3_D = tregister($04230003);
+NR_V3_8B = tregister($04180003);
+NR_V3_16B = tregister($04190003);
+NR_V3_4H = tregister($041a0003);
+NR_V3_8H = tregister($041b0003);
+NR_V3_2S = tregister($041c0003);
+NR_V3_4S = tregister($041d0003);
+NR_V3_1D = tregister($041e0003);
+NR_V3_2D = tregister($041f0003);
 NR_B4 = tregister($04010004);
 NR_H4 = tregister($04030004);
 NR_S4 = tregister($04090004);
 NR_D4 = tregister($040a0004);
-NR_Q4 = tregister($04050004);
+NR_Q4 = tregister($040b0004);
+NR_V4 = tregister($04000004);
+NR_V4_B = tregister($04200004);
+NR_V4_H = tregister($04210004);
+NR_V4_S = tregister($04220004);
+NR_V4_D = tregister($04230004);
+NR_V4_8B = tregister($04180004);
+NR_V4_16B = tregister($04190004);
+NR_V4_4H = tregister($041a0004);
+NR_V4_8H = tregister($041b0004);
+NR_V4_2S = tregister($041c0004);
+NR_V4_4S = tregister($041d0004);
+NR_V4_1D = tregister($041e0004);
+NR_V4_2D = tregister($041f0004);
 NR_B5 = tregister($04010005);
 NR_H5 = tregister($04030005);
 NR_S5 = tregister($04090005);
 NR_D5 = tregister($040a0005);
-NR_Q5 = tregister($04050005);
+NR_Q5 = tregister($040b0005);
+NR_V5 = tregister($04000005);
+NR_V5_B = tregister($04200005);
+NR_V5_H = tregister($04210005);
+NR_V5_S = tregister($04220005);
+NR_V5_D = tregister($04230005);
+NR_V5_8B = tregister($04180005);
+NR_V5_16B = tregister($04190005);
+NR_V5_4H = tregister($041a0005);
+NR_V5_8H = tregister($041b0005);
+NR_V5_2S = tregister($041c0005);
+NR_V5_4S = tregister($041d0005);
+NR_V5_1D = tregister($041e0005);
+NR_V5_2D = tregister($041f0005);
 NR_B6 = tregister($04010006);
 NR_H6 = tregister($04030006);
 NR_S6 = tregister($04090006);
 NR_D6 = tregister($040a0006);
-NR_Q6 = tregister($04050006);
+NR_Q6 = tregister($040b0006);
+NR_V6 = tregister($04000006);
+NR_V6_B = tregister($04200006);
+NR_V6_H = tregister($04210006);
+NR_V6_S = tregister($04220006);
+NR_V6_D = tregister($04230006);
+NR_V6_8B = tregister($04180006);
+NR_V6_16B = tregister($04190006);
+NR_V6_4H = tregister($041a0006);
+NR_V6_8H = tregister($041b0006);
+NR_V6_2S = tregister($041c0006);
+NR_V6_4S = tregister($041d0006);
+NR_V6_1D = tregister($041e0006);
+NR_V6_2D = tregister($041f0006);
 NR_B7 = tregister($04010007);
 NR_H7 = tregister($04030007);
 NR_S7 = tregister($04090007);
 NR_D7 = tregister($040a0007);
-NR_Q7 = tregister($04050007);
+NR_Q7 = tregister($040b0007);
+NR_V7 = tregister($04000007);
+NR_V7_B = tregister($04200007);
+NR_V7_H = tregister($04210007);
+NR_V7_S = tregister($04220007);
+NR_V7_D = tregister($04230007);
+NR_V7_8B = tregister($04180007);
+NR_V7_16B = tregister($04190007);
+NR_V7_4H = tregister($041a0007);
+NR_V7_8H = tregister($041b0007);
+NR_V7_2S = tregister($041c0007);
+NR_V7_4S = tregister($041d0007);
+NR_V7_1D = tregister($041e0007);
+NR_V7_2D = tregister($041f0007);
 NR_B8 = tregister($04010008);
 NR_H8 = tregister($04030008);
 NR_S8 = tregister($04090008);
 NR_D8 = tregister($040a0008);
-NR_Q8 = tregister($04050008);
+NR_Q8 = tregister($040b0008);
+NR_V8 = tregister($04000008);
+NR_V8_B = tregister($04200008);
+NR_V8_H = tregister($04210008);
+NR_V8_S = tregister($04220008);
+NR_V8_D = tregister($04230008);
+NR_V8_8B = tregister($04180008);
+NR_V8_16B = tregister($04190008);
+NR_V8_4H = tregister($041a0008);
+NR_V8_8H = tregister($041b0008);
+NR_V8_2S = tregister($041c0008);
+NR_V8_4S = tregister($041d0008);
+NR_V8_1D = tregister($041e0008);
+NR_V8_2D = tregister($041f0008);
 NR_B9 = tregister($04010009);
 NR_H9 = tregister($04030009);
 NR_S9 = tregister($04090009);
 NR_D9 = tregister($040a0009);
-NR_Q9 = tregister($04050009);
+NR_Q9 = tregister($040b0009);
+NR_V9 = tregister($04000009);
+NR_V9_B = tregister($04200009);
+NR_V9_H = tregister($04210009);
+NR_V9_S = tregister($04220009);
+NR_V9_D = tregister($04230009);
+NR_V9_8B = tregister($04180009);
+NR_V9_16B = tregister($04190009);
+NR_V9_4H = tregister($041a0009);
+NR_V9_8H = tregister($041b0009);
+NR_V9_2S = tregister($041c0009);
+NR_V9_4S = tregister($041d0009);
+NR_V9_1D = tregister($041e0009);
+NR_V9_2D = tregister($041f0009);
 NR_B10 = tregister($0401000A);
 NR_H10 = tregister($0403000A);
 NR_S10 = tregister($0409000A);
 NR_D10 = tregister($040a000A);
-NR_Q10 = tregister($0405000A);
+NR_Q10 = tregister($040b000A);
+NR_V10 = tregister($0400000A);
+NR_V10_B = tregister($0420000A);
+NR_V10_H = tregister($0421000A);
+NR_V10_S = tregister($0422000A);
+NR_V10_D = tregister($0423000A);
+NR_V10_8B = tregister($0418000A);
+NR_V10_16B = tregister($0419000A);
+NR_V10_4H = tregister($041a000A);
+NR_V10_8H = tregister($041b000A);
+NR_V10_2S = tregister($041c000A);
+NR_V10_4S = tregister($041d000A);
+NR_V10_1D = tregister($041e000A);
+NR_V10_2D = tregister($041f000A);
 NR_B11 = tregister($0401000B);
 NR_H11 = tregister($0403000B);
 NR_S11 = tregister($0409000B);
 NR_D11 = tregister($040a000B);
-NR_Q11 = tregister($0405000B);
+NR_Q11 = tregister($040b000B);
+NR_V11 = tregister($0400000B);
+NR_V11_B = tregister($0420000B);
+NR_V11_H = tregister($0421000B);
+NR_V11_S = tregister($0422000B);
+NR_V11_D = tregister($0423000B);
+NR_V11_8B = tregister($0418000B);
+NR_V11_16B = tregister($0419000B);
+NR_V11_4H = tregister($041a000B);
+NR_V11_8H = tregister($041b000B);
+NR_V11_2S = tregister($041c000B);
+NR_V11_4S = tregister($041d000B);
+NR_V11_1D = tregister($041e000B);
+NR_V11_2D = tregister($041f000B);
 NR_B12 = tregister($0401000C);
 NR_H12 = tregister($0403000C);
 NR_S12 = tregister($0409000C);
 NR_D12 = tregister($040a000C);
-NR_Q12 = tregister($0405000C);
+NR_Q12 = tregister($040b000C);
+NR_V12 = tregister($0400000C);
+NR_V12_B = tregister($0420000C);
+NR_V12_H = tregister($0421000C);
+NR_V12_S = tregister($0422000C);
+NR_V12_D = tregister($0423000C);
+NR_V12_8B = tregister($0418000C);
+NR_V12_16B = tregister($0419000C);
+NR_V12_4H = tregister($041a000C);
+NR_V12_8H = tregister($041b000C);
+NR_V12_2S = tregister($041c000C);
+NR_V12_4S = tregister($041d000C);
+NR_V12_1D = tregister($041e000C);
+NR_V12_2D = tregister($041f000C);
 NR_B13 = tregister($0401000D);
 NR_H13 = tregister($0403000D);
 NR_S13 = tregister($0409000D);
 NR_D13 = tregister($040a000D);
-NR_Q13 = tregister($0405000D);
+NR_Q13 = tregister($040b000D);
+NR_V13 = tregister($0400000D);
+NR_V13_B = tregister($0420000D);
+NR_V13_H = tregister($0421000D);
+NR_V13_S = tregister($0422000D);
+NR_V13_D = tregister($0423000D);
+NR_V13_8B = tregister($0418000D);
+NR_V13_16B = tregister($0419000D);
+NR_V13_4H = tregister($041a000D);
+NR_V13_8H = tregister($041b000D);
+NR_V13_2S = tregister($041c000D);
+NR_V13_4S = tregister($041d000D);
+NR_V13_1D = tregister($041e000D);
+NR_V13_2D = tregister($041f000D);
 NR_B14 = tregister($0401000E);
 NR_H14 = tregister($0403000E);
 NR_S14 = tregister($0409000E);
 NR_D14 = tregister($040a000E);
-NR_Q14 = tregister($0405000E);
+NR_Q14 = tregister($040b000E);
+NR_V14 = tregister($0400000E);
+NR_V14_B = tregister($0420000E);
+NR_V14_H = tregister($0421000E);
+NR_V14_S = tregister($0422000E);
+NR_V14_D = tregister($0423000E);
+NR_V14_8B = tregister($0418000E);
+NR_V14_16B = tregister($0419000E);
+NR_V14_4H = tregister($041a000E);
+NR_V14_8H = tregister($041b000E);
+NR_V14_2S = tregister($041c000E);
+NR_V14_4S = tregister($041d000E);
+NR_V14_1D = tregister($041e000E);
+NR_V14_2D = tregister($041f000E);
 NR_B15 = tregister($0401000F);
 NR_H15 = tregister($0403000F);
 NR_S15 = tregister($0409000F);
 NR_D15 = tregister($040a000F);
-NR_Q15 = tregister($0405000F);
+NR_Q15 = tregister($040b000F);
+NR_V15 = tregister($0400000F);
+NR_V15_B = tregister($0420000F);
+NR_V15_H = tregister($0421000F);
+NR_V15_S = tregister($0422000F);
+NR_V15_D = tregister($0423000F);
+NR_V15_8B = tregister($0418000F);
+NR_V15_16B = tregister($0419000F);
+NR_V15_4H = tregister($041a000F);
+NR_V15_8H = tregister($041b000F);
+NR_V15_2S = tregister($041c000F);
+NR_V15_4S = tregister($041d000F);
+NR_V15_1D = tregister($041e000F);
+NR_V15_2D = tregister($041f000F);
 NR_B16 = tregister($04010010);
 NR_H16 = tregister($04030010);
 NR_S16 = tregister($04090010);
 NR_D16 = tregister($040a0010);
-NR_Q16 = tregister($04050010);
+NR_Q16 = tregister($040b0010);
+NR_V16 = tregister($04000010);
+NR_V16_B = tregister($04200010);
+NR_V16_H = tregister($04210010);
+NR_V16_S = tregister($04220010);
+NR_V16_D = tregister($04230010);
+NR_V16_8B = tregister($04180010);
+NR_V16_16B = tregister($04190010);
+NR_V16_4H = tregister($041a0010);
+NR_V16_8H = tregister($041b0010);
+NR_V16_2S = tregister($041c0010);
+NR_V16_4S = tregister($041d0010);
+NR_V16_1D = tregister($041e0010);
+NR_V16_2D = tregister($041f0010);
 NR_B17 = tregister($04010011);
 NR_H17 = tregister($04030011);
 NR_S17 = tregister($04090011);
 NR_D17 = tregister($040a0011);
-NR_Q17 = tregister($04050011);
+NR_Q17 = tregister($040b0011);
+NR_V17 = tregister($04000011);
+NR_V17_B = tregister($04200011);
+NR_V17_H = tregister($04210011);
+NR_V17_S = tregister($04220011);
+NR_V17_D = tregister($04230011);
+NR_V17_8B = tregister($04180011);
+NR_V17_16B = tregister($04190011);
+NR_V17_4H = tregister($041a0011);
+NR_V17_8H = tregister($041b0011);
+NR_V17_2S = tregister($041c0011);
+NR_V17_4S = tregister($041d0011);
+NR_V17_1D = tregister($041e0011);
+NR_V17_2D = tregister($041f0011);
 NR_B18 = tregister($04010012);
 NR_H18 = tregister($04030012);
 NR_S18 = tregister($04090012);
 NR_D18 = tregister($040a0012);
-NR_Q18 = tregister($04050012);
+NR_Q18 = tregister($040b0012);
+NR_V18 = tregister($04000012);
+NR_V18_B = tregister($04200012);
+NR_V18_H = tregister($04210012);
+NR_V18_S = tregister($04220012);
+NR_V18_D = tregister($04230012);
+NR_V18_8B = tregister($04180012);
+NR_V18_16B = tregister($04190012);
+NR_V18_4H = tregister($041a0012);
+NR_V18_8H = tregister($041b0012);
+NR_V18_2S = tregister($041c0012);
+NR_V18_4S = tregister($041d0012);
+NR_V18_1D = tregister($041e0012);
+NR_V18_2D = tregister($041f0012);
 NR_B19 = tregister($04010013);
 NR_H19 = tregister($04030013);
 NR_S19 = tregister($04090013);
 NR_D19 = tregister($040a0013);
-NR_Q19 = tregister($04050013);
+NR_Q19 = tregister($040b0013);
+NR_V19 = tregister($04000013);
+NR_V19_B = tregister($04200013);
+NR_V19_H = tregister($04210013);
+NR_V19_S = tregister($04220013);
+NR_V19_D = tregister($04230013);
+NR_V19_8B = tregister($04180013);
+NR_V19_16B = tregister($04190013);
+NR_V19_4H = tregister($041a0013);
+NR_V19_8H = tregister($041b0013);
+NR_V19_2S = tregister($041c0013);
+NR_V19_4S = tregister($041d0013);
+NR_V19_1D = tregister($041e0013);
+NR_V19_2D = tregister($041f0013);
 NR_B20 = tregister($04010014);
 NR_H20 = tregister($04030014);
 NR_S20 = tregister($04090014);
 NR_D20 = tregister($040a0014);
-NR_Q20 = tregister($04050014);
+NR_Q20 = tregister($040b0014);
+NR_V20 = tregister($04000014);
+NR_V20_B = tregister($04200014);
+NR_V20_H = tregister($04210014);
+NR_V20_S = tregister($04220014);
+NR_V20_D = tregister($04230014);
+NR_V20_8B = tregister($04180014);
+NR_V20_16B = tregister($04190014);
+NR_V20_4H = tregister($041a0014);
+NR_V20_8H = tregister($041b0014);
+NR_V20_2S = tregister($041c0014);
+NR_V20_4S = tregister($041d0014);
+NR_V20_1D = tregister($041e0014);
+NR_V20_2D = tregister($041f0014);
 NR_B21 = tregister($04010015);
 NR_H21 = tregister($04030015);
 NR_S21 = tregister($04090015);
 NR_D21 = tregister($040a0015);
-NR_Q21 = tregister($04050015);
+NR_Q21 = tregister($040b0015);
+NR_V21 = tregister($04000015);
+NR_V21_B = tregister($04200015);
+NR_V21_H = tregister($04210015);
+NR_V21_S = tregister($04220015);
+NR_V21_D = tregister($04230015);
+NR_V21_8B = tregister($04180015);
+NR_V21_16B = tregister($04190015);
+NR_V21_4H = tregister($041a0015);
+NR_V21_8H = tregister($041b0015);
+NR_V21_2S = tregister($041c0015);
+NR_V21_4S = tregister($041d0015);
+NR_V21_1D = tregister($041e0015);
+NR_V21_2D = tregister($041f0015);
 NR_B22 = tregister($04010016);
 NR_H22 = tregister($04030016);
 NR_S22 = tregister($04090016);
 NR_D22 = tregister($040a0016);
-NR_Q22 = tregister($04050016);
+NR_Q22 = tregister($040b0016);
+NR_V22 = tregister($04000016);
+NR_V22_B = tregister($04200016);
+NR_V22_H = tregister($04210016);
+NR_V22_S = tregister($04220016);
+NR_V22_D = tregister($04230016);
+NR_V22_8B = tregister($04180016);
+NR_V22_16B = tregister($04190016);
+NR_V22_4H = tregister($041a0016);
+NR_V22_8H = tregister($041b0016);
+NR_V22_2S = tregister($041c0016);
+NR_V22_4S = tregister($041d0016);
+NR_V22_1D = tregister($041e0016);
+NR_V22_2D = tregister($041f0016);
 NR_B23 = tregister($04010017);
 NR_H23 = tregister($04030017);
 NR_S23 = tregister($04090017);
 NR_D23 = tregister($040a0017);
-NR_Q23 = tregister($04050017);
+NR_Q23 = tregister($040b0017);
+NR_V23 = tregister($04000017);
+NR_V23_B = tregister($04200017);
+NR_V23_H = tregister($04210017);
+NR_V23_S = tregister($04220017);
+NR_V23_D = tregister($04230017);
+NR_V23_8B = tregister($04180017);
+NR_V23_16B = tregister($04190017);
+NR_V23_4H = tregister($041a0017);
+NR_V23_8H = tregister($041b0017);
+NR_V23_2S = tregister($041c0017);
+NR_V23_4S = tregister($041d0017);
+NR_V23_1D = tregister($041e0017);
+NR_V23_2D = tregister($041f0017);
 NR_B24 = tregister($04010018);
 NR_H24 = tregister($04030018);
 NR_S24 = tregister($04090018);
 NR_D24 = tregister($040a0018);
-NR_Q24 = tregister($04050018);
+NR_Q24 = tregister($040b0018);
+NR_V24 = tregister($04000018);
+NR_V24_B = tregister($04200018);
+NR_V24_H = tregister($04210018);
+NR_V24_S = tregister($04220018);
+NR_V24_D = tregister($04230018);
+NR_V24_8B = tregister($04180018);
+NR_V24_16B = tregister($04190018);
+NR_V24_4H = tregister($041a0018);
+NR_V24_8H = tregister($041b0018);
+NR_V24_2S = tregister($041c0018);
+NR_V24_4S = tregister($041d0018);
+NR_V24_1D = tregister($041e0018);
+NR_V24_2D = tregister($041f0018);
 NR_B25 = tregister($04010019);
 NR_H25 = tregister($04030019);
 NR_S25 = tregister($04090019);
 NR_D25 = tregister($040a0019);
-NR_Q25 = tregister($04050019);
+NR_Q25 = tregister($040b0019);
+NR_V25 = tregister($04000019);
+NR_V25_B = tregister($04200019);
+NR_V25_H = tregister($04210019);
+NR_V25_S = tregister($04220019);
+NR_V25_D = tregister($04230019);
+NR_V25_8B = tregister($04180019);
+NR_V25_16B = tregister($04190019);
+NR_V25_4H = tregister($041a0019);
+NR_V25_8H = tregister($041b0019);
+NR_V25_2S = tregister($041c0019);
+NR_V25_4S = tregister($041d0019);
+NR_V25_1D = tregister($041e0019);
+NR_V25_2D = tregister($041f0019);
 NR_B26 = tregister($0401001A);
 NR_H26 = tregister($0403001A);
 NR_S26 = tregister($0409001A);
 NR_D26 = tregister($040a001A);
-NR_Q26 = tregister($0405001A);
+NR_Q26 = tregister($040b001A);
+NR_V26 = tregister($0400001A);
+NR_V26_B = tregister($0420001A);
+NR_V26_H = tregister($0421001A);
+NR_V26_S = tregister($0422001A);
+NR_V26_D = tregister($0423001A);
+NR_V26_8B = tregister($0418001A);
+NR_V26_16B = tregister($0419001A);
+NR_V26_4H = tregister($041a001A);
+NR_V26_8H = tregister($041b001A);
+NR_V26_2S = tregister($041c001A);
+NR_V26_4S = tregister($041d001A);
+NR_V26_1D = tregister($041e001A);
+NR_V26_2D = tregister($041f001A);
 NR_B27 = tregister($0401001B);
 NR_H27 = tregister($0403001B);
 NR_S27 = tregister($0409001B);
 NR_D27 = tregister($040a001B);
-NR_Q27 = tregister($0405001B);
+NR_Q27 = tregister($040b001B);
+NR_V27 = tregister($0400001B);
+NR_V27_B = tregister($0420001B);
+NR_V27_H = tregister($0421001B);
+NR_V27_S = tregister($0422001B);
+NR_V27_D = tregister($0423001B);
+NR_V27_8B = tregister($0418001B);
+NR_V27_16B = tregister($0419001B);
+NR_V27_4H = tregister($041a001B);
+NR_V27_8H = tregister($041b001B);
+NR_V27_2S = tregister($041c001B);
+NR_V27_4S = tregister($041d001B);
+NR_V27_1D = tregister($041e001B);
+NR_V27_2D = tregister($041f001B);
 NR_B28 = tregister($0401001C);
 NR_H28 = tregister($0403001C);
 NR_S28 = tregister($0409001C);
 NR_D28 = tregister($040a001C);
-NR_Q28 = tregister($0405001C);
+NR_Q28 = tregister($040b001C);
+NR_V28 = tregister($0400001C);
+NR_V28_B = tregister($0420001C);
+NR_V28_H = tregister($0421001C);
+NR_V28_S = tregister($0422001C);
+NR_V28_D = tregister($0423001C);
+NR_V28_8B = tregister($0418001C);
+NR_V28_16B = tregister($0419001C);
+NR_V28_4H = tregister($041a001C);
+NR_V28_8H = tregister($041b001C);
+NR_V28_2S = tregister($041c001C);
+NR_V28_4S = tregister($041d001C);
+NR_V28_1D = tregister($041e001C);
+NR_V28_2D = tregister($041f001C);
 NR_B29 = tregister($0401001D);
 NR_H29 = tregister($0403001D);
 NR_S29 = tregister($0409001D);
 NR_D29 = tregister($040a001D);
-NR_Q29 = tregister($0405001D);
+NR_Q29 = tregister($040b001D);
+NR_V29 = tregister($0400001D);
+NR_V29_B = tregister($0420001D);
+NR_V29_H = tregister($0421001D);
+NR_V29_S = tregister($0422001D);
+NR_V29_D = tregister($0423001D);
+NR_V29_8B = tregister($0418001D);
+NR_V29_16B = tregister($0419001D);
+NR_V29_4H = tregister($041a001D);
+NR_V29_8H = tregister($041b001D);
+NR_V29_2S = tregister($041c001D);
+NR_V29_4S = tregister($041d001D);
+NR_V29_1D = tregister($041e001D);
+NR_V29_2D = tregister($041f001D);
 NR_B30 = tregister($0401001E);
 NR_H30 = tregister($0403001E);
 NR_S30 = tregister($0409001E);
 NR_D30 = tregister($040a001E);
-NR_Q30 = tregister($0405001E);
+NR_Q30 = tregister($040b001E);
+NR_V30 = tregister($0400001E);
+NR_V30_B = tregister($0420001E);
+NR_V30_H = tregister($0421001E);
+NR_V30_S = tregister($0422001E);
+NR_V30_D = tregister($0423001E);
+NR_V30_8B = tregister($0418001E);
+NR_V30_16B = tregister($0419001E);
+NR_V30_4H = tregister($041a001E);
+NR_V30_8H = tregister($041b001E);
+NR_V30_2S = tregister($041c001E);
+NR_V30_4S = tregister($041d001E);
+NR_V30_1D = tregister($041e001E);
+NR_V30_2D = tregister($041f001E);
 NR_B31 = tregister($0401001F);
 NR_H31 = tregister($0403001F);
 NR_S31 = tregister($0409001F);
 NR_D31 = tregister($040a001F);
-NR_Q31 = tregister($0405001F);
-NR_NZCV = tregister($05000000);
-NR_FPCR = tregister($05000001);
-NR_FPSR = tregister($05000002);
-NR_TPIDR_EL0 = tregister($05000003);
+NR_Q31 = tregister($040b001F);
+NR_V31 = tregister($0400001F);
+NR_V31_B = tregister($0420001F);
+NR_V31_H = tregister($0421001F);
+NR_V31_S = tregister($0422001F);
+NR_V31_D = tregister($0423001F);
+NR_V31_8B = tregister($0418001F);
+NR_V31_16B = tregister($0419001F);
+NR_V31_4H = tregister($041a001F);
+NR_V31_8H = tregister($041b001F);
+NR_V31_2S = tregister($041c001F);
+NR_V31_4S = tregister($041d001F);
+NR_V31_1D = tregister($041e001F);
+NR_V31_2D = tregister($041f001F);

+ 420 - 4
compiler/aarch64/ra64dwa.inc

@@ -66,86 +66,311 @@
 31,
 31,
 31,
+0,
+0,
+0,
+0,
+64,
+64,
+64,
+64,
+64,
+64,
+64,
+64,
+64,
+64,
 64,
 64,
 64,
 64,
 64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+65,
+65,
+65,
+65,
+65,
 65,
 65,
 65,
 65,
 65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+66,
+66,
+66,
+66,
+66,
 66,
 66,
 66,
 66,
 66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+67,
+67,
+67,
+67,
+67,
 67,
 67,
 67,
 67,
 67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
 68,
 68,
 68,
 68,
 68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
 69,
 69,
 69,
 69,
 69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
 70,
 70,
 70,
 70,
 70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
 71,
 71,
 71,
 71,
 71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
 72,
 72,
 72,
 72,
 72,
+72,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
 73,
 73,
 73,
 73,
 73,
+73,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
 74,
 74,
 74,
 74,
 74,
+74,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
 75,
 75,
 75,
 75,
 75,
+75,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
 76,
 76,
 76,
 76,
 76,
+76,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
 77,
 77,
 77,
 77,
 77,
+77,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
 78,
 78,
 78,
 78,
 78,
+78,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
 79,
 79,
 79,
 79,
 79,
+79,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
 80,
 80,
 80,
@@ -156,11 +381,50 @@
 81,
 81,
 81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
 82,
 82,
 82,
 82,
 82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
 83,
 83,
 83,
@@ -171,51 +435,194 @@
 84,
 84,
 84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+85,
 85,
 85,
 85,
 85,
 85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+86,
 86,
 86,
 86,
 86,
 86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+87,
 87,
 87,
 87,
 87,
 87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+88,
 88,
 88,
 88,
 88,
 88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
 89,
 89,
 89,
 89,
 89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
 90,
 90,
 90,
 90,
 90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
 91,
 91,
 91,
 91,
 91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
 92,
 92,
 92,
 92,
 92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
 93,
 93,
 93,
 93,
 93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
 94,
 94,
 94,
@@ -226,7 +633,16 @@
 95,
 95,
 95,
-0,
-0,
-0,
-0
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95

+ 1 - 1
compiler/aarch64/ra64nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from a64reg.dat }
-231
+647

+ 452 - 36
compiler/aarch64/ra64num.inc

@@ -66,167 +66,583 @@ tregister($0104001F),
 tregister($0105001F),
 tregister($01040020),
 tregister($01050020),
+tregister($05000000),
+tregister($05000001),
+tregister($05000002),
+tregister($05000003),
 tregister($04010000),
 tregister($04030000),
 tregister($04090000),
 tregister($040a0000),
-tregister($04050000),
+tregister($040b0000),
+tregister($04000000),
+tregister($04200000),
+tregister($04210000),
+tregister($04220000),
+tregister($04230000),
+tregister($04180000),
+tregister($04190000),
+tregister($041a0000),
+tregister($041b0000),
+tregister($041c0000),
+tregister($041d0000),
+tregister($041e0000),
+tregister($041f0000),
 tregister($04010001),
 tregister($04030001),
 tregister($04090001),
 tregister($040a0001),
-tregister($04050001),
+tregister($040b0001),
+tregister($04000001),
+tregister($04200001),
+tregister($04210001),
+tregister($04220001),
+tregister($04230001),
+tregister($04180001),
+tregister($04190001),
+tregister($041a0001),
+tregister($041b0001),
+tregister($041c0001),
+tregister($041d0001),
+tregister($041e0001),
+tregister($041f0001),
 tregister($04010002),
 tregister($04030002),
 tregister($04090002),
 tregister($040a0002),
-tregister($04050002),
+tregister($040b0002),
+tregister($04000002),
+tregister($04200002),
+tregister($04210002),
+tregister($04220002),
+tregister($04230002),
+tregister($04180002),
+tregister($04190002),
+tregister($041a0002),
+tregister($041b0002),
+tregister($041c0002),
+tregister($041d0002),
+tregister($041e0002),
+tregister($041f0002),
 tregister($04010003),
 tregister($04030003),
 tregister($04090003),
 tregister($040a0003),
-tregister($04050003),
+tregister($040b0003),
+tregister($04000003),
+tregister($04200003),
+tregister($04210003),
+tregister($04220003),
+tregister($04230003),
+tregister($04180003),
+tregister($04190003),
+tregister($041a0003),
+tregister($041b0003),
+tregister($041c0003),
+tregister($041d0003),
+tregister($041e0003),
+tregister($041f0003),
 tregister($04010004),
 tregister($04030004),
 tregister($04090004),
 tregister($040a0004),
-tregister($04050004),
+tregister($040b0004),
+tregister($04000004),
+tregister($04200004),
+tregister($04210004),
+tregister($04220004),
+tregister($04230004),
+tregister($04180004),
+tregister($04190004),
+tregister($041a0004),
+tregister($041b0004),
+tregister($041c0004),
+tregister($041d0004),
+tregister($041e0004),
+tregister($041f0004),
 tregister($04010005),
 tregister($04030005),
 tregister($04090005),
 tregister($040a0005),
-tregister($04050005),
+tregister($040b0005),
+tregister($04000005),
+tregister($04200005),
+tregister($04210005),
+tregister($04220005),
+tregister($04230005),
+tregister($04180005),
+tregister($04190005),
+tregister($041a0005),
+tregister($041b0005),
+tregister($041c0005),
+tregister($041d0005),
+tregister($041e0005),
+tregister($041f0005),
 tregister($04010006),
 tregister($04030006),
 tregister($04090006),
 tregister($040a0006),
-tregister($04050006),
+tregister($040b0006),
+tregister($04000006),
+tregister($04200006),
+tregister($04210006),
+tregister($04220006),
+tregister($04230006),
+tregister($04180006),
+tregister($04190006),
+tregister($041a0006),
+tregister($041b0006),
+tregister($041c0006),
+tregister($041d0006),
+tregister($041e0006),
+tregister($041f0006),
 tregister($04010007),
 tregister($04030007),
 tregister($04090007),
 tregister($040a0007),
-tregister($04050007),
+tregister($040b0007),
+tregister($04000007),
+tregister($04200007),
+tregister($04210007),
+tregister($04220007),
+tregister($04230007),
+tregister($04180007),
+tregister($04190007),
+tregister($041a0007),
+tregister($041b0007),
+tregister($041c0007),
+tregister($041d0007),
+tregister($041e0007),
+tregister($041f0007),
 tregister($04010008),
 tregister($04030008),
 tregister($04090008),
 tregister($040a0008),
-tregister($04050008),
+tregister($040b0008),
+tregister($04000008),
+tregister($04200008),
+tregister($04210008),
+tregister($04220008),
+tregister($04230008),
+tregister($04180008),
+tregister($04190008),
+tregister($041a0008),
+tregister($041b0008),
+tregister($041c0008),
+tregister($041d0008),
+tregister($041e0008),
+tregister($041f0008),
 tregister($04010009),
 tregister($04030009),
 tregister($04090009),
 tregister($040a0009),
-tregister($04050009),
+tregister($040b0009),
+tregister($04000009),
+tregister($04200009),
+tregister($04210009),
+tregister($04220009),
+tregister($04230009),
+tregister($04180009),
+tregister($04190009),
+tregister($041a0009),
+tregister($041b0009),
+tregister($041c0009),
+tregister($041d0009),
+tregister($041e0009),
+tregister($041f0009),
 tregister($0401000A),
 tregister($0403000A),
 tregister($0409000A),
 tregister($040a000A),
-tregister($0405000A),
+tregister($040b000A),
+tregister($0400000A),
+tregister($0420000A),
+tregister($0421000A),
+tregister($0422000A),
+tregister($0423000A),
+tregister($0418000A),
+tregister($0419000A),
+tregister($041a000A),
+tregister($041b000A),
+tregister($041c000A),
+tregister($041d000A),
+tregister($041e000A),
+tregister($041f000A),
 tregister($0401000B),
 tregister($0403000B),
 tregister($0409000B),
 tregister($040a000B),
-tregister($0405000B),
+tregister($040b000B),
+tregister($0400000B),
+tregister($0420000B),
+tregister($0421000B),
+tregister($0422000B),
+tregister($0423000B),
+tregister($0418000B),
+tregister($0419000B),
+tregister($041a000B),
+tregister($041b000B),
+tregister($041c000B),
+tregister($041d000B),
+tregister($041e000B),
+tregister($041f000B),
 tregister($0401000C),
 tregister($0403000C),
 tregister($0409000C),
 tregister($040a000C),
-tregister($0405000C),
+tregister($040b000C),
+tregister($0400000C),
+tregister($0420000C),
+tregister($0421000C),
+tregister($0422000C),
+tregister($0423000C),
+tregister($0418000C),
+tregister($0419000C),
+tregister($041a000C),
+tregister($041b000C),
+tregister($041c000C),
+tregister($041d000C),
+tregister($041e000C),
+tregister($041f000C),
 tregister($0401000D),
 tregister($0403000D),
 tregister($0409000D),
 tregister($040a000D),
-tregister($0405000D),
+tregister($040b000D),
+tregister($0400000D),
+tregister($0420000D),
+tregister($0421000D),
+tregister($0422000D),
+tregister($0423000D),
+tregister($0418000D),
+tregister($0419000D),
+tregister($041a000D),
+tregister($041b000D),
+tregister($041c000D),
+tregister($041d000D),
+tregister($041e000D),
+tregister($041f000D),
 tregister($0401000E),
 tregister($0403000E),
 tregister($0409000E),
 tregister($040a000E),
-tregister($0405000E),
+tregister($040b000E),
+tregister($0400000E),
+tregister($0420000E),
+tregister($0421000E),
+tregister($0422000E),
+tregister($0423000E),
+tregister($0418000E),
+tregister($0419000E),
+tregister($041a000E),
+tregister($041b000E),
+tregister($041c000E),
+tregister($041d000E),
+tregister($041e000E),
+tregister($041f000E),
 tregister($0401000F),
 tregister($0403000F),
 tregister($0409000F),
 tregister($040a000F),
-tregister($0405000F),
+tregister($040b000F),
+tregister($0400000F),
+tregister($0420000F),
+tregister($0421000F),
+tregister($0422000F),
+tregister($0423000F),
+tregister($0418000F),
+tregister($0419000F),
+tregister($041a000F),
+tregister($041b000F),
+tregister($041c000F),
+tregister($041d000F),
+tregister($041e000F),
+tregister($041f000F),
 tregister($04010010),
 tregister($04030010),
 tregister($04090010),
 tregister($040a0010),
-tregister($04050010),
+tregister($040b0010),
+tregister($04000010),
+tregister($04200010),
+tregister($04210010),
+tregister($04220010),
+tregister($04230010),
+tregister($04180010),
+tregister($04190010),
+tregister($041a0010),
+tregister($041b0010),
+tregister($041c0010),
+tregister($041d0010),
+tregister($041e0010),
+tregister($041f0010),
 tregister($04010011),
 tregister($04030011),
 tregister($04090011),
 tregister($040a0011),
-tregister($04050011),
+tregister($040b0011),
+tregister($04000011),
+tregister($04200011),
+tregister($04210011),
+tregister($04220011),
+tregister($04230011),
+tregister($04180011),
+tregister($04190011),
+tregister($041a0011),
+tregister($041b0011),
+tregister($041c0011),
+tregister($041d0011),
+tregister($041e0011),
+tregister($041f0011),
 tregister($04010012),
 tregister($04030012),
 tregister($04090012),
 tregister($040a0012),
-tregister($04050012),
+tregister($040b0012),
+tregister($04000012),
+tregister($04200012),
+tregister($04210012),
+tregister($04220012),
+tregister($04230012),
+tregister($04180012),
+tregister($04190012),
+tregister($041a0012),
+tregister($041b0012),
+tregister($041c0012),
+tregister($041d0012),
+tregister($041e0012),
+tregister($041f0012),
 tregister($04010013),
 tregister($04030013),
 tregister($04090013),
 tregister($040a0013),
-tregister($04050013),
+tregister($040b0013),
+tregister($04000013),
+tregister($04200013),
+tregister($04210013),
+tregister($04220013),
+tregister($04230013),
+tregister($04180013),
+tregister($04190013),
+tregister($041a0013),
+tregister($041b0013),
+tregister($041c0013),
+tregister($041d0013),
+tregister($041e0013),
+tregister($041f0013),
 tregister($04010014),
 tregister($04030014),
 tregister($04090014),
 tregister($040a0014),
-tregister($04050014),
+tregister($040b0014),
+tregister($04000014),
+tregister($04200014),
+tregister($04210014),
+tregister($04220014),
+tregister($04230014),
+tregister($04180014),
+tregister($04190014),
+tregister($041a0014),
+tregister($041b0014),
+tregister($041c0014),
+tregister($041d0014),
+tregister($041e0014),
+tregister($041f0014),
 tregister($04010015),
 tregister($04030015),
 tregister($04090015),
 tregister($040a0015),
-tregister($04050015),
+tregister($040b0015),
+tregister($04000015),
+tregister($04200015),
+tregister($04210015),
+tregister($04220015),
+tregister($04230015),
+tregister($04180015),
+tregister($04190015),
+tregister($041a0015),
+tregister($041b0015),
+tregister($041c0015),
+tregister($041d0015),
+tregister($041e0015),
+tregister($041f0015),
 tregister($04010016),
 tregister($04030016),
 tregister($04090016),
 tregister($040a0016),
-tregister($04050016),
+tregister($040b0016),
+tregister($04000016),
+tregister($04200016),
+tregister($04210016),
+tregister($04220016),
+tregister($04230016),
+tregister($04180016),
+tregister($04190016),
+tregister($041a0016),
+tregister($041b0016),
+tregister($041c0016),
+tregister($041d0016),
+tregister($041e0016),
+tregister($041f0016),
 tregister($04010017),
 tregister($04030017),
 tregister($04090017),
 tregister($040a0017),
-tregister($04050017),
+tregister($040b0017),
+tregister($04000017),
+tregister($04200017),
+tregister($04210017),
+tregister($04220017),
+tregister($04230017),
+tregister($04180017),
+tregister($04190017),
+tregister($041a0017),
+tregister($041b0017),
+tregister($041c0017),
+tregister($041d0017),
+tregister($041e0017),
+tregister($041f0017),
 tregister($04010018),
 tregister($04030018),
 tregister($04090018),
 tregister($040a0018),
-tregister($04050018),
+tregister($040b0018),
+tregister($04000018),
+tregister($04200018),
+tregister($04210018),
+tregister($04220018),
+tregister($04230018),
+tregister($04180018),
+tregister($04190018),
+tregister($041a0018),
+tregister($041b0018),
+tregister($041c0018),
+tregister($041d0018),
+tregister($041e0018),
+tregister($041f0018),
 tregister($04010019),
 tregister($04030019),
 tregister($04090019),
 tregister($040a0019),
-tregister($04050019),
+tregister($040b0019),
+tregister($04000019),
+tregister($04200019),
+tregister($04210019),
+tregister($04220019),
+tregister($04230019),
+tregister($04180019),
+tregister($04190019),
+tregister($041a0019),
+tregister($041b0019),
+tregister($041c0019),
+tregister($041d0019),
+tregister($041e0019),
+tregister($041f0019),
 tregister($0401001A),
 tregister($0403001A),
 tregister($0409001A),
 tregister($040a001A),
-tregister($0405001A),
+tregister($040b001A),
+tregister($0400001A),
+tregister($0420001A),
+tregister($0421001A),
+tregister($0422001A),
+tregister($0423001A),
+tregister($0418001A),
+tregister($0419001A),
+tregister($041a001A),
+tregister($041b001A),
+tregister($041c001A),
+tregister($041d001A),
+tregister($041e001A),
+tregister($041f001A),
 tregister($0401001B),
 tregister($0403001B),
 tregister($0409001B),
 tregister($040a001B),
-tregister($0405001B),
+tregister($040b001B),
+tregister($0400001B),
+tregister($0420001B),
+tregister($0421001B),
+tregister($0422001B),
+tregister($0423001B),
+tregister($0418001B),
+tregister($0419001B),
+tregister($041a001B),
+tregister($041b001B),
+tregister($041c001B),
+tregister($041d001B),
+tregister($041e001B),
+tregister($041f001B),
 tregister($0401001C),
 tregister($0403001C),
 tregister($0409001C),
 tregister($040a001C),
-tregister($0405001C),
+tregister($040b001C),
+tregister($0400001C),
+tregister($0420001C),
+tregister($0421001C),
+tregister($0422001C),
+tregister($0423001C),
+tregister($0418001C),
+tregister($0419001C),
+tregister($041a001C),
+tregister($041b001C),
+tregister($041c001C),
+tregister($041d001C),
+tregister($041e001C),
+tregister($041f001C),
 tregister($0401001D),
 tregister($0403001D),
 tregister($0409001D),
 tregister($040a001D),
-tregister($0405001D),
+tregister($040b001D),
+tregister($0400001D),
+tregister($0420001D),
+tregister($0421001D),
+tregister($0422001D),
+tregister($0423001D),
+tregister($0418001D),
+tregister($0419001D),
+tregister($041a001D),
+tregister($041b001D),
+tregister($041c001D),
+tregister($041d001D),
+tregister($041e001D),
+tregister($041f001D),
 tregister($0401001E),
 tregister($0403001E),
 tregister($0409001E),
 tregister($040a001E),
-tregister($0405001E),
+tregister($040b001E),
+tregister($0400001E),
+tregister($0420001E),
+tregister($0421001E),
+tregister($0422001E),
+tregister($0423001E),
+tregister($0418001E),
+tregister($0419001E),
+tregister($041a001E),
+tregister($041b001E),
+tregister($041c001E),
+tregister($041d001E),
+tregister($041e001E),
+tregister($041f001E),
 tregister($0401001F),
 tregister($0403001F),
 tregister($0409001F),
 tregister($040a001F),
-tregister($0405001F),
-tregister($05000000),
-tregister($05000001),
-tregister($05000002),
-tregister($05000003)
+tregister($040b001F),
+tregister($0400001F),
+tregister($0420001F),
+tregister($0421001F),
+tregister($0422001F),
+tregister($0423001F),
+tregister($0418001F),
+tregister($0419001F),
+tregister($041a001F),
+tregister($041b001F),
+tregister($041c001F),
+tregister($041d001F),
+tregister($041e001F),
+tregister($041f001F)

+ 565 - 149
compiler/aarch64/ra64rni.inc

@@ -66,167 +66,583 @@
 62,
 64,
 66,
-67,
-72,
-77,
-82,
-87,
-92,
-97,
-102,
-107,
+76,
+94,
 112,
-117,
-122,
-127,
-132,
-137,
-142,
-147,
-152,
-157,
-162,
-167,
-172,
-177,
-182,
-187,
-192,
-197,
+130,
+148,
+166,
+184,
 202,
-207,
-212,
-217,
-222,
-68,
-73,
-78,
-83,
-88,
-93,
-98,
-103,
-108,
-113,
-118,
-123,
-128,
-133,
-138,
+220,
+238,
+256,
+274,
+292,
+310,
+328,
+346,
+364,
+382,
+400,
+418,
+436,
+454,
+472,
+490,
+508,
+526,
+544,
+562,
+580,
+598,
+616,
+634,
+71,
+89,
+107,
+125,
 143,
-148,
-153,
-158,
-163,
-168,
-173,
-178,
-183,
-188,
-193,
+161,
+179,
+197,
+215,
+233,
+251,
+269,
+287,
+305,
+323,
+341,
+359,
+377,
+395,
+413,
+431,
+449,
+467,
+485,
+503,
+521,
+539,
+557,
+575,
+593,
+611,
+629,
+72,
+90,
+108,
+126,
+144,
+162,
+180,
 198,
-203,
-208,
-213,
-218,
-223,
-71,
-76,
-81,
-86,
+216,
+234,
+252,
+270,
+288,
+306,
+324,
+342,
+360,
+378,
+396,
+414,
+432,
+450,
+468,
+486,
+504,
+522,
+540,
+558,
+576,
+594,
+612,
+630,
+73,
 91,
-96,
-101,
-106,
-111,
-116,
-121,
-126,
-131,
-136,
-141,
-146,
-151,
-156,
-161,
-166,
-171,
-176,
+109,
+127,
+145,
+163,
 181,
-186,
-191,
-196,
-201,
-206,
-211,
-216,
-221,
-226,
-69,
+199,
+217,
+235,
+253,
+271,
+289,
+307,
+325,
+343,
+361,
+379,
+397,
+415,
+433,
+451,
+469,
+487,
+505,
+523,
+541,
+559,
+577,
+595,
+613,
+631,
 74,
-79,
-84,
-89,
-94,
-99,
-104,
-109,
-114,
-119,
-124,
-129,
-134,
-139,
-144,
-149,
-154,
-159,
+92,
+110,
+128,
+146,
 164,
-169,
-174,
-179,
-184,
-189,
-194,
-199,
-204,
-209,
-214,
-219,
-224,
-70,
+182,
+200,
+218,
+236,
+254,
+272,
+290,
+308,
+326,
+344,
+362,
+380,
+398,
+416,
+434,
+452,
+470,
+488,
+506,
+524,
+542,
+560,
+578,
+596,
+614,
+632,
 75,
-80,
-85,
-90,
-95,
+93,
+111,
+129,
+147,
+165,
+183,
+201,
+219,
+237,
+255,
+273,
+291,
+309,
+327,
+345,
+363,
+381,
+399,
+417,
+435,
+453,
+471,
+489,
+507,
+525,
+543,
+561,
+579,
+597,
+615,
+633,
+81,
+99,
+117,
+135,
+153,
+171,
+189,
+207,
+225,
+243,
+261,
+279,
+297,
+315,
+333,
+351,
+369,
+387,
+405,
+423,
+441,
+459,
+477,
+495,
+513,
+531,
+549,
+567,
+585,
+603,
+621,
+639,
+82,
 100,
-105,
-110,
-115,
+118,
+136,
+154,
+172,
+190,
+208,
+226,
+244,
+262,
+280,
+298,
+316,
+334,
+352,
+370,
+388,
+406,
+424,
+442,
+460,
+478,
+496,
+514,
+532,
+550,
+568,
+586,
+604,
+622,
+640,
+83,
+101,
+119,
+137,
+155,
+173,
+191,
+209,
+227,
+245,
+263,
+281,
+299,
+317,
+335,
+353,
+371,
+389,
+407,
+425,
+443,
+461,
+479,
+497,
+515,
+533,
+551,
+569,
+587,
+605,
+623,
+641,
+84,
+102,
 120,
-125,
-130,
-135,
+138,
+156,
+174,
+192,
+210,
+228,
+246,
+264,
+282,
+300,
+318,
+336,
+354,
+372,
+390,
+408,
+426,
+444,
+462,
+480,
+498,
+516,
+534,
+552,
+570,
+588,
+606,
+624,
+642,
+85,
+103,
+121,
+139,
+157,
+175,
+193,
+211,
+229,
+247,
+265,
+283,
+301,
+319,
+337,
+355,
+373,
+391,
+409,
+427,
+445,
+463,
+481,
+499,
+517,
+535,
+553,
+571,
+589,
+607,
+625,
+643,
+86,
+104,
+122,
 140,
-145,
-150,
-155,
+158,
+176,
+194,
+212,
+230,
+248,
+266,
+284,
+302,
+320,
+338,
+356,
+374,
+392,
+410,
+428,
+446,
+464,
+482,
+500,
+518,
+536,
+554,
+572,
+590,
+608,
+626,
+644,
+87,
+105,
+123,
+141,
+159,
+177,
+195,
+213,
+231,
+249,
+267,
+285,
+303,
+321,
+339,
+357,
+375,
+393,
+411,
+429,
+447,
+465,
+483,
+501,
+519,
+537,
+555,
+573,
+591,
+609,
+627,
+645,
+88,
+106,
+124,
+142,
 160,
-165,
-170,
-175,
-180,
+178,
+196,
+214,
+232,
+250,
+268,
+286,
+304,
+322,
+340,
+358,
+376,
+394,
+412,
+430,
+448,
+466,
+484,
+502,
+520,
+538,
+556,
+574,
+592,
+610,
+628,
+646,
+77,
+95,
+113,
+131,
+149,
+167,
 185,
-190,
-195,
-200,
+203,
+221,
+239,
+257,
+275,
+293,
+311,
+329,
+347,
+365,
+383,
+401,
+419,
+437,
+455,
+473,
+491,
+509,
+527,
+545,
+563,
+581,
+599,
+617,
+635,
+78,
+96,
+114,
+132,
+150,
+168,
+186,
+204,
+222,
+240,
+258,
+276,
+294,
+312,
+330,
+348,
+366,
+384,
+402,
+420,
+438,
+456,
+474,
+492,
+510,
+528,
+546,
+564,
+582,
+600,
+618,
+636,
+79,
+97,
+115,
+133,
+151,
+169,
+187,
 205,
-210,
-215,
-220,
-225,
-227,
-228,
-229,
-230
+223,
+241,
+259,
+277,
+295,
+313,
+331,
+349,
+367,
+385,
+403,
+421,
+439,
+457,
+475,
+493,
+511,
+529,
+547,
+565,
+583,
+601,
+619,
+637,
+80,
+98,
+116,
+134,
+152,
+170,
+188,
+206,
+224,
+242,
+260,
+278,
+296,
+314,
+332,
+350,
+368,
+386,
+404,
+422,
+440,
+458,
+476,
+494,
+512,
+530,
+548,
+566,
+584,
+602,
+620,
+638,
+67,
+68,
+69,
+70

+ 561 - 145
compiler/aarch64/ra64sri.inc

@@ -1,170 +1,586 @@
 { don't edit, this file is generated from a64reg.dat }
 0,
-67,
-72,
-117,
-122,
-127,
-132,
-137,
-142,
-147,
-152,
-157,
-162,
-77,
-167,
-172,
-177,
-182,
-187,
-192,
-197,
-202,
-207,
-212,
-82,
-217,
-222,
-87,
-92,
-97,
-102,
+71,
+89,
+251,
+269,
+287,
+305,
+323,
+341,
+359,
+377,
+395,
+413,
 107,
-112,
-70,
-75,
-120,
+431,
+449,
+467,
+485,
+503,
+521,
+539,
+557,
+575,
+593,
 125,
-130,
-135,
-140,
-145,
-150,
-155,
-160,
-165,
-80,
-170,
-175,
-180,
-185,
-190,
-195,
-200,
-205,
-210,
+611,
+629,
+143,
+161,
+179,
+197,
 215,
-85,
-220,
-225,
-90,
-95,
-100,
-105,
+233,
+74,
+92,
+254,
+272,
+290,
+308,
+326,
+344,
+362,
+380,
+398,
+416,
 110,
-115,
-228,
-229,
-68,
-73,
-118,
-123,
+434,
+452,
+470,
+488,
+506,
+524,
+542,
+560,
+578,
+596,
 128,
-133,
-138,
-143,
-148,
-153,
-158,
-163,
-78,
-168,
-173,
-178,
-183,
-188,
-193,
-198,
-203,
-208,
-213,
-83,
+614,
+632,
+146,
+164,
+182,
+200,
 218,
-223,
-88,
-93,
-98,
-103,
+236,
+68,
+69,
+72,
+90,
+252,
+270,
+288,
+306,
+324,
+342,
+360,
+378,
+396,
+414,
 108,
-113,
-227,
-71,
-76,
-121,
+432,
+450,
+468,
+486,
+504,
+522,
+540,
+558,
+576,
+594,
 126,
-131,
-136,
-141,
-146,
-151,
-156,
-161,
-166,
-81,
-171,
-176,
-181,
-186,
-191,
-196,
-201,
-206,
-211,
+612,
+630,
+144,
+162,
+180,
+198,
 216,
-86,
-221,
-226,
+234,
+67,
+75,
+93,
+255,
+273,
+291,
+309,
+327,
+345,
+363,
+381,
+399,
+417,
+111,
+435,
+453,
+471,
+489,
+507,
+525,
+543,
+561,
+579,
+597,
+129,
+615,
+633,
+147,
+165,
+183,
+201,
+219,
+237,
+73,
 91,
-96,
-101,
+253,
+271,
+289,
+307,
+325,
+343,
+361,
+379,
+397,
+415,
+109,
+433,
+451,
+469,
+487,
+505,
+523,
+541,
+559,
+577,
+595,
+127,
+613,
+631,
+145,
+163,
+181,
+199,
+217,
+235,
+66,
+70,
+76,
+82,
+87,
+88,
+85,
+83,
+86,
+81,
+84,
+77,
+80,
+78,
+79,
+94,
+100,
+105,
 106,
-111,
-116,
-69,
-74,
-119,
+103,
+101,
+104,
+99,
+102,
+95,
+98,
+96,
+97,
+256,
+262,
+267,
+268,
+265,
+263,
+266,
+261,
+264,
+257,
+260,
+258,
+259,
+274,
+280,
+285,
+286,
+283,
+281,
+284,
+279,
+282,
+275,
+278,
+276,
+277,
+292,
+298,
+303,
+304,
+301,
+299,
+302,
+297,
+300,
+293,
+296,
+294,
+295,
+310,
+316,
+321,
+322,
+319,
+317,
+320,
+315,
+318,
+311,
+314,
+312,
+313,
+328,
+334,
+339,
+340,
+337,
+335,
+338,
+333,
+336,
+329,
+332,
+330,
+331,
+346,
+352,
+357,
+358,
+355,
+353,
+356,
+351,
+354,
+347,
+350,
+348,
+349,
+364,
+370,
+375,
+376,
+373,
+371,
+374,
+369,
+372,
+365,
+368,
+366,
+367,
+382,
+388,
+393,
+394,
+391,
+389,
+392,
+387,
+390,
+383,
+386,
+384,
+385,
+400,
+406,
+411,
+412,
+409,
+407,
+410,
+405,
+408,
+401,
+404,
+402,
+403,
+418,
+424,
+429,
+430,
+427,
+425,
+428,
+423,
+426,
+419,
+422,
+420,
+421,
+112,
+118,
+123,
 124,
-129,
-134,
+121,
+119,
+122,
+117,
+120,
+113,
+116,
+114,
+115,
+436,
+442,
+447,
+448,
+445,
+443,
+446,
+441,
+444,
+437,
+440,
+438,
+439,
+454,
+460,
+465,
+466,
+463,
+461,
+464,
+459,
+462,
+455,
+458,
+456,
+457,
+472,
+478,
+483,
+484,
+481,
+479,
+482,
+477,
+480,
+473,
+476,
+474,
+475,
+490,
+496,
+501,
+502,
+499,
+497,
+500,
+495,
+498,
+491,
+494,
+492,
+493,
+508,
+514,
+519,
+520,
+517,
+515,
+518,
+513,
+516,
+509,
+512,
+510,
+511,
+526,
+532,
+537,
+538,
+535,
+533,
+536,
+531,
+534,
+527,
+530,
+528,
+529,
+544,
+550,
+555,
+556,
+553,
+551,
+554,
+549,
+552,
+545,
+548,
+546,
+547,
+562,
+568,
+573,
+574,
+571,
+569,
+572,
+567,
+570,
+563,
+566,
+564,
+565,
+580,
+586,
+591,
+592,
+589,
+587,
+590,
+585,
+588,
+581,
+584,
+582,
+583,
+598,
+604,
+609,
+610,
+607,
+605,
+608,
+603,
+606,
+599,
+602,
+600,
+601,
+130,
+136,
+141,
+142,
 139,
-144,
-149,
+137,
+140,
+135,
+138,
+131,
+134,
+132,
+133,
+616,
+622,
+627,
+628,
+625,
+623,
+626,
+621,
+624,
+617,
+620,
+618,
+619,
+634,
+640,
+645,
+646,
+643,
+641,
+644,
+639,
+642,
+635,
+638,
+636,
+637,
+148,
 154,
 159,
-164,
-79,
-169,
+160,
+157,
+155,
+158,
+153,
+156,
+149,
+152,
+150,
+151,
+166,
+172,
+177,
+178,
+175,
+173,
+176,
+171,
 174,
-179,
+167,
+170,
+168,
+169,
 184,
-189,
+190,
+195,
+196,
+193,
+191,
 194,
-199,
-204,
-209,
+189,
+192,
+185,
+188,
+186,
+187,
+202,
+208,
+213,
 214,
-84,
-219,
-224,
-89,
-94,
-99,
-104,
-109,
-114,
-66,
+211,
+209,
+212,
+207,
+210,
+203,
+206,
+204,
+205,
+220,
+226,
+231,
+232,
+229,
+227,
 230,
+225,
+228,
+221,
+224,
+222,
+223,
+238,
+244,
+249,
+250,
+247,
+245,
+248,
+243,
+246,
+239,
+242,
+240,
+241,
 1,
 3,
 21,

+ 420 - 4
compiler/aarch64/ra64sta.inc

@@ -66,86 +66,311 @@
 31,
 31,
 31,
+0,
+0,
+0,
+0,
+64,
+64,
+64,
+64,
+64,
+64,
+64,
+64,
+64,
+64,
 64,
 64,
 64,
 64,
 64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+65,
+65,
+65,
+65,
+65,
 65,
 65,
 65,
 65,
 65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+66,
+66,
+66,
+66,
+66,
 66,
 66,
 66,
 66,
 66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+67,
+67,
+67,
+67,
+67,
 67,
 67,
 67,
 67,
 67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
+68,
 68,
 68,
 68,
 68,
 68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
+69,
 69,
 69,
 69,
 69,
 69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
+70,
 70,
 70,
 70,
 70,
 70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
+71,
 71,
 71,
 71,
 71,
 71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
+72,
 72,
 72,
 72,
 72,
 72,
+72,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
+73,
 73,
 73,
 73,
 73,
 73,
+73,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
+74,
 74,
 74,
 74,
 74,
 74,
+74,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
+75,
 75,
 75,
 75,
 75,
 75,
+75,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
+76,
 76,
 76,
 76,
 76,
 76,
+76,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
+77,
 77,
 77,
 77,
 77,
 77,
+77,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
+78,
 78,
 78,
 78,
 78,
 78,
+78,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
+79,
 79,
 79,
 79,
 79,
 79,
+79,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
+80,
 80,
 80,
 80,
@@ -156,11 +381,50 @@
 81,
 81,
 81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
+81,
 82,
 82,
 82,
 82,
 82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
+83,
 83,
 83,
 83,
@@ -171,51 +435,194 @@
 84,
 84,
 84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+84,
+85,
 85,
 85,
 85,
 85,
 85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+85,
+86,
 86,
 86,
 86,
 86,
 86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+86,
+87,
 87,
 87,
 87,
 87,
 87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+87,
+88,
 88,
 88,
 88,
 88,
 88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
 89,
 89,
 89,
 89,
 89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
 90,
 90,
 90,
 90,
 90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
 91,
 91,
 91,
 91,
 91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
 92,
 92,
 92,
 92,
 92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
 93,
 93,
 93,
 93,
 93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
+94,
 94,
 94,
 94,
@@ -226,7 +633,16 @@
 95,
 95,
 95,
-0,
-0,
-0,
-0
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95,
+95

+ 420 - 4
compiler/aarch64/ra64std.inc

@@ -66,167 +66,583 @@
 'xzr',
 'wsp',
 'sp',
+'nzcv',
+'fpcr',
+'fpsr',
+'tpidr_el0',
 'b0',
 'h0',
 's0',
 'd0',
 'q0',
+'v0',
+'v0.b',
+'v0.h',
+'v0.s',
+'v0.d',
+'v0.8b',
+'v0.16b',
+'v0.4h',
+'v0.8h',
+'v0.2s',
+'v0.4s',
+'v0.1d',
+'v0.2d',
 'b1',
 'h1',
 's1',
 'd1',
 'q1',
+'v1',
+'v1.b',
+'v1.h',
+'v1.s',
+'v1.d',
+'v1.8b',
+'v1.16b',
+'v1.4h',
+'v1.8h',
+'v1.2s',
+'v1.4s',
+'v1.1d',
+'v1.2d',
 'b2',
 'h2',
 's2',
 'd2',
 'q2',
+'v2',
+'v2.b',
+'v2.h',
+'v2.s',
+'v2.d',
+'v2.8b',
+'v2.16b',
+'v2.4h',
+'v2.8h',
+'v2.2s',
+'v2.4s',
+'v2.1d',
+'v2.2d',
 'b3',
 'h3',
 's3',
 'd3',
 'q3',
+'v3',
+'v3.b',
+'v3.h',
+'v3.s',
+'v3.d',
+'v3.8b',
+'v3.16b',
+'v3.4h',
+'v3.8h',
+'v3.2s',
+'v3.4s',
+'v3.1d',
+'v3.2d',
 'b4',
 'h4',
 's4',
 'd4',
 'q4',
+'v4',
+'v4.b',
+'v4.h',
+'v4.s',
+'v4.d',
+'v4.8b',
+'v4.16b',
+'v4.4h',
+'v4.8h',
+'v4.2s',
+'v4.4s',
+'v4.1d',
+'v4.2d',
 'b5',
 'h5',
 's5',
 'd5',
 'q5',
+'v5',
+'v5.b',
+'v5.h',
+'v5.s',
+'v5.d',
+'v5.8b',
+'v5.16b',
+'v5.4h',
+'v5.8h',
+'v5.2s',
+'v5.4s',
+'v5.1d',
+'v5.2d',
 'b6',
 'h6',
 's6',
 'd6',
 'q6',
+'v6',
+'v6.b',
+'v6.h',
+'v6.s',
+'v6.d',
+'v6.8b',
+'v6.16b',
+'v6.4h',
+'v6.8h',
+'v6.2s',
+'v6.4s',
+'v6.1d',
+'v6.2d',
 'b7',
 'h7',
 's7',
 'd7',
 'q7',
+'v7',
+'v7.b',
+'v7.h',
+'v7.s',
+'v7.d',
+'v7.8b',
+'v7.16b',
+'v7.4h',
+'v7.8h',
+'v7.2s',
+'v7.4s',
+'v7.1d',
+'v7.2d',
 'b8',
 'h8',
 's8',
 'd8',
 'q8',
+'v8',
+'v8.b',
+'v8.h',
+'v8.s',
+'v8.d',
+'v8.8b',
+'v8.16b',
+'v8.4h',
+'v8.8h',
+'v8.2s',
+'v8.4s',
+'v8.1d',
+'v8.2d',
 'b9',
 'h9',
 's9',
 'd9',
 'q9',
+'v9',
+'v9.b',
+'v9.h',
+'v9.s',
+'v9.d',
+'v9.8b',
+'v9.16b',
+'v9.4h',
+'v9.8h',
+'v9.2s',
+'v9.4s',
+'v9.1d',
+'v9.2d',
 'b10',
 'h10',
 's10',
 'd10',
 'q10',
+'v10',
+'v10.b',
+'v10.h',
+'v10.s',
+'v10.d',
+'v10.8b',
+'v10.16b',
+'v10.4h',
+'v10.8h',
+'v10.2s',
+'v10.4s',
+'v10.1d',
+'v10.2d',
 'b11',
 'h11',
 's11',
 'd11',
 'q11',
+'v11',
+'v11.b',
+'v11.h',
+'v11.s',
+'v11.d',
+'v11.8b',
+'v11.16b',
+'v11.4h',
+'v11.8h',
+'v11.2s',
+'v11.4s',
+'v11.1d',
+'v11.2d',
 'b12',
 'h12',
 's12',
 'd12',
 'q12',
+'v12',
+'v12.b',
+'v12.h',
+'v12.s',
+'v12.d',
+'v12.8b',
+'v12.16b',
+'v12.4h',
+'v12.8h',
+'v12.2s',
+'v12.4s',
+'v12.1d',
+'v12.2d',
 'b13',
 'h13',
 's13',
 'd13',
 'q13',
+'v13',
+'v13.b',
+'v13.h',
+'v13.s',
+'v13.d',
+'v13.8b',
+'v13.16b',
+'v13.4h',
+'v13.8h',
+'v13.2s',
+'v13.4s',
+'v13.1d',
+'v13.2d',
 'b14',
 'h14',
 's14',
 'd14',
 'q14',
+'v14',
+'v14.b',
+'v14.h',
+'v14.s',
+'v14.d',
+'v14.8b',
+'v14.16b',
+'v14.4h',
+'v14.8h',
+'v14.2s',
+'v14.4s',
+'v14.1d',
+'v14.2d',
 'b15',
 'h15',
 's15',
 'd15',
 'q15',
+'v15',
+'v15.b',
+'v15.h',
+'v15.s',
+'v15.d',
+'v15.8b',
+'v15.16b',
+'v15.4h',
+'v15.8h',
+'v15.2s',
+'v15.4s',
+'v15.1d',
+'v15.2d',
 'b16',
 'h16',
 's16',
 'd16',
 'q16',
+'v16',
+'v16.b',
+'v16.h',
+'v16.s',
+'v16.d',
+'v16.8b',
+'v16.16b',
+'v16.4h',
+'v16.8h',
+'v16.2s',
+'v16.4s',
+'v16.1d',
+'v16.2d',
 'b17',
 'h17',
 's17',
 'd17',
 'q17',
+'v17',
+'v17.b',
+'v17.h',
+'v17.s',
+'v17.d',
+'v17.8b',
+'v17.16b',
+'v17.4h',
+'v17.8h',
+'v17.2s',
+'v17.4s',
+'v17.1d',
+'v17.2d',
 'b18',
 'h18',
 's18',
 'd18',
 'q18',
+'v18',
+'v18.b',
+'v18.h',
+'v18.s',
+'v18.d',
+'v18.8b',
+'v18.16b',
+'v18.4h',
+'v18.8h',
+'v18.2s',
+'v18.4s',
+'v18.1d',
+'v18.2d',
 'b19',
 'h19',
 's19',
 'd19',
 'q19',
+'v19',
+'v19.b',
+'v19.h',
+'v19.s',
+'v19.d',
+'v19.8b',
+'v19.16b',
+'v19.4h',
+'v19.8h',
+'v19.2s',
+'v19.4s',
+'v19.1d',
+'v19.2d',
 'b20',
 'h20',
 's20',
 'd20',
 'q20',
+'v20',
+'v20.b',
+'v20.h',
+'v20.s',
+'v20.d',
+'v20.8b',
+'v20.16b',
+'v20.4h',
+'v20.8h',
+'v20.2s',
+'v20.4s',
+'v20.1d',
+'v20.2d',
 'b21',
 'h21',
 's21',
 'd21',
 'q21',
+'v21',
+'v21.b',
+'v21.h',
+'v21.s',
+'v21.d',
+'v21.8b',
+'v21.16b',
+'v21.4h',
+'v21.8h',
+'v21.2s',
+'v21.4s',
+'v21.1d',
+'v21.2d',
 'b22',
 'h22',
 's22',
 'd22',
 'q22',
+'v22',
+'v22.b',
+'v22.h',
+'v22.s',
+'v22.d',
+'v22.8b',
+'v22.16b',
+'v22.4h',
+'v22.8h',
+'v22.2s',
+'v22.4s',
+'v22.1d',
+'v22.2d',
 'b23',
 'h23',
 's23',
 'd23',
 'q23',
+'v23',
+'v23.b',
+'v23.h',
+'v23.s',
+'v23.d',
+'v23.8b',
+'v23.16b',
+'v23.4h',
+'v23.8h',
+'v23.2s',
+'v23.4s',
+'v23.1d',
+'v23.2d',
 'b24',
 'h24',
 's24',
 'd24',
 'q24',
+'v24',
+'v24.b',
+'v24.h',
+'v24.s',
+'v24.d',
+'v24.8b',
+'v24.16b',
+'v24.4h',
+'v24.8h',
+'v24.2s',
+'v24.4s',
+'v24.1d',
+'v24.2d',
 'b25',
 'h25',
 's25',
 'd25',
 'q25',
+'v25',
+'v25.b',
+'v25.h',
+'v25.s',
+'v25.d',
+'v25.8b',
+'v25.16b',
+'v25.4h',
+'v25.8h',
+'v25.2s',
+'v25.4s',
+'v25.1d',
+'v25.2d',
 'b26',
 'h26',
 's26',
 'd26',
 'q26',
+'v26',
+'v26.b',
+'v26.h',
+'v26.s',
+'v26.d',
+'v26.8b',
+'v26.16b',
+'v26.4h',
+'v26.8h',
+'v26.2s',
+'v26.4s',
+'v26.1d',
+'v26.2d',
 'b27',
 'h27',
 's27',
 'd27',
 'q27',
+'v27',
+'v27.b',
+'v27.h',
+'v27.s',
+'v27.d',
+'v27.8b',
+'v27.16b',
+'v27.4h',
+'v27.8h',
+'v27.2s',
+'v27.4s',
+'v27.1d',
+'v27.2d',
 'b28',
 'h28',
 's28',
 'd28',
 'q28',
+'v28',
+'v28.b',
+'v28.h',
+'v28.s',
+'v28.d',
+'v28.8b',
+'v28.16b',
+'v28.4h',
+'v28.8h',
+'v28.2s',
+'v28.4s',
+'v28.1d',
+'v28.2d',
 'b29',
 'h29',
 's29',
 'd29',
 'q29',
+'v29',
+'v29.b',
+'v29.h',
+'v29.s',
+'v29.d',
+'v29.8b',
+'v29.16b',
+'v29.4h',
+'v29.8h',
+'v29.2s',
+'v29.4s',
+'v29.1d',
+'v29.2d',
 'b30',
 'h30',
 's30',
 'd30',
 'q30',
+'v30',
+'v30.b',
+'v30.h',
+'v30.s',
+'v30.d',
+'v30.8b',
+'v30.16b',
+'v30.4h',
+'v30.8h',
+'v30.2s',
+'v30.4s',
+'v30.1d',
+'v30.2d',
 'b31',
 'h31',
 's31',
 'd31',
 'q31',
-'nzcv',
-'fpcr',
-'fpsr',
-'tpidr_el0'
+'v31',
+'v31.b',
+'v31.h',
+'v31.s',
+'v31.d',
+'v31.8b',
+'v31.16b',
+'v31.4h',
+'v31.8h',
+'v31.2s',
+'v31.4s',
+'v31.1d',
+'v31.2d'

+ 420 - 4
compiler/aarch64/ra64sup.inc

@@ -66,167 +66,583 @@ RS_WZR = $1F;
 RS_XZR = $1F;
 RS_WSP = $20;
 RS_SP = $20;
+RS_NZCV = $00;
+RS_FPCR = $01;
+RS_FPSR = $02;
+RS_TPIDR_EL0 = $03;
 RS_B0 = $00;
 RS_H0 = $00;
 RS_S0 = $00;
 RS_D0 = $00;
 RS_Q0 = $00;
+RS_V0 = $00;
+RS_V0_B = $00;
+RS_V0_H = $00;
+RS_V0_S = $00;
+RS_V0_D = $00;
+RS_V0_8B = $00;
+RS_V0_16B = $00;
+RS_V0_4H = $00;
+RS_V0_8H = $00;
+RS_V0_2S = $00;
+RS_V0_4S = $00;
+RS_V0_1D = $00;
+RS_V0_2D = $00;
 RS_B1 = $01;
 RS_H1 = $01;
 RS_S1 = $01;
 RS_D1 = $01;
 RS_Q1 = $01;
+RS_V1 = $01;
+RS_V1_B = $01;
+RS_V1_H = $01;
+RS_V1_S = $01;
+RS_V1_D = $01;
+RS_V1_8B = $01;
+RS_V1_16B = $01;
+RS_V1_4H = $01;
+RS_V1_8H = $01;
+RS_V1_2S = $01;
+RS_V1_4S = $01;
+RS_V1_1D = $01;
+RS_V1_2D = $01;
 RS_B2 = $02;
 RS_H2 = $02;
 RS_S2 = $02;
 RS_D2 = $02;
 RS_Q2 = $02;
+RS_V2 = $02;
+RS_V2_B = $02;
+RS_V2_H = $02;
+RS_V2_S = $02;
+RS_V2_D = $02;
+RS_V2_8B = $02;
+RS_V2_16B = $02;
+RS_V2_4H = $02;
+RS_V2_8H = $02;
+RS_V2_2S = $02;
+RS_V2_4S = $02;
+RS_V2_1D = $02;
+RS_V2_2D = $02;
 RS_B3 = $03;
 RS_H3 = $03;
 RS_S3 = $03;
 RS_D3 = $03;
 RS_Q3 = $03;
+RS_V3 = $03;
+RS_V3_B = $03;
+RS_V3_H = $03;
+RS_V3_S = $03;
+RS_V3_D = $03;
+RS_V3_8B = $03;
+RS_V3_16B = $03;
+RS_V3_4H = $03;
+RS_V3_8H = $03;
+RS_V3_2S = $03;
+RS_V3_4S = $03;
+RS_V3_1D = $03;
+RS_V3_2D = $03;
 RS_B4 = $04;
 RS_H4 = $04;
 RS_S4 = $04;
 RS_D4 = $04;
 RS_Q4 = $04;
+RS_V4 = $04;
+RS_V4_B = $04;
+RS_V4_H = $04;
+RS_V4_S = $04;
+RS_V4_D = $04;
+RS_V4_8B = $04;
+RS_V4_16B = $04;
+RS_V4_4H = $04;
+RS_V4_8H = $04;
+RS_V4_2S = $04;
+RS_V4_4S = $04;
+RS_V4_1D = $04;
+RS_V4_2D = $04;
 RS_B5 = $05;
 RS_H5 = $05;
 RS_S5 = $05;
 RS_D5 = $05;
 RS_Q5 = $05;
+RS_V5 = $05;
+RS_V5_B = $05;
+RS_V5_H = $05;
+RS_V5_S = $05;
+RS_V5_D = $05;
+RS_V5_8B = $05;
+RS_V5_16B = $05;
+RS_V5_4H = $05;
+RS_V5_8H = $05;
+RS_V5_2S = $05;
+RS_V5_4S = $05;
+RS_V5_1D = $05;
+RS_V5_2D = $05;
 RS_B6 = $06;
 RS_H6 = $06;
 RS_S6 = $06;
 RS_D6 = $06;
 RS_Q6 = $06;
+RS_V6 = $06;
+RS_V6_B = $06;
+RS_V6_H = $06;
+RS_V6_S = $06;
+RS_V6_D = $06;
+RS_V6_8B = $06;
+RS_V6_16B = $06;
+RS_V6_4H = $06;
+RS_V6_8H = $06;
+RS_V6_2S = $06;
+RS_V6_4S = $06;
+RS_V6_1D = $06;
+RS_V6_2D = $06;
 RS_B7 = $07;
 RS_H7 = $07;
 RS_S7 = $07;
 RS_D7 = $07;
 RS_Q7 = $07;
+RS_V7 = $07;
+RS_V7_B = $07;
+RS_V7_H = $07;
+RS_V7_S = $07;
+RS_V7_D = $07;
+RS_V7_8B = $07;
+RS_V7_16B = $07;
+RS_V7_4H = $07;
+RS_V7_8H = $07;
+RS_V7_2S = $07;
+RS_V7_4S = $07;
+RS_V7_1D = $07;
+RS_V7_2D = $07;
 RS_B8 = $08;
 RS_H8 = $08;
 RS_S8 = $08;
 RS_D8 = $08;
 RS_Q8 = $08;
+RS_V8 = $08;
+RS_V8_B = $08;
+RS_V8_H = $08;
+RS_V8_S = $08;
+RS_V8_D = $08;
+RS_V8_8B = $08;
+RS_V8_16B = $08;
+RS_V8_4H = $08;
+RS_V8_8H = $08;
+RS_V8_2S = $08;
+RS_V8_4S = $08;
+RS_V8_1D = $08;
+RS_V8_2D = $08;
 RS_B9 = $09;
 RS_H9 = $09;
 RS_S9 = $09;
 RS_D9 = $09;
 RS_Q9 = $09;
+RS_V9 = $09;
+RS_V9_B = $09;
+RS_V9_H = $09;
+RS_V9_S = $09;
+RS_V9_D = $09;
+RS_V9_8B = $09;
+RS_V9_16B = $09;
+RS_V9_4H = $09;
+RS_V9_8H = $09;
+RS_V9_2S = $09;
+RS_V9_4S = $09;
+RS_V9_1D = $09;
+RS_V9_2D = $09;
 RS_B10 = $0A;
 RS_H10 = $0A;
 RS_S10 = $0A;
 RS_D10 = $0A;
 RS_Q10 = $0A;
+RS_V10 = $0A;
+RS_V10_B = $0A;
+RS_V10_H = $0A;
+RS_V10_S = $0A;
+RS_V10_D = $0A;
+RS_V10_8B = $0A;
+RS_V10_16B = $0A;
+RS_V10_4H = $0A;
+RS_V10_8H = $0A;
+RS_V10_2S = $0A;
+RS_V10_4S = $0A;
+RS_V10_1D = $0A;
+RS_V10_2D = $0A;
 RS_B11 = $0B;
 RS_H11 = $0B;
 RS_S11 = $0B;
 RS_D11 = $0B;
 RS_Q11 = $0B;
+RS_V11 = $0B;
+RS_V11_B = $0B;
+RS_V11_H = $0B;
+RS_V11_S = $0B;
+RS_V11_D = $0B;
+RS_V11_8B = $0B;
+RS_V11_16B = $0B;
+RS_V11_4H = $0B;
+RS_V11_8H = $0B;
+RS_V11_2S = $0B;
+RS_V11_4S = $0B;
+RS_V11_1D = $0B;
+RS_V11_2D = $0B;
 RS_B12 = $0C;
 RS_H12 = $0C;
 RS_S12 = $0C;
 RS_D12 = $0C;
 RS_Q12 = $0C;
+RS_V12 = $0C;
+RS_V12_B = $0C;
+RS_V12_H = $0C;
+RS_V12_S = $0C;
+RS_V12_D = $0C;
+RS_V12_8B = $0C;
+RS_V12_16B = $0C;
+RS_V12_4H = $0C;
+RS_V12_8H = $0C;
+RS_V12_2S = $0C;
+RS_V12_4S = $0C;
+RS_V12_1D = $0C;
+RS_V12_2D = $0C;
 RS_B13 = $0D;
 RS_H13 = $0D;
 RS_S13 = $0D;
 RS_D13 = $0D;
 RS_Q13 = $0D;
+RS_V13 = $0D;
+RS_V13_B = $0D;
+RS_V13_H = $0D;
+RS_V13_S = $0D;
+RS_V13_D = $0D;
+RS_V13_8B = $0D;
+RS_V13_16B = $0D;
+RS_V13_4H = $0D;
+RS_V13_8H = $0D;
+RS_V13_2S = $0D;
+RS_V13_4S = $0D;
+RS_V13_1D = $0D;
+RS_V13_2D = $0D;
 RS_B14 = $0E;
 RS_H14 = $0E;
 RS_S14 = $0E;
 RS_D14 = $0E;
 RS_Q14 = $0E;
+RS_V14 = $0E;
+RS_V14_B = $0E;
+RS_V14_H = $0E;
+RS_V14_S = $0E;
+RS_V14_D = $0E;
+RS_V14_8B = $0E;
+RS_V14_16B = $0E;
+RS_V14_4H = $0E;
+RS_V14_8H = $0E;
+RS_V14_2S = $0E;
+RS_V14_4S = $0E;
+RS_V14_1D = $0E;
+RS_V14_2D = $0E;
 RS_B15 = $0F;
 RS_H15 = $0F;
 RS_S15 = $0F;
 RS_D15 = $0F;
 RS_Q15 = $0F;
+RS_V15 = $0F;
+RS_V15_B = $0F;
+RS_V15_H = $0F;
+RS_V15_S = $0F;
+RS_V15_D = $0F;
+RS_V15_8B = $0F;
+RS_V15_16B = $0F;
+RS_V15_4H = $0F;
+RS_V15_8H = $0F;
+RS_V15_2S = $0F;
+RS_V15_4S = $0F;
+RS_V15_1D = $0F;
+RS_V15_2D = $0F;
 RS_B16 = $10;
 RS_H16 = $10;
 RS_S16 = $10;
 RS_D16 = $10;
 RS_Q16 = $10;
+RS_V16 = $10;
+RS_V16_B = $10;
+RS_V16_H = $10;
+RS_V16_S = $10;
+RS_V16_D = $10;
+RS_V16_8B = $10;
+RS_V16_16B = $10;
+RS_V16_4H = $10;
+RS_V16_8H = $10;
+RS_V16_2S = $10;
+RS_V16_4S = $10;
+RS_V16_1D = $10;
+RS_V16_2D = $10;
 RS_B17 = $11;
 RS_H17 = $11;
 RS_S17 = $11;
 RS_D17 = $11;
 RS_Q17 = $11;
+RS_V17 = $11;
+RS_V17_B = $11;
+RS_V17_H = $11;
+RS_V17_S = $11;
+RS_V17_D = $11;
+RS_V17_8B = $11;
+RS_V17_16B = $11;
+RS_V17_4H = $11;
+RS_V17_8H = $11;
+RS_V17_2S = $11;
+RS_V17_4S = $11;
+RS_V17_1D = $11;
+RS_V17_2D = $11;
 RS_B18 = $12;
 RS_H18 = $12;
 RS_S18 = $12;
 RS_D18 = $12;
 RS_Q18 = $12;
+RS_V18 = $12;
+RS_V18_B = $12;
+RS_V18_H = $12;
+RS_V18_S = $12;
+RS_V18_D = $12;
+RS_V18_8B = $12;
+RS_V18_16B = $12;
+RS_V18_4H = $12;
+RS_V18_8H = $12;
+RS_V18_2S = $12;
+RS_V18_4S = $12;
+RS_V18_1D = $12;
+RS_V18_2D = $12;
 RS_B19 = $13;
 RS_H19 = $13;
 RS_S19 = $13;
 RS_D19 = $13;
 RS_Q19 = $13;
+RS_V19 = $13;
+RS_V19_B = $13;
+RS_V19_H = $13;
+RS_V19_S = $13;
+RS_V19_D = $13;
+RS_V19_8B = $13;
+RS_V19_16B = $13;
+RS_V19_4H = $13;
+RS_V19_8H = $13;
+RS_V19_2S = $13;
+RS_V19_4S = $13;
+RS_V19_1D = $13;
+RS_V19_2D = $13;
 RS_B20 = $14;
 RS_H20 = $14;
 RS_S20 = $14;
 RS_D20 = $14;
 RS_Q20 = $14;
+RS_V20 = $14;
+RS_V20_B = $14;
+RS_V20_H = $14;
+RS_V20_S = $14;
+RS_V20_D = $14;
+RS_V20_8B = $14;
+RS_V20_16B = $14;
+RS_V20_4H = $14;
+RS_V20_8H = $14;
+RS_V20_2S = $14;
+RS_V20_4S = $14;
+RS_V20_1D = $14;
+RS_V20_2D = $14;
 RS_B21 = $15;
 RS_H21 = $15;
 RS_S21 = $15;
 RS_D21 = $15;
 RS_Q21 = $15;
+RS_V21 = $15;
+RS_V21_B = $15;
+RS_V21_H = $15;
+RS_V21_S = $15;
+RS_V21_D = $15;
+RS_V21_8B = $15;
+RS_V21_16B = $15;
+RS_V21_4H = $15;
+RS_V21_8H = $15;
+RS_V21_2S = $15;
+RS_V21_4S = $15;
+RS_V21_1D = $15;
+RS_V21_2D = $15;
 RS_B22 = $16;
 RS_H22 = $16;
 RS_S22 = $16;
 RS_D22 = $16;
 RS_Q22 = $16;
+RS_V22 = $16;
+RS_V22_B = $16;
+RS_V22_H = $16;
+RS_V22_S = $16;
+RS_V22_D = $16;
+RS_V22_8B = $16;
+RS_V22_16B = $16;
+RS_V22_4H = $16;
+RS_V22_8H = $16;
+RS_V22_2S = $16;
+RS_V22_4S = $16;
+RS_V22_1D = $16;
+RS_V22_2D = $16;
 RS_B23 = $17;
 RS_H23 = $17;
 RS_S23 = $17;
 RS_D23 = $17;
 RS_Q23 = $17;
+RS_V23 = $17;
+RS_V23_B = $17;
+RS_V23_H = $17;
+RS_V23_S = $17;
+RS_V23_D = $17;
+RS_V23_8B = $17;
+RS_V23_16B = $17;
+RS_V23_4H = $17;
+RS_V23_8H = $17;
+RS_V23_2S = $17;
+RS_V23_4S = $17;
+RS_V23_1D = $17;
+RS_V23_2D = $17;
 RS_B24 = $18;
 RS_H24 = $18;
 RS_S24 = $18;
 RS_D24 = $18;
 RS_Q24 = $18;
+RS_V24 = $18;
+RS_V24_B = $18;
+RS_V24_H = $18;
+RS_V24_S = $18;
+RS_V24_D = $18;
+RS_V24_8B = $18;
+RS_V24_16B = $18;
+RS_V24_4H = $18;
+RS_V24_8H = $18;
+RS_V24_2S = $18;
+RS_V24_4S = $18;
+RS_V24_1D = $18;
+RS_V24_2D = $18;
 RS_B25 = $19;
 RS_H25 = $19;
 RS_S25 = $19;
 RS_D25 = $19;
 RS_Q25 = $19;
+RS_V25 = $19;
+RS_V25_B = $19;
+RS_V25_H = $19;
+RS_V25_S = $19;
+RS_V25_D = $19;
+RS_V25_8B = $19;
+RS_V25_16B = $19;
+RS_V25_4H = $19;
+RS_V25_8H = $19;
+RS_V25_2S = $19;
+RS_V25_4S = $19;
+RS_V25_1D = $19;
+RS_V25_2D = $19;
 RS_B26 = $1A;
 RS_H26 = $1A;
 RS_S26 = $1A;
 RS_D26 = $1A;
 RS_Q26 = $1A;
+RS_V26 = $1A;
+RS_V26_B = $1A;
+RS_V26_H = $1A;
+RS_V26_S = $1A;
+RS_V26_D = $1A;
+RS_V26_8B = $1A;
+RS_V26_16B = $1A;
+RS_V26_4H = $1A;
+RS_V26_8H = $1A;
+RS_V26_2S = $1A;
+RS_V26_4S = $1A;
+RS_V26_1D = $1A;
+RS_V26_2D = $1A;
 RS_B27 = $1B;
 RS_H27 = $1B;
 RS_S27 = $1B;
 RS_D27 = $1B;
 RS_Q27 = $1B;
+RS_V27 = $1B;
+RS_V27_B = $1B;
+RS_V27_H = $1B;
+RS_V27_S = $1B;
+RS_V27_D = $1B;
+RS_V27_8B = $1B;
+RS_V27_16B = $1B;
+RS_V27_4H = $1B;
+RS_V27_8H = $1B;
+RS_V27_2S = $1B;
+RS_V27_4S = $1B;
+RS_V27_1D = $1B;
+RS_V27_2D = $1B;
 RS_B28 = $1C;
 RS_H28 = $1C;
 RS_S28 = $1C;
 RS_D28 = $1C;
 RS_Q28 = $1C;
+RS_V28 = $1C;
+RS_V28_B = $1C;
+RS_V28_H = $1C;
+RS_V28_S = $1C;
+RS_V28_D = $1C;
+RS_V28_8B = $1C;
+RS_V28_16B = $1C;
+RS_V28_4H = $1C;
+RS_V28_8H = $1C;
+RS_V28_2S = $1C;
+RS_V28_4S = $1C;
+RS_V28_1D = $1C;
+RS_V28_2D = $1C;
 RS_B29 = $1D;
 RS_H29 = $1D;
 RS_S29 = $1D;
 RS_D29 = $1D;
 RS_Q29 = $1D;
+RS_V29 = $1D;
+RS_V29_B = $1D;
+RS_V29_H = $1D;
+RS_V29_S = $1D;
+RS_V29_D = $1D;
+RS_V29_8B = $1D;
+RS_V29_16B = $1D;
+RS_V29_4H = $1D;
+RS_V29_8H = $1D;
+RS_V29_2S = $1D;
+RS_V29_4S = $1D;
+RS_V29_1D = $1D;
+RS_V29_2D = $1D;
 RS_B30 = $1E;
 RS_H30 = $1E;
 RS_S30 = $1E;
 RS_D30 = $1E;
 RS_Q30 = $1E;
+RS_V30 = $1E;
+RS_V30_B = $1E;
+RS_V30_H = $1E;
+RS_V30_S = $1E;
+RS_V30_D = $1E;
+RS_V30_8B = $1E;
+RS_V30_16B = $1E;
+RS_V30_4H = $1E;
+RS_V30_8H = $1E;
+RS_V30_2S = $1E;
+RS_V30_4S = $1E;
+RS_V30_1D = $1E;
+RS_V30_2D = $1E;
 RS_B31 = $1F;
 RS_H31 = $1F;
 RS_S31 = $1F;
 RS_D31 = $1F;
 RS_Q31 = $1F;
-RS_NZCV = $00;
-RS_FPCR = $01;
-RS_FPSR = $02;
-RS_TPIDR_EL0 = $03;
+RS_V31 = $1F;
+RS_V31_B = $1F;
+RS_V31_H = $1F;
+RS_V31_S = $1F;
+RS_V31_D = $1F;
+RS_V31_8B = $1F;
+RS_V31_16B = $1F;
+RS_V31_4H = $1F;
+RS_V31_8H = $1F;
+RS_V31_2S = $1F;
+RS_V31_4S = $1F;
+RS_V31_1D = $1F;
+RS_V31_2D = $1F;

+ 12 - 3
compiler/aarch64/racpu.pas

@@ -67,9 +67,18 @@ unit racpu;
       begin
         if ops<1 then
           internalerror(2014122001);
-        if operands[1].opr.typ<>OPR_REGISTER then
-          internalerror(2014122002);
-        result:=reg_cgsize(operands[1].opr.reg);
+        if (ops=1) and (operands[1].opr.typ=OPR_REFERENCE) then
+          exit(OS_NO);
+        case operands[1].opr.typ of
+          OPR_REGISTER:
+            result:=reg_cgsize(operands[1].opr.reg);
+          OPR_INDEXEDREG:
+            result:=reg_cgsize(operands[1].opr.indexedreg);
+          OPR_REGSET:
+            result:=OS_NO;
+          else
+           internalerror(2014122002);
+        end;
         { a 32 bit integer register could actually be 16 or 8 bit }
         if result=OS_32 then
           case oppostfix of

+ 412 - 19
compiler/aarch64/racpugas.pas

@@ -28,14 +28,23 @@ Unit racpugas;
 
     uses
       raatt,racpu,
-      cpubase;
+      aasmtai,
+      cgbase,cpubase;
 
     type
+
+      { taarch64attreader }
+
       taarch64attreader = class(tattreader)
         actoppostfix : TOpPostfix;
+        actinsmmsubreg : TSubRegister;
+        actsehdirective : TAsmSehDirective;
         function is_asmopcode(const s: string):boolean;override;
         function is_register(const s:string):boolean;override;
+        function is_targetdirective(const s: string): boolean;override;
         procedure handleopcode;override;
+        procedure handletargetdirective; override;
+       protected
         procedure BuildReference(oper: taarch64operand; is64bit: boolean);
         procedure BuildOperand(oper: taarch64operand; is64bit: boolean);
         function TryBuildShifterOp(instr: taarch64instruction; opnr: longint) : boolean;
@@ -43,6 +52,8 @@ Unit racpugas;
         procedure ReadSym(oper: taarch64operand; is64bit: boolean);
         procedure ConvertCalljmp(instr: taarch64instruction);
         function ToConditionCode(const hs: string; is_operand: boolean): tasmcond;
+        function ParseArrangementSpecifier(const hs: string): TSubRegister;
+        function ParseRegIndex(const hs: string): byte;
       end;
 
 
@@ -53,12 +64,12 @@ Unit racpugas;
       cutils,
       { global }
       globtype,verbose,
-      systems,aasmbase,aasmtai,aasmdata,aasmcpu,
+      systems,aasmbase,aasmdata,aasmcpu,
       { symtable }
       symconst,symsym,symdef,
       procinfo,
       rabase,rautils,
-      cgbase,cgutils,paramgr;
+      cgutils,paramgr;
 
 
     function taarch64attreader.is_register(const s:string):boolean;
@@ -69,9 +80,10 @@ Unit racpugas;
         end;
 
       const
-        extraregs : array[0..3] of treg2str = (
+        extraregs : array[0..4] of treg2str = (
           (name: 'FP' ; reg: NR_FP),
           (name: 'LR' ; reg: NR_LR),
+          (name: 'XR' ; reg: NR_XR),
           (name: 'IP0'; reg: NR_IP0),
           (name: 'IP1'; reg: NR_IP1));
 
@@ -81,9 +93,9 @@ Unit racpugas;
       begin
         result:=inherited is_register(s);
         { reg found?
-          possible aliases are always 2 or 3 chars
+          possible aliases are 2 or 3 chars long (FP, LR, XR are 2 chars,
+          IP0 and IP1 are 3 chars), so both lengths have to be let through
+          to the extraregs lookup below
         }
-        if result or not(length(s) in [2,3]) then
+        if result or not(length(s) in [2,3]) then
           exit;
         for i:=low(extraregs) to high(extraregs) do
           begin
@@ -98,6 +110,46 @@ Unit racpugas;
       end;
 
 
+    const
+      { AArch64 subset of the SEH directives that is_targetdirective accepts.
+        .seh_proc, .seh_endproc and .seh_endepilogue are excluded because they
+        are generated automatically when needed. }
+      recognized_directives: set of TAsmSehDirective=[
+        ash_endprologue,ash_handler,ash_handlerdata,
+        ash_stackalloc,ash_nop,ash_savefplr,ash_savefplr_x,
+        ash_savereg,ash_savereg_x,ash_saveregp,ash_saveregp_x,
+        ash_savefreg,ash_savefreg_x,ash_savefregp,ash_savefregp_x,
+        ash_setfp,ash_addfp
+      ];
+
+
+    { Returns true if s names one of the recognized SEH directives. On a match
+      the directive is stored in actsehdirective. Only active when targeting
+      system_aarch64_win64; outside of pure assembler routines a match is
+      reported as an error and rejected. }
+    function taarch64attreader.is_targetdirective(const s: string): boolean;
+      var
+        directive: TAsmSehDirective;
+      begin
+        result:=false;
+        if target_info.system=system_aarch64_win64 then
+          begin
+            { look for the first recognized directive whose name equals s }
+            for directive:=low(TAsmSehDirective) to high(TAsmSehDirective) do
+              if (directive in recognized_directives) and
+                 (s=sehdirectivestr[directive]) then
+                begin
+                  actsehdirective:=directive;
+                  result:=true;
+                  break;
+                end;
+            { allow SEH directives only in pure assembler routines }
+            if result and not (po_assembler in current_procinfo.procdef.procoptions) then
+              begin
+                Message(asmr_e_seh_in_pure_asm_only);
+                result:=false;
+              end;
+          end;
+      end;
+
+
     procedure taarch64attreader.ReadSym(oper: taarch64operand; is64bit: boolean);
       var
          tempstr, mangledname : string;
@@ -461,7 +513,7 @@ Unit racpugas;
 
       const
         shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
-          ('LSL','LSR','ASR',
+          ('LSL','LSR','ASR','ROR',
            'UXTB','UXTH','UXTW','UXTX',
            'SXTB','SXTH','SXTW','SXTX');
       var
@@ -526,7 +578,51 @@ Unit racpugas;
           else
             ;
         end;
-        result:=C_None;;
+        result:=C_None;
+      end;
+
+
+    { Maps an arrangement specifier such as '.8B' or '.S' (including the
+      leading dot, compared case-sensitively) to its subregister. Returns
+      R_SUBNONE when hs is not a known specifier. }
+    function taarch64attreader.ParseArrangementSpecifier(const hs: string): TSubRegister;
+{$push}{$j-}
+      const
+        arrangements: array[R_SUBMM8B..R_SUBMM2D] of string[4] =
+          ('.8B','.16B','.4H','.8H','.2S','.4S','.1D','.2D');
+{$pop}
+      var
+        candidate: TSubRegister;
+      begin
+        result:=R_SUBNONE;
+        if length(hs)<=2 then
+          begin
+            { specifiers without a lane count }
+            if hs='.B' then
+              result:=R_SUBMMB1
+            else if hs='.H' then
+              result:=R_SUBMMH1
+            else if hs='.S' then
+              result:=R_SUBMMS1
+            else if hs='.D' then
+              result:=R_SUBMMD1;
+          end
+        else
+          { specifiers with a lane count: search the table }
+          for candidate:=low(arrangements) to high(arrangements) do
+            if hs=arrangements[candidate] then
+              begin
+                result:=candidate;
+                break;
+              end;
+      end;
+
+
+    { Converts the textual index hs to a number. Reports asmr_e_syn_constant
+      when hs is not a valid number and asmr_e_constant_out_of_bounds (and
+      returns 0) when the value is larger than 31. }
+    function taarch64attreader.ParseRegIndex(const hs: string): byte;
+      var
+        parsed: cardinal;
+        code: longint;
+      begin
+        parsed:=0;
+        val(hs,parsed,code);
+        if code<>0 then
+          Message(asmr_e_syn_constant)
+        else
+          if parsed>31 then
+            begin
+              Message(asmr_e_constant_out_of_bounds);
+              parsed:=0;
+            end;
+        result:=parsed;
+      end;
 
 
@@ -565,7 +661,8 @@ Unit racpugas;
                oper.opr.symbol:=hl;
              end
             else if (actopcode=A_ADR) or
-               (actopcode=A_ADRP) then
+               (actopcode=A_ADRP) or
+               (actopcode=A_LDR) then
               begin
                 oper.InitRef;
                 MaybeAddGotAddrMode;
@@ -675,11 +772,50 @@ Unit racpugas;
             end; { end case }
           end;
 
+      { Consumes the current register token and returns the register. An
+        identifier starting with '.' that directly follows is parsed as an
+        arrangement specifier and applied as the register's subregister.
+        actinsmmsubreg holds the arrangement seen so far in the current
+        instruction; every MM register has to agree with it, otherwise
+        asmr_e_invalid_arrangement is reported. }
+      function parsereg: tregister;
+         var
+           subreg: tsubregister;
+        begin
+          result:=actasmregister;
+          Consume(AS_REGISTER);
+          if (actasmtoken=AS_ID) and
+             (actasmpattern[1]='.') then
+            begin
+              { explicit arrangement: only valid on an MM register and only
+                if it matches the arrangement already fixed for this
+                instruction (if any) }
+              subreg:=ParseArrangementSpecifier(upper(actasmpattern));
+              if (subreg<>R_SUBNONE) and
+                 (getregtype(result)=R_MMREGISTER) and
+                 ((actinsmmsubreg=R_SUBNONE) or
+                  (actinsmmsubreg=subreg)) then
+                begin
+                  setsubreg(result,subreg);
+                  { they all have to be the same }
+                  actinsmmsubreg:=subreg;
+                end
+              else
+                Message1(asmr_e_invalid_arrangement,actasmpattern);
+              { the specifier token is consumed even when it was rejected }
+              Consume(AS_ID);
+            end
+          else if (getregtype(result)=R_MMREGISTER) then
+            begin
+              { MM register without an explicit specifier }
+              if actinsmmsubreg<>R_SUBNONE then
+                begin
+                  { inherit the instruction-wide arrangement, unless the
+                    register already carries a different subregister }
+                  if (getsubreg(result)=R_SUBNONE) or
+                     (getsubreg(result)=actinsmmsubreg) then
+                    setsubreg(result,actinsmmsubreg)
+                  else
+                     Message1(asmr_e_invalid_arrangement,actasmpattern);
+                end
+              else if getsubreg(result)=R_SUBNONE then
+                { Vxx without an arrangement is invalid, use Qxx to specify the entire 128 bits }
+                Message1(asmr_e_invalid_arrangement,'');
+            end;
+        end;
 
       var
         tempreg: tregister;
         hl: tasmlabel;
         icond: tasmcond;
+        regindex: byte;
       Begin
         expr:='';
         case actasmtoken of
@@ -689,6 +825,35 @@ Unit racpugas;
               BuildReference(oper,is64bit);
             end;
 
+          AS_LSBRACKET: { register set }
+            begin
+              consume(AS_LSBRACKET);
+              oper.opr.typ:=OPR_REGSET;
+              oper.opr.basereg:=parsereg;
+              oper.opr.nregs:=1;
+              while (oper.opr.nregs<4) and
+                    (actasmtoken=AS_COMMA) do
+                begin
+                  consume(AS_COMMA);
+                  tempreg:=parsereg;
+                  if getsupreg(tempreg)<>((getsupreg(oper.opr.basereg)+oper.opr.nregs) mod 32) then
+                    Message(asmr_e_a64_invalid_regset);
+                  inc(oper.opr.nregs);
+                end;
+              consume(AS_RSBRACKET);
+              if actasmtoken=AS_LBRACKET then
+                begin
+                  consume(AS_LBRACKET);
+                  oper.opr.regsetindex:=ParseRegIndex(actasmpattern);
+                  consume(AS_INTNUM);
+                  consume(AS_RBRACKET);
+                end
+              else
+                oper.opr.regsetindex:=255;
+              if not(actasmtoken in [AS_END,AS_SEPARATOR,AS_COMMA]) then
+                Message(asmr_e_syn_operand);
+            end;
+
           AS_HASH: { Constant expression  }
             Begin
               Consume(AS_HASH);
@@ -790,8 +955,11 @@ Unit racpugas;
                           else
                             Message1(sym_e_unknown_id,expr);
                         end
-                       else
-                         MaybeAddGotAddrMode;
+                       else if oper.opr.typ<>OPR_LOCAL then
+                         begin
+                           oper.InitRef;
+                           MaybeAddGotAddrMode;
+                         end;
                      end;
                   end;
                   if actasmtoken=AS_DOT then
@@ -808,7 +976,7 @@ Unit racpugas;
                        OPR_REFERENCE :
                          inc(oper.opr.ref.offset,l);
                        else
-                         internalerror(200309202);
+                         internalerror(2003092005);
                      end;
                    end
                end;
@@ -821,14 +989,31 @@ Unit racpugas;
           AS_REGISTER:
             Begin
               { save the type of register used. }
-              tempreg:=actasmregister;
-              Consume(AS_REGISTER);
-              if (actasmtoken in [AS_end,AS_SEPARATOR,AS_COMMA]) then
-                Begin
-                  if not (oper.opr.typ in [OPR_NONE,OPR_REGISTER]) then
+              tempreg:=parsereg;
+              regindex:=255;
+              if (getregtype(tempreg)=R_MMREGISTER) and
+                 (actasmtoken=AS_LBRACKET) then
+                begin
+                  consume(AS_LBRACKET);
+                  regindex:=ParseRegIndex(actasmpattern);
+                  consume(AS_INTNUM);
+                  consume(AS_RBRACKET);
+                end;
+              if actasmtoken in [AS_END,AS_SEPARATOR,AS_COMMA] then
+                begin
+                  if (oper.opr.typ<>OPR_NONE) then
                     Message(asmr_e_invalid_operand_type);
-                  oper.opr.typ:=OPR_REGISTER;
-                  oper.opr.reg:=tempreg;
+                  if regindex=255 then
+                    begin
+                      oper.opr.typ:=OPR_REGISTER;
+                      oper.opr.reg:=tempreg;
+                    end
+                  else
+                    begin
+                      oper.opr.typ:=OPR_INDEXEDREG;
+                      oper.opr.indexedreg:=tempreg;
+                      oper.opr.regindex:=regindex;
+                    end;
                 end
               else
                 Message(asmr_e_syn_operand);
@@ -934,6 +1119,13 @@ Unit racpugas;
           PF_B,PF_H,PF_W,
           PF_S);
 
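+        { indexed by [is store, is replicate form, register count '1'..'4'];
+          there is no replicating store, hence the A_NONE row }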
+        ldst14: array[boolean,boolean,'1'..'4'] of tasmop =
+          (((A_LD1,A_LD2,A_LD3,A_LD4),
+            (A_LD1R,A_LD2R,A_LD3R,A_LD4R)),
+           ((A_ST1,A_ST2,A_ST3,A_ST4),
+            (A_NONE,A_NONE,A_NONE,A_NONE)));
+
       var
         j  : longint;
         hs : string;
@@ -960,6 +1152,29 @@ Unit racpugas;
             exit;
           end;
 
+        (* ldN(r)/stN.size ? (shorthand for "ldN(r)/stN { Vx.size, Vy.size } ...",
+          supported by clang and possibly gas) *)
+        actinsmmsubreg:=R_SUBNONE;
+        if (length(s)>=5) and
+           (((hs[1]='L') and
+             (hs[2]='D')) or
+            ((hs[1]='S') and
+             (hs[2]='T'))) and
+           (hs[3] in ['1'..'4']) and
+           ((hs[4]='.') or
+            ((hs[4]='R') and
+             (hs[5]='.'))) then
+          begin
+            actinsmmsubreg:=ParseArrangementSpecifier(copy(hs,4+ord(hs[4]='R'),255));
+            if actinsmmsubreg=R_SUBNONE then
+              exit;
+            actopcode:=ldst14[hs[1]='S',hs[4]='R',hs[3]];
+            actasmtoken:=AS_OPCODE;
+            if actopcode<>A_NONE then
+              is_asmopcode:=true;
+            exit;
+          end;
+
         maxlen:=max(length(hs),7);
         actopcode:=A_NONE;
         for j:=maxlen downto 1 do
@@ -1031,6 +1246,184 @@ Unit racpugas;
       end;
 
 
+    { Parses the operands of the SEH directive stored in actsehdirective (set
+      by is_targetdirective) and appends the resulting cai_seh_directive to
+      curlist. Also flags the current procedure with pi_has_unwind_info.
+      Invalid registers or offsets are reported through Message1 and, for
+      offsets, no directive is emitted. }
+    procedure taarch64attreader.handletargetdirective;
+
+      { Offset limit for the directives that take an offset: the largest
+        accepted value for the plain forms, the most negative accepted value
+        for the "_x" forms. }
+      function maxoffset(ash:TAsmSehDirective):aint;
+        begin
+          case ash of
+            ash_savefplr,
+            ash_saveregp,
+            ash_savereg,
+            ash_savefregp,
+            ash_savefreg:
+              result:=504;
+            ash_savefplr_x,
+            ash_saveregp_x,
+            ash_savefregp_x:
+              result:=-512;
+            ash_savereg_x,
+            ash_savefreg_x:
+              result:=-256;
+            ash_addfp:
+              result:=2040;
+            else
+              internalerror(2020041204);
+          end;
+        end;
+
+      { Validates hnum for directive ash and emits the directive.
+        neg=true:  hnum must lie in [maxoffset(ash),0]; it is stored negated.
+        neg=false: hnum must lie in [0,maxoffset(ash)].
+        In both cases hnum must be a multiple of 8. With hreg=NR_NO an
+        offset-only directive is created, otherwise a register+offset one. }
+      procedure add_reg_with_offset(ash:TAsmSehDirective;hreg:tregister;hnum:aint;neg:boolean);
+        begin
+          if (neg and ((hnum>0) or (hnum<maxoffset(ash)) or (((-hnum) and $7)<>0))) or
+              (not neg and ((hnum<0) or (hnum>maxoffset(ash)) or ((hnum and $7)<>0))) then
+            Message1(asmr_e_bad_seh_directive_offset,sehdirectivestr[ash])
+          else
+            begin
+              if neg then
+                hnum:=-hnum;
+              if hreg=NR_NO then
+                curlist.concat(cai_seh_directive.create_offset(ash,hnum))
+              else
+                curlist.concat(cai_seh_directive.create_reg_offset(ash,hreg,hnum));
+            end;
+        end;
+
+      var
+        hreg,
+        hreg2 : TRegister;
+        hnum : aint;
+        flags : integer;
+        ai : tai_seh_directive;
+        hs : string;
+        err : boolean;
+      begin
+        if actasmtoken<>AS_TARGET_DIRECTIVE then
+          InternalError(2020033102);
+        Consume(AS_TARGET_DIRECTIVE);
+        Include(current_procinfo.flags,pi_has_unwind_info);
+
+        case actsehdirective of
+          { directives without operands }
+          ash_nop,
+          ash_setfp,
+          ash_endprologue,
+          ash_handlerdata:
+            curlist.concat(cai_seh_directive.create(actsehdirective));
+
+          { handler name, optionally followed by ",@except" and/or ",@unwind" }
+          ash_handler:
+            begin
+              hs:=actasmpattern;
+              Consume(AS_ID);
+              flags:=0;
+              err:=false;
+              while actasmtoken=AS_COMMA do
+                begin
+                  Consume(AS_COMMA);
+                  if actasmtoken=AS_AT then
+                    begin
+                      Consume(AS_AT);
+                      if actasmtoken=AS_ID then
+                        begin
+                          uppervar(actasmpattern);
+                          if actasmpattern='EXCEPT' then
+                            flags:=flags or 1
+                          else if actasmpattern='UNWIND' then
+                            flags:=flags or 2
+                          else
+                            err:=true;
+                          Consume(AS_ID);
+                        end
+                      else
+                        err:=true;
+                    end
+                  else
+                    err:=true;
+                  if err then
+                    begin
+                      Message(asmr_e_syntax_error);
+                      RecoverConsume(false);
+                      exit;
+                    end;
+                end;
+
+              ai:=cai_seh_directive.create_name(ash_handler,hs);
+              ai.data.flags:=flags;
+              curlist.concat(ai);
+            end;
+          { offset-only directives. ash_addfp is accepted by
+            is_targetdirective and has a limit in maxoffset, so it has to be
+            handled here as well instead of ending in the internal error
+            below; its offset is non-negative like the one of ash_savefplr. }
+          ash_savefplr,
+          ash_savefplr_x,
+          ash_addfp:
+            begin
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,NR_NO,hnum,actsehdirective=ash_savefplr_x);
+            end;
+          { one whole integer register with super register number >=19, then the offset }
+          ash_savereg,
+          ash_savereg_x:
+            begin
+              hreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_INTREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<19) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              Consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_savereg_x);
+            end;
+          { two consecutive whole integer registers, then the offset; only
+            the first register is stored in the directive }
+          ash_saveregp,
+          ash_saveregp_x:
+            begin
+              hreg:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_INTREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<19) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hreg2:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg2)<>R_INTREGISTER) or (getsubreg(hreg2)<>R_SUBWHOLE) or (getsupreg(hreg2)<>getsupreg(hreg)+1) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_saveregp_x);
+            end;
+          { one whole MM register with super register number >=8, then the offset }
+          ash_savefreg,
+          ash_savefreg_x:
+            begin
+              hreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_MMREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<8) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              Consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_savefreg_x);
+            end;
+          { two consecutive whole MM registers, then the offset; only the
+            first register is stored in the directive }
+          ash_savefregp,
+          ash_savefregp_x:
+            begin
+              hreg:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg)<>R_MMREGISTER) or (getsubreg(hreg)<>R_SUBWHOLE) or (getsupreg(hreg)<8) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hreg2:=actasmregister;
+              consume(AS_REGISTER);
+              if (getregtype(hreg2)<>R_MMREGISTER) or (getsubreg(hreg2)<>R_SUBWHOLE) or (getsupreg(hreg2)<>getsupreg(hreg)+1) then
+                Message1(asmr_e_bad_seh_directive_register,sehdirectivestr[actsehdirective]);
+              consume(AS_COMMA);
+              hnum:=BuildConstExpression(false,false);
+              add_reg_with_offset(actsehdirective,hreg,hnum,actsehdirective=ash_savefregp_x);
+            end;
+          { stack allocation size: 0..$FFFFFF, multiple of 8 }
+          ash_stackalloc:
+            begin
+              hnum:=BuildConstExpression(false,false);
+              if (hnum<0) or (hnum>$FFFFFF) or ((hnum and 7)<>0) then
+                Message1(asmr_e_bad_seh_directive_offset,sehdirectivestr[ash_stackalloc])
+              else
+                curlist.concat(cai_seh_directive.create_offset(ash_stackalloc,hnum));
+            end;
+          else
+            InternalError(2020033103);
+        end;
+        { anything left on the line is an error; Consume reports it }
+        if actasmtoken<>AS_SEPARATOR then
+          Consume(AS_SEPARATOR);
+      end;
+
+
 {*****************************************************************************
                                      Initialize
 *****************************************************************************}

+ 10 - 0
compiler/aarch64/rgcpu.pas

@@ -36,6 +36,7 @@ unit rgcpu;
       trgcpu=class(trgobj)
         procedure do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
         procedure do_spill_written(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
+        function get_spill_subreg(r: tregister): tsubregister; override;
        protected
         procedure do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
       end;
@@ -51,6 +52,15 @@ implementation
       verbose,cutils,
       cgobj;
 
+    { Subregister to use when spilling r: MM registers keep their own
+      subregister, all other register types use defaultsub. }
+    function  trgcpu.get_spill_subreg(r:tregister) : tsubregister;
+      begin
+        if getregtype(r)=R_MMREGISTER then
+          result:=getsubreg(r)
+        else
+          result:=defaultsub;
+      end;
+
+
     procedure trgcpu.do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
       begin
         do_spill_op(list,A_LDR,pos,spilltemp,tempreg,orgsupreg);

+ 48 - 0
compiler/aarch64/tripletcpu.pas

@@ -0,0 +1,48 @@
+{
+    Copyright (c) 2020 by Jonas Maebe
+
+    Construct the cpu part of the triplet
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit tripletcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  globtype;
+
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+
+implementation
+
+uses
+  globals, systems, cpuinfo;
+
+{ Returns the cpu part of the target triplet: 'arm64' for the Darwin
+  systems, 'aarch64' for every other target. tripletstyle is not used. }
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+  begin
+    result:='aarch64';
+    if target_info.system in systems_darwin then
+      result:='arm64';
+  end;
+
+
+end.
+

+ 31 - 8
compiler/aasmbase.pas

@@ -170,13 +170,18 @@ interface
          { initial heap segment for 16-bit DOS }
          sec_heap,
          { dwarf based/gcc style exception handling }
-         sec_gcc_except_table
+         sec_gcc_except_table,
+         sec_arm_attribute
        );
 
        TObjCAsmSectionType = sec_objc_class..sec_objc_protolist;
 
        TAsmSectionOrder = (secorder_begin,secorder_default,secorder_end);
 
+       TSectionFlag = (SF_A,SF_W,SF_X);
+       TSectionFlags = set of TSectionFlag;
+       TSectionProgbits = (SPB_None,SPB_PROGBITS,SPB_NOBITS,SPB_NOTE,SPB_ARM_ATTRIBUTES);
+
        TAsmSymbol = class(TFPHashObject)
        private
          { this need to be incremented with every symbol loading into the
@@ -224,6 +229,7 @@ interface
          labeltype : TAsmLabelType;
          is_set    : boolean;
          is_public : boolean;
+         defined_in_asmstatement : boolean;
          constructor Createlocal(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createstatic(AList: TFPHashObjectList; nr: longint; ltyp: TAsmLabelType);
          constructor Createglobal(AList: TFPHashObjectList; const modulename: TSymStr; nr: longint; ltyp: TAsmLabelType);
@@ -234,7 +240,7 @@ interface
     function create_smartlink_library:boolean;inline;
     function create_smartlink:boolean;inline;
 
-    function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
+    function ApplyAsmSymbolRestrictions(const s: ansistring): ansistring;
 
     { dummy default noop callback }
     procedure default_global_used;
@@ -251,7 +257,7 @@ interface
 implementation
 
     uses
-      verbose;
+      verbose,fpccrc;
 
 
     function create_smartlink_sections:boolean;inline;
@@ -282,16 +288,33 @@ implementation
       end;
 
 
-    function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
+    { Returns s adapted to the restrictions of the target assembler:
+      - every ochar ('.' for as_i386_wasm, '$' otherwise) is replaced by
+        target_asm.dollarsign;
+      - if target_asm.labelmaxlen is set (<>-1) and the name is longer, the
+        middle of the name is cut out and a 13 character prefix containing
+        the CRC32 of the full name is prepended, so that the result has
+        exactly labelmaxlen characters. }
+    function ApplyAsmSymbolRestrictions(const s: ansistring): ansistring;
       var
         i : longint;
-        rchar: char;
+        rchar, ochar: char;
+        crc: Cardinal;
+        charstoremove: integer;
       begin
         Result:=s;
         rchar:=target_asm.dollarsign;
-        for i:=1 to Length(Result) do
-          if Result[i]='$' then
-            Result[i]:=rchar;
+        if target_asm.id=as_i386_wasm then
+          ochar:='.'
+        else
+          ochar:='$';
+        { nothing to replace when the assembler's character is ochar itself }
+        if (ochar<>rchar) then
+          for  i:=1 to Length(Result) do
+            if Result[i]=ochar then
+              Result[i]:=rchar;
+        if (target_asm.labelmaxlen<>-1) and (Length(Result)>target_asm.labelmaxlen) then
+          begin
+            { the CRC is taken over the complete name, after character replacement }
+            crc:=0;
+            crc:=UpdateCrc32(crc,Result[1],Length(Result));
+            { 13 = length of the prefix added below: '_' + dollarsign + 'CRC' + 8 hex digits }
+            charstoremove:=Length(Result)-target_asm.labelmaxlen+13;
+            { NOTE(review): the start index is (labelmaxlen-13) div 2; for a
+              very small labelmaxlen it would drop below 1 - presumably that
+              never happens for real assemblers, confirm. The check at the end
+              would catch a name that is still too long. }
+            Delete(Result,(Length(Result)-charstoremove) div 2,charstoremove);
+            Result:='_'+target_asm.dollarsign+'CRC'+hexstr(crc,8)+Result;
+            if Length(Result)>target_asm.labelmaxlen then
+              Internalerror(2020042502);
+          end;
       end;
 
 

+ 48 - 28
compiler/aasmcnst.pas

@@ -155,7 +155,7 @@ type
     private
      fnextfieldname: TIDString;
      function getcuroffset: asizeint;
-     procedure setnextfieldname(AValue: TIDString);
+     procedure setnextfieldname(const AValue: TIDString);
     protected
      { type of the aggregate }
      fdef: tdef;
@@ -217,7 +217,7 @@ type
     private
      function getcurragginfo: taggregateinformation;
      procedure set_next_field(AValue: tfieldvarsym);
-     procedure set_next_field_name(AValue: TIDString);
+     procedure set_next_field_name(const AValue: TIDString);
     protected
      { temporary list in which all data is collected }
      fasmlist: tasmlist;
@@ -295,6 +295,8 @@ type
      { get a label in the middle of an internal data section (no dead
        stripping) }
      function get_internal_data_section_internal_label: tasmlabel; virtual;
+     { adds a new entry to current_module.linkorderedsymbols }
+     procedure add_link_ordered_symbol(sym: tasmsymbol; const secname: TSymStr); virtual;
 
      { easy access to the top level aggregate information instance }
      property curagginfo: taggregateinformation read getcurragginfo;
@@ -336,11 +338,13 @@ type
      procedure insert_marked_aggregate_alignment(def: tdef); virtual; abstract;
      class function get_vectorized_dead_strip_section_symbol(const basename: string; st: tsymtable; options: ttcdeadstripsectionsymboloptions; start: boolean): tasmsymbol; virtual;
     public
-     class function get_vectorized_dead_strip_custom_section_name(const basename: TSymStr; st: tsymtable; out secname: TSymStr): boolean; virtual;
+     class function get_vectorized_dead_strip_custom_section_name(const basename: TSymStr; st: tsymtable; options: ttcasmlistoptions; out secname: TSymStr): boolean; virtual;
      { get the start/end symbol for a dead stripable vectorized section, such
        as the resourcestring data of a unit }
      class function get_vectorized_dead_strip_section_symbol_start(const basename: string; st: tsymtable; options: ttcdeadstripsectionsymboloptions): tasmsymbol; virtual;
      class function get_vectorized_dead_strip_section_symbol_end(const basename: string; st: tsymtable; options: ttcdeadstripsectionsymboloptions): tasmsymbol; virtual;
+     { returns true if smartlinking of the dead stripable vectorized lists is supported }
+     class function is_smartlink_vectorized_dead_strip: boolean; virtual;
 
      class function get_dynstring_rec_name(typ: tstringtype; winlike: boolean; len: asizeint): TSymStr;
      class function get_dynstring_rec(typ: tstringtype; winlike: boolean; len: asizeint): trecorddef;
@@ -384,7 +388,7 @@ type
         b) the def of the record should be automatically constructed based on
            the types of the emitted fields
 
-        packrecords: same as "pacrecords x"
+        packrecords: same as "packrecords x"
         recordalign: specify the (minimum) alignment of the start of the record
           (no equivalent in source code), used as an alternative for explicit
           align statements. Use "1" if it should be calculated based on the
@@ -512,7 +516,7 @@ implementation
      cutils,
      verbose,globals,systems,widestr,
      fmodule,
-     symtable,defutil;
+     symtable,symutil,defutil;
 
 {****************************************************************************
                        taggregateinformation
@@ -534,7 +538,7 @@ implementation
       end;
 
 
-    procedure taggregateinformation.setnextfieldname(AValue: TIDString);
+    procedure taggregateinformation.setnextfieldname(const AValue: TIDString);
       begin
         if (fnextfieldname<>'') or
            not anonrecord then
@@ -589,8 +593,7 @@ implementation
             repeat
               inc(i);
               sym:=tsym(tabstractrecorddef(def).symtable.symlist[i]);
-            until (sym.typ=fieldvarsym) and
-              not(sp_static in sym.symoptions);
+            until is_normal_fieldvarsym(sym);
             curfield:=tfieldvarsym(sym);
             nextoffset:=curfield.fieldoffset;
             curindex:=i;
@@ -828,8 +831,6 @@ implementation
 
 
    destructor tai_aggregatetypedconst.destroy;
-     var
-       ai: tai_abstracttypedconst;
      begin
        fvalues.free;
        inherited destroy;
@@ -861,7 +862,7 @@ implementation
      end;
 
 
-    procedure ttai_typedconstbuilder.set_next_field_name(AValue: TIDString);
+    procedure ttai_typedconstbuilder.set_next_field_name(const AValue: TIDString);
       var
         info: taggregateinformation;
       begin
@@ -921,6 +922,12 @@ implementation
      end;
 
 
+   { Default implementation: appends the name of sym to
+     current_module.linkorderedsymbols. secname is not used here; the method
+     is declared virtual, so presumably overrides make use of it - confirm
+     against the descendants. }
+   procedure ttai_typedconstbuilder.add_link_ordered_symbol(sym: tasmsymbol; const secname: TSymStr);
+     begin
+       current_module.linkorderedsymbols.concat(sym.Name);
+     end;
+
+
    function ttai_typedconstbuilder.aggregate_kind(def: tdef): ttypedconstkind;
      begin
        if (def.typ in [recorddef,filedef,variantdef]) or
@@ -980,14 +987,7 @@ implementation
            new_section(prelist,section,secname,alignment);
          end
        else if tcalo_new_section in options then
-         begin
-           { insert ait_cutobject for smart-linking on targets
-             that do not support smarlinking based on sections,
-             like msdos }
-           if not (tf_smartlink_sections in target_info.flags) then
-             maybe_new_object_file(prelist);
-           new_section(prelist,section,secname,alignment);
-         end
+         new_section(prelist,section,secname,alignment)
        else
          prelist.concat(cai_align.Create(alignment));
 
@@ -1046,7 +1046,7 @@ implementation
              indsecname:=secname
            else
              indsecname:=lower(symind.name);
-           indtcb:=ctai_typedconstbuilder.create([tcalo_new_section]);
+           indtcb:=ctai_typedconstbuilder.create([tcalo_new_section,tcalo_make_dead_strippable]);
            indtcb.emit_tai(tai_const.create_sym_offset(sym,0),ptrdef);
            current_asmdata.asmlists[al_indirectglobals].concatlist(indtcb.get_final_asmlist(
              symind,
@@ -1072,7 +1072,7 @@ implementation
      begin
        fvectorized_finalize_called:=true;
        sym:=nil;
-       customsecname:=get_vectorized_dead_strip_custom_section_name(basename,st,secname);
+       customsecname:=get_vectorized_dead_strip_custom_section_name(basename,st,options,secname);
        if customsecname then
          sectype:=sec_user
        else
@@ -1113,7 +1113,17 @@ implementation
              secname:=make_mangledname(basename,st,'2_'+itemname);
            exclude(options,tcalo_vectorized_dead_strip_item);
          end;
-       current_module.linkorderedsymbols.concat(sym.Name);
+       add_link_ordered_symbol(sym,secname);
+       if is_smartlink_vectorized_dead_strip then
+         options:=options+[tcalo_new_section,tcalo_make_dead_strippable]
+       else
+         begin
+           { if smartlinking of vectorized lists is not supported,
+             put the whole list into a single section. }
+           options:=options-[tcalo_new_section,tcalo_make_dead_strippable];
+           if tcalo_vectorized_dead_strip_start in options then
+             include(options,tcalo_new_section);
+         end;
        finalize_asmlist(sym,def,sectype,secname,alignment,options);
      end;
 
@@ -1151,7 +1161,9 @@ implementation
    class function ttai_typedconstbuilder.get_string_symofs(typ: tstringtype; winlikewidestring: boolean): pint;
      begin
        { darwin's linker does not support negative offsets }
-       if not(target_info.system in systems_darwin) then
+       if not(target_info.system in systems_darwin) and
+          { it seems that clang's assembler has a bug with the ADRP instruction... }
+          (target_info.system<>system_aarch64_win64) then
          result:=0
        else
          result:=get_string_header_size(typ,winlikewidestring);
@@ -1161,7 +1173,9 @@ implementation
    class function ttai_typedconstbuilder.get_dynarray_symofs:pint;
      begin
        { darwin's linker does not support negative offsets }
-       if not (target_info.system in systems_darwin) then
+       if not (target_info.system in systems_darwin) and
+          { it seems that clang's assembler has a bug with the ADRP instruction... }
+          (target_info.system<>system_aarch64_win64) then
          result:=0
        else
          result:=get_dynarray_header_size;
@@ -1298,7 +1312,7 @@ implementation
        else if (assigned(finternal_data_asmlist) and
            (list<>finternal_data_asmlist)) or
            not assigned(list) then
-         internalerror(2015032101);
+         internalerror(2015032102);
        finternal_data_asmlist:=list;
        if not assigned(l) then
          l:=get_internal_data_section_internal_label;
@@ -1546,7 +1560,7 @@ implementation
      end;
 
 
-   class function ttai_typedconstbuilder.get_vectorized_dead_strip_custom_section_name(const basename: TSymStr; st: tsymtable; out secname: TSymStr): boolean;
+   class function ttai_typedconstbuilder.get_vectorized_dead_strip_custom_section_name(const basename: TSymStr; st: tsymtable; options: ttcasmlistoptions; out secname: TSymStr): boolean;
      begin
        result:=false;
      end;
@@ -1564,6 +1578,12 @@ implementation
      end;
 
 
+   { Default implementation: returns true exactly when
+     tf_smartlink_sections is set in target_info.flags. }
+   class function ttai_typedconstbuilder.is_smartlink_vectorized_dead_strip: boolean;
+     begin
+       result:=tf_smartlink_sections in target_info.flags;
+     end;
+
+
    class function ttai_typedconstbuilder.get_dynstring_rec_name(typ: tstringtype; winlike: boolean; len: asizeint): TSymStr;
      begin
        case typ of
@@ -1843,7 +1863,7 @@ implementation
 
    procedure ttai_typedconstbuilder.emit_procdef_const(pd: tprocdef);
      begin
-       emit_tai(Tai_const.Createname(pd.mangledname,AT_FUNCTION,0),cprocvardef.getreusableprocaddr(pd));
+       emit_tai(Tai_const.Createname(pd.mangledname,AT_FUNCTION,0),cprocvardef.getreusableprocaddr(pd,pc_address_only));
      end;
 
 
@@ -2086,7 +2106,7 @@ implementation
          begin
            sym:=search_struct_member_no_helper(tabstractrecorddef(curdef),fields[i]);
            if not assigned(sym) or
-              (sym.typ<>fieldvarsym) or
+              not is_normal_fieldvarsym(sym) or
               ((i<>high(fields)) and
                not(tfieldvarsym(sym).vardef.typ in [objectdef,recorddef])) then
              internalerror(2015071505);

+ 1 - 1
compiler/aasmdef.pas

@@ -56,7 +56,7 @@ function TAsmDataDef.DefineAsmSymbolByClass(symclass: TAsmSymbolClass; const s:
     result:=DefineAsmSymbolByClassBase(symclass,s,_bind,_typ,def,wasdefined);
     { define the indirect asmsymbol if necessary }
     if not wasdefined and
-       (_bind in [AB_GLOBAL,AB_COMMON]) and
+       (_bind in [AB_GLOBAL,AB_COMMON,AB_PRIVATE_EXTERN]) and
        (_typ<>AT_DATA_NOINDIRECT) and
        (((_typ=AT_DATA) and
          (tf_supports_packages in target_info.flags) and

+ 130 - 27
compiler/aasmtai.pas

@@ -94,7 +94,8 @@ interface
           { SEH directives used in ARM,MIPS and x86_64 COFF targets }
           ait_seh_directive,
           { Dwarf CFI directive }
-          ait_cfi
+          ait_cfi,
+          ait_eabi_attribute
           );
 
         taiconst_type = (
@@ -151,10 +152,13 @@ interface
           aitconst_got,
           { offset of symbol itself from GOT }
           aitconst_gotoff_symbol,
+          { offset in TLS block }
+          aitconst_dtpoff,
           { ARM TLS code }
           aitconst_gottpoff,
-          aitconst_tpoff
-
+          aitconst_tpoff,
+          aitconst_tlsgd,
+          aitconst_tlsdesc
         );
 
         tairealconsttype = (
@@ -230,7 +234,8 @@ interface
           'llvmmetadatarefop',
 {$endif}
           'cfi',
-          'seh_directive'
+          'seh_directive',
+          'eabi_attribute'
           );
 
     type
@@ -238,15 +243,18 @@ interface
       toptype=(top_none,top_reg,top_ref,top_const,top_bool,top_local
 {$ifdef arm}
        { ARM only }
-       ,top_regset
        ,top_modeflags
        ,top_specialreg
 {$endif arm}
 {$if defined(arm) or defined(aarch64)}
+       ,top_regset
        ,top_conditioncode
        ,top_shifterop
        ,top_realconst
 {$endif defined(arm) or defined(aarch64)}
+{$ifdef aarch64}
+       ,top_indexedreg
+{$endif}
 {$ifdef m68k}
        { m68k only }
        ,top_regset
@@ -334,7 +342,8 @@ interface
                      ait_llvmmetadatarefoperand,
 {$endif llvm}
                      ait_seh_directive,
-                     ait_cfi
+                     ait_cfi,
+                     ait_eabi_attribute
                     ];
 
 
@@ -392,7 +401,10 @@ interface
           ash_endprologue,ash_handler,ash_handlerdata,
           ash_eh,ash_32,ash_no32,
           ash_setframe,ash_stackalloc,ash_pushreg,
-          ash_savereg,ash_savexmm,ash_pushframe,
+          ash_savereg,ash_savereg_x,ash_saveregp,ash_saveregp_x,
+          ash_savexmm,ash_savefreg,ash_savefreg_x,ash_savefregp,ash_savefregp_x,ash_pushframe,
+          ash_setfp,ash_addfp,ash_savefplr,ash_savefplr_x,
+          ash_nop,
           ash_pushnv,ash_savenv
         );
 
@@ -433,7 +445,10 @@ interface
         '.seh_endprologue','.seh_handler','.seh_handlerdata',
         '.seh_eh','.seh_32','seh_no32',
         '.seh_setframe','.seh_stackalloc','.seh_pushreg',
-        '.seh_savereg','.seh_savexmm','.seh_pushframe',
+        '.seh_savereg','.seh_savereg_x','.seh_saveregp','.seh_saveregp_x',
+        '.seh_savexmm','.seh_savefreg','.seh_savefreg_x','.seh_savefregp','.seh_savefregp_x','.seh_pushframe',
+        '.seh_setfp','.seh_addfp','.seh_savefplr','.seh_savefplr_x',
+        '.seh_nop',
         '.pushnv','.savenv'
       );
       symbolpairkindstr: array[TSymbolPairKind] of string[11]=(
@@ -467,6 +482,10 @@ interface
             top_conditioncode : (cc : TAsmCond);
             top_realconst : (val_real:bestreal);
         {$endif defined(arm) or defined(aarch64)}
+        {$ifdef aarch64}
+            top_regset : (basereg: tregister; nregs, regsetindex: byte);
+            top_indexedreg : (indexedreg: tregister; regindex: byte);
+        {$endif}
         {$ifdef m68k}
             top_regset : (dataregset,addrregset,fpuregset: tcpuregisterset);
             top_regpair : (reghi,reglo: tregister);
@@ -605,10 +624,6 @@ interface
           function getcopy:tlinkedlistitem;override;
        end;
 
-       type
-         TSectionFlags = (SF_None,SF_A,SF_W,SF_X);
-         TSectionProgbits = (SPB_None,SPB_PROGBITS,SPB_NOBITS);
-
        { Generates a section / segment directive }
        tai_section = class(tai)
           sectype  : TAsmSectiontype;
@@ -637,9 +652,9 @@ interface
           is_global : boolean;
           sym       : tasmsymbol;
           size      : asizeint;
-          constructor Create(const _name : string;_size : asizeint; def: tdef);
-          constructor Create_hidden(const _name : string;_size : asizeint; def: tdef);
-          constructor Create_global(const _name : string;_size : asizeint; def: tdef);
+          constructor Create(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
+          constructor Create_hidden(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
+          constructor Create_global(const _name: string; _size: asizeint; def: tdef; _typ: Tasmsymtype);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure derefimpl;override;
@@ -694,6 +709,7 @@ interface
           constructor Create_rel_sym_offset(_typ : taiconst_type; _sym,_endsym : tasmsymbol; _ofs : int64);
           constructor Create_rva_sym(_sym:tasmsymbol);
           constructor Createname(const name:string;ofs:asizeint);
+          constructor Createname_rel(const name, endname: string);
           constructor Createname(const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
           constructor Create_type_name(_typ:taiconst_type;const name:string;ofs:asizeint);
           constructor Create_type_name(_typ:taiconst_type;const name:string;_symtyp:Tasmsymtype;ofs:asizeint);
@@ -980,6 +996,18 @@ interface
           procedure ppuwrite(ppufile:tcompilerppufile);override;
         end;
 
+        teattrtyp = (eattrtype_none,eattrtype_dword,eattrtype_ntbs);
+        tai_eabi_attribute = class(tai)
+          eattr_typ : teattrtyp;
+          tag,value : dword;
+          valuestr : pstring;
+          constructor create(atag,avalue : dword);
+          constructor create(atag : dword;const avalue : string);
+          destructor destroy;override;
+          constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
+          procedure ppuwrite(ppufile:tcompilerppufile);override;
+        end;
+
     var
       { array with all class types for tais }
       aiclass : taiclassarray;
@@ -1007,7 +1035,8 @@ implementation
 {$endif x86}
       SysUtils,
       verbose,
-      globals;
+      globals,
+      ppu;
 
     const
       pputaimarker = 254;
@@ -1093,7 +1122,7 @@ implementation
     constructor tai_symbolpair.ppuload(t: taitype; ppufile: tcompilerppufile);
       begin
         inherited ppuload(t,ppufile);
-        kind:=TSymbolPairKind(ppufile.getbyte);;
+        kind:=TSymbolPairKind(ppufile.getbyte);
         sym:=ppufile.getpshortstring;
         value:=ppufile.getpshortstring;
       end;
@@ -1246,6 +1275,7 @@ implementation
         sectype:=asectype;
         secalign:=Aalign;
         secorder:=Asecorder;
+        TObjData.sectiontype2progbitsandflags(sectype,secprogbits,secflags);
         name:=stringdup(Aname);
         sec:=nil;
       end;
@@ -1257,7 +1287,7 @@ implementation
         sectype:=TAsmSectiontype(ppufile.getbyte);
         secalign:=ppufile.getlongint;
         name:=ppufile.getpshortstring;
-        secflags:=TSectionFlags(ppufile.getbyte);
+        ppufile.getset(tppuset1(secflags));
         secprogbits:=TSectionProgbits(ppufile.getbyte);
         sec:=nil;
       end;
@@ -1275,7 +1305,7 @@ implementation
         ppufile.putbyte(byte(sectype));
         ppufile.putlongint(secalign);
         ppufile.putstring(name^);
-        ppufile.putbyte(byte(secflags));
+        ppufile.putset(tppuset1(secflags));
         ppufile.putbyte(byte(secprogbits));
       end;
 
@@ -1284,12 +1314,12 @@ implementation
                              TAI_DATABLOCK
  ****************************************************************************}
 
-    constructor tai_datablock.Create(const _name : string;_size : asizeint; def: tdef);
+    constructor tai_datablock.Create(const _name : string;_size : asizeint; def: tdef; _typ:Tasmsymtype);
 
       begin
          inherited Create;
          typ:=ait_datablock;
-         sym:=current_asmdata.DefineAsmSymbol(_name,AB_LOCAL,AT_DATA,def);
+         sym:=current_asmdata.DefineAsmSymbol(_name,AB_LOCAL,_typ,def);
          { keep things aligned }
          if _size<=0 then
            _size:=sizeof(aint);
@@ -1297,13 +1327,13 @@ implementation
          is_global:=false;
       end;
 
-    constructor tai_datablock.Create_hidden(const _name: string; _size: asizeint; def: tdef);
+    constructor tai_datablock.Create_hidden(const _name: string; _size: asizeint; def: tdef; _typ:Tasmsymtype);
       begin
         if tf_supports_hidden_symbols in target_info.flags then
           begin
             inherited Create;
             typ:=ait_datablock;
-            sym:=current_asmdata.DefineAsmSymbol(_name,AB_PRIVATE_EXTERN,AT_DATA,def);
+            sym:=current_asmdata.DefineAsmSymbol(_name,AB_PRIVATE_EXTERN,_typ,def);
             { keep things aligned }
             if _size<=0 then
               _size:=sizeof(aint);
@@ -1311,15 +1341,15 @@ implementation
             is_global:=true;
           end
         else
-          Create(_name,_size,def);
+          Create(_name,_size,def,_typ);
       end;
 
 
-    constructor tai_datablock.Create_global(const _name : string;_size : asizeint; def: tdef);
+    constructor tai_datablock.Create_global(const _name : string;_size : asizeint; def: tdef; _typ:Tasmsymtype);
       begin
          inherited Create;
          typ:=ait_datablock;
-         sym:=current_asmdata.DefineAsmSymbol(_name,AB_GLOBAL,AT_DATA,def);
+         sym:=current_asmdata.DefineAsmSymbol(_name,AB_GLOBAL,_typ,def);
          { keep things aligned }
          if _size<=0 then
            _size:=sizeof(aint);
@@ -1855,6 +1885,13 @@ implementation
       end;
 
 
+    { Builds a constant from two symbol names: delegates to create_sym_offset
+      for the symbol referenced by name (type AT_NONE, offset 0) and then sets
+      endsym to the symbol referenced by endname.
+      NOTE(review): presumably emitted as the distance between the two
+      symbols - confirm in the assembler writers. }
+    constructor tai_const.Createname_rel(const name,endname:string);
+      begin
+         self.create_sym_offset(current_asmdata.RefAsmSymbol(name,AT_NONE),0);
+         endsym:=current_asmdata.RefAsmSymbol(endname,AT_NONE)
+      end;
+
+
     constructor tai_const.Create_type_name(_typ:taiconst_type;const name:string;ofs:asizeint);
       begin
          self.Create_type_name(_typ,name,AT_NONE,ofs);
@@ -2056,7 +2093,7 @@ implementation
             result:=8;
           aitconst_secrel32_symbol,
           aitconst_rva_symbol :
-            if target_info.system=system_x86_64_win64 then
+            if target_info.system in systems_peoptplus then
               result:=sizeof(longint)
             else
               result:=sizeof(pint);
@@ -2099,6 +2136,16 @@ implementation
             result:=sizeof(pint);
           aitconst_gotoff_symbol:
             result:=4;
+          aitconst_gottpoff:
+            result:=4;
+          aitconst_tlsgd:
+            result:=4;
+          aitconst_tpoff:
+            result:=4;
+          aitconst_tlsdesc:
+            result:=4;
+          aitconst_dtpoff:
+            result:=4;
           else
             internalerror(200603253);
         end;
@@ -3308,8 +3355,20 @@ implementation
         sd_offset,     { stackalloc }
         sd_reg,        { pushreg }
         sd_regoffset,  { savereg }
+        sd_regoffset,  { savereg_x }
+        sd_regoffset,  { saveregp }
+        sd_regoffset,  { saveregp_x }
         sd_regoffset,  { savexmm }
+        sd_regoffset,  { savefreg }
+        sd_regoffset,  { savefreg_x }
+        sd_regoffset,  { savefregp }
+        sd_regoffset,  { savefregp_x }
         sd_none,       { pushframe }
+        sd_none,       { setfp }
+        sd_none,       { addfp }
+        sd_offset,     { savefplr }
+        sd_offset,     { savefplr_x }
+        sd_none,       { nop }
         sd_reg,        { pushnv }
         sd_none        { savenv }
       );
@@ -3400,6 +3459,50 @@ implementation
       begin
       end;
 
+
+{****************************************************************************
+                              tai_eabi_attribute
+ ****************************************************************************}
+
+    { Creates a numeric attribute entry: the item is tagged ait_eabi_attribute,
+      its payload kind is eattrtype_dword and atag/avalue are stored in
+      tag/value. valuestr is not assigned by this constructor. }
+    constructor tai_eabi_attribute.create(atag,avalue : dword);
+      begin
+        inherited Create;
+        typ:=ait_eabi_attribute;
+        eattr_typ:=eattrtype_dword;
+        tag:=atag;
+        value:=avalue;
+      end;
+
+
+    { Creates a string attribute entry: the item is tagged ait_eabi_attribute,
+      its payload kind is eattrtype_ntbs, atag is stored in tag and valuestr
+      receives a copy of avalue allocated with NewStr. value is not assigned
+      by this constructor. }
+    constructor tai_eabi_attribute.create(atag: dword; const avalue: string);
+      begin
+        inherited Create;
+        typ:=ait_eabi_attribute;
+        eattr_typ:=eattrtype_ntbs;
+        tag:=atag;
+        valuestr:=NewStr(avalue);
+      end;
+
+
+    { Releases the string payload owned by the attribute, then runs the
+      inherited destructor. }
+    destructor tai_eabi_attribute.destroy;
+      begin
+        { valuestr is allocated with NewStr by the string-valued constructor
+          only; for numeric attributes it is still nil, hence the guard.
+          DisposeStr is the release routine matching NewStr. }
+        if assigned(valuestr) then
+          begin
+            DisposeStr(valuestr);
+            valuestr:=nil;
+          end;
+        Inherited Destroy;
+      end;
+
+
+    { NOTE(review): empty stub - it neither calls the inherited ppuload nor
+      reads back the tag/value pair that ppuwrite stores, so an instance
+      created through this constructor has no fields set here. Confirm that
+      these attributes are never loaded from a unit file. }
+    constructor tai_eabi_attribute.ppuload(t:taitype;ppufile:tcompilerppufile);
+      begin
+      end;
+
+
+    { Writes the attribute to ppufile: the inherited tai data followed by tag
+      and value as dwords. eattr_typ and valuestr are not written. }
+    procedure tai_eabi_attribute.ppuwrite(ppufile:tcompilerppufile);
+      begin
+        inherited ppuwrite(ppufile);
+        ppufile.putdword(tag);
+        ppufile.putdword(value);
+      end;
+
+
 {$ifdef JVM}
 
 {****************************************************************************

+ 181 - 112
compiler/aggas.pas

@@ -49,8 +49,9 @@ interface
         function sectionattrs(atype:TAsmSectiontype):string;virtual;
         function sectionattrs_coff(atype:TAsmSectiontype):string;virtual;
         function sectionalignment_aix(atype:TAsmSectiontype;secalign: longint):string;
+        function sectionflags(secflags:TSectionFlags):string;virtual;
         procedure WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;
-          secflags:TSectionFlags=SF_None;secprogbits:TSectionProgbits=SPB_None);virtual;
+          secflags:TSectionFlags=[];secprogbits:TSectionProgbits=SPB_None);virtual;
         procedure WriteExtraHeader;virtual;
         procedure WriteExtraFooter;virtual;
         procedure WriteInstruction(hp: tai);
@@ -155,10 +156,12 @@ implementation
 
       { Generic unaligned pseudo-instructions, seems ELF specific }
       use_ua_elf_systems = [system_mipsel_linux,system_mipseb_linux,system_mipsel_android,system_mipsel_embedded,system_mipseb_embedded];
-      ait_ua_elf_const2str : array[aitconst_16bit_unaligned..aitconst_64bit_unaligned]
-        of string[20]=(
-          #9'.2byte'#9,#9'.4byte'#9,#9'.8byte'#9
-        );
+      ait_ua_elf_const2str : array[aitconst_128bit..aitconst_64bit_unaligned] of string[20]=(
+        #9'.fixme128'#9,#9'.8byte'#9,#9'.4byte'#9,#9'.2byte'#9,#9'.byte'#9,
+        #9'.sleb128'#9,#9'.uleb128'#9,
+        #9'.rva'#9,#9'.secrel32'#9,#9'.8byte'#9,#9'.4byte'#9,#9'.2byte'#9,#9'.2byte'#9,
+        #9'.2byte'#9,#9'.4byte'#9,#9'.8byte'#9
+      );
 
 
 
@@ -199,7 +202,7 @@ implementation
            (atype<>sec_toc) and
            (atype<>sec_user) and
            { on embedded systems every byte counts, so smartlink bss too }
-           ((atype<>sec_bss) or (target_info.system in systems_embedded));
+           ((atype<>sec_bss) or (target_info.system in (systems_embedded+systems_freertos)));
       end;
 
     function TGNUAssembler.sectionname(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder):string;
@@ -216,9 +219,9 @@ implementation
 { TODO: .data.ro not yet working}
 {$if defined(arm) or defined(riscv64) or defined(powerpc)}
           '.rodata',
-{$else arm}
+{$else defined(arm) or defined(riscv64) or defined(powerpc)}
           '.data',
-{$endif arm}
+{$endif defined(arm) or defined(riscv64) or defined(powerpc)}
           '.rodata',
           '.bss',
           '.threadvar',
@@ -273,7 +276,8 @@ implementation
           '.objc_protolist',
           '.stack',
           '.heap',
-          '.gcc_except_table'
+          '.gcc_except_table',
+          '.ARM.attributes'
         );
         secnames_pic : array[TAsmSectiontype] of string[length('__DATA, __datacoal_nt,coalesced')] = ('','',
           '.text',
@@ -333,7 +337,8 @@ implementation
           '.objc_protolist',
           '.stack',
           '.heap',
-          '.gcc_except_table'
+          '.gcc_except_table',
+          '..ARM.attributes'
         );
       var
         sep     : string[3];
@@ -407,7 +412,7 @@ implementation
     function TGNUAssembler.sectionattrs(atype:TAsmSectiontype):string;
       begin
         result:='';
-        if (target_info.system in [system_i386_win32,system_x86_64_win64]) then
+        if (target_info.system in [system_i386_win32,system_x86_64_win64,system_aarch64_win64]) then
           begin
             result:=sectionattrs_coff(atype);
           end;
@@ -429,7 +434,10 @@ implementation
 
           { TODO: these need a fix to become read-only }
           sec_rodata, sec_rodata_norel:
-            result:='d';
+            if target_info.system=system_aarch64_win64 then
+              result:='r'
+            else
+              result:='d';
 
           sec_bss:
             result:='b';
@@ -452,6 +460,24 @@ implementation
       end;
 
 
+    { Translates a set of section flags into the flag letters used in a
+      .section directive: SF_A gives 'a', SF_W gives 'w', SF_X gives 'x'.
+      Any other member of the set contributes nothing; an empty set yields
+      an empty string. }
+    function TGNUAssembler.sectionflags(secflags:TSectionFlags):string;
+      var
+        flag : TSectionFlag;
+      begin
+        result:='';
+        { letters are appended in the order in which the set is enumerated }
+        for flag in secflags do
+          if flag=SF_A then
+            result:=result+'a'
+          else if flag=SF_W then
+            result:=result+'w'
+          else if flag=SF_X then
+            result:=result+'x';
+      end;
+
+
     function TGNUAssembler.sectionalignment_aix(atype:TAsmSectiontype;secalign: longint): string;
       var
         l: longint;
@@ -468,11 +494,15 @@ implementation
       end;
 
 
-    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;secflags:TSectionFlags=SF_None;secprogbits:TSectionProgbits=SPB_None);
+    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;secflags:TSectionFlags=[];secprogbits:TSectionProgbits=SPB_None);
       var
         s : string;
+        usesectionprogbits,
+        usesectionflags: boolean;
       begin
         writer.AsmLn;
+        usesectionflags:=false;
+        usesectionprogbits:=false;
         case target_info.system of
          system_i386_OS2,
          system_i386_EMX: ;
@@ -481,14 +511,37 @@ implementation
            begin
              { ... but vasm is GAS compatible on amiga/atari, and supports named sections }
              if create_smartlink_sections then
-               writer.AsmWrite('.section ');
+               begin
+                 writer.AsmWrite('.section ');
+                 usesectionflags:=true;
+                 usesectionprogbits:=true;
+                 { hack, to avoid linker warnings on Amiga/Atari, when vlink merges
+                   rodata sections into data sections. Also avoid the warning when
+                   the linker realizes the code section cannot be write protected and
+                   adds the writable bit. }
+                 if atype in [sec_code,sec_rodata,sec_rodata_norel] then
+                   include(secflags,SF_W);
+               end;
+           end;
+         system_i386_go32v2,
+         system_i386_win32,
+         system_x86_64_win64,
+         system_i386_wince,
+         system_arm_wince,
+         system_aarch64_win64:
+           begin
+             { according to the GNU AS guide AS for COFF does not support the
+               progbits }
+             writer.AsmWrite('.section ');
+             usesectionflags:=true;
            end;
          system_powerpc_darwin,
          system_i386_darwin,
          system_i386_iphonesim,
          system_powerpc64_darwin,
          system_x86_64_darwin,
-         system_arm_darwin,
+         system_arm_ios,
+         system_aarch64_ios,
          system_aarch64_darwin,
          system_x86_64_iphonesim,
          system_powerpc_aix,
@@ -498,31 +551,44 @@ implementation
                writer.AsmWrite('.section ');
            end
          else
-          writer.AsmWrite('.section ');
+           begin
+             writer.AsmWrite('.section ');
+             { sectionname may rename those sections, so we do not write flags/progbits for them,
+               the assembler will ignore them/spit out a warning anyways }
+             if not(atype in [sec_data,sec_rodata,sec_rodata_norel]) then
+               begin
+                 usesectionflags:=true;
+                 usesectionprogbits:=true;
+               end;
+           end
         end;
         s:=sectionname(atype,aname,aorder);
         writer.AsmWrite(s);
         { flags explicitly defined? }
-        if (secflags<>SF_None) or (secprogbits<>SPB_None) then
+        if (usesectionflags or usesectionprogbits) and
+           ((secflags<>[]) or
+            (secprogbits<>SPB_None)) then
           begin
-            case secflags of
-              SF_A:
-                writer.AsmWrite(',"a"');
-              SF_W:
-                writer.AsmWrite(',"w"');
-              SF_X:
-                writer.AsmWrite(',"x"');
-              SF_None:
-                writer.AsmWrite(',""');
-            end;
-            case secprogbits of
-              SPB_PROGBITS:
-                writer.AsmWrite(',%progbits');
-              SPB_NOBITS:
-                writer.AsmWrite(',%nobits');
-              SPB_None:
-                ;
-            end;
+            if usesectionflags then
+              begin
+                s:=',"'+sectionflags(secflags);
+                writer.AsmWrite(s+'"');
+              end;
+            if usesectionprogbits then
+              begin
+                case secprogbits of
+                  SPB_PROGBITS:
+                    writer.AsmWrite(',%progbits');
+                  SPB_NOBITS:
+                    writer.AsmWrite(',%nobits');
+                  SPB_NOTE:
+                    writer.AsmWrite(',%note');
+                  SPB_None:
+                    ;
+                  else
+                    InternalError(2019100801);
+                end;
+              end;
           end
         else
           case atype of
@@ -544,7 +610,7 @@ implementation
                   system_i386_darwin,
                   system_i386_iphonesim:
                     writer.AsmWriteln('__IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5');
-                  system_arm_darwin:
+                  system_arm_ios:
                     if (cs_create_pic in current_settings.moduleswitches) then
                       writer.AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
                     else
@@ -771,38 +837,6 @@ implementation
 
          case hp.typ of
 
-           ait_comment :
-             Begin
-               writer.AsmWrite(asminfo^.comment);
-               writer.AsmWritePChar(tai_comment(hp).str);
-               writer.AsmLn;
-             End;
-
-           ait_regalloc :
-             begin
-               if (cs_asm_regalloc in current_settings.globalswitches) then
-                 begin
-                   writer.AsmWrite(#9+asminfo^.comment+'Register ');
-                   repeat
-                     writer.AsmWrite(std_regname(Tai_regalloc(hp).reg));
-                     if (hp.next=nil) or
-                        (tai(hp.next).typ<>ait_regalloc) or
-                        (tai_regalloc(hp.next).ratype<>tai_regalloc(hp).ratype) then
-                       break;
-                     hp:=tai(hp.next);
-                     writer.AsmWrite(',');
-                   until false;
-                   writer.AsmWrite(' ');
-                   writer.AsmWriteLn(regallocstr[tai_regalloc(hp).ratype]);
-                 end;
-             end;
-
-           ait_tempalloc :
-             begin
-               if (cs_asm_tempalloc in current_settings.globalswitches) then
-                 WriteTempalloc(tai_tempalloc(hp));
-             end;
-
            ait_align :
              begin
                doalign(tai_align_abstract(hp).aligntype,tai_align_abstract(hp).use_op,tai_align_abstract(hp).fillop,tai_align_abstract(hp).maxbytes,last_align,lasthp);
@@ -812,7 +846,7 @@ implementation
              begin
                if tai_section(hp).sectype<>sec_none then
                  if replaceforbidden then
-                   WriteSection(tai_section(hp).sectype,ReplaceForbiddenAsmSymbolChars(tai_section(hp).name^),tai_section(hp).secorder,
+                   WriteSection(tai_section(hp).sectype,ApplyAsmSymbolRestrictions(tai_section(hp).name^),tai_section(hp).secorder,
                      tai_section(hp).secalign,tai_section(hp).secflags,tai_section(hp).secprogbits)
                  else
                    WriteSection(tai_section(hp).sectype,tai_section(hp).name^,tai_section(hp).secorder,
@@ -864,8 +898,8 @@ implementation
                    if tai_datablock(hp).is_global then
                      begin
                        writer.AsmWrite(#9'.globl ');
-                       writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name));
-                       writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name));
+                       writer.AsmWriteln(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name));
+                       writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name));
                        writer.AsmWriteln(':');
                        writer.AsmWrite(#9'.space ');
                        writer.AsmWriteln(tostr(tai_datablock(hp).size));
@@ -875,7 +909,7 @@ implementation
                    else
                      begin
                        writer.AsmWrite(#9'.lcomm ');
-                       writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name));
+                       writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name));
                        writer.AsmWrite(',');
                        writer.AsmWrite(tostr(tai_datablock(hp).size)+',');
                        writer.AsmWrite('_data.bss_,');
@@ -895,7 +929,7 @@ implementation
                          begin
                            writer.AsmWrite(#9'.comm'#9);
                            if replaceforbidden then
-                             writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name))
+                             writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name))
                            else
                              writer.AsmWrite(tai_datablock(hp).sym.name);
                            writer.AsmWrite(','+tostr(tai_datablock(hp).size));
@@ -906,7 +940,7 @@ implementation
                          begin
                            writer.AsmWrite(#9'.lcomm'#9);
                            if replaceforbidden then
-                             writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name));
+                             writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_datablock(hp).sym.name));
                            else
                              writer.AsmWrite(tai_datablock(hp).sym.name);
                            writer.AsmWrite(','+tostr(tai_datablock(hp).size));
@@ -923,7 +957,7 @@ implementation
                              WriteHiddenSymbol(tai_datablock(hp).sym);
                            writer.AsmWrite(#9'.globl ');
                            if replaceforbidden then
-                             writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name))
+                             writer.AsmWriteln(ApplyAsmSymbolRestrictions(Tai_datablock(hp).sym.name))
                            else
                              writer.AsmWriteln(Tai_datablock(hp).sym.name);
                          end;
@@ -934,10 +968,10 @@ implementation
                        if replaceforbidden then
                          begin
                            if (tf_needs_symbol_type in target_info.flags) then
-                             writer.AsmWriteln(#9'.type '+ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name)+','+sepChar+'object');
+                             writer.AsmWriteln(#9'.type '+ApplyAsmSymbolRestrictions(Tai_datablock(hp).sym.name)+','+sepChar+'object');
                            if (tf_needs_symbol_size in target_info.flags) and (tai_datablock(hp).size > 0) then
-                              writer.AsmWriteln(#9'.size '+ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name)+','+tostr(Tai_datablock(hp).size));
-                           writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(Tai_datablock(hp).sym.name))
+                              writer.AsmWriteln(#9'.size '+ApplyAsmSymbolRestrictions(Tai_datablock(hp).sym.name)+','+tostr(Tai_datablock(hp).size));
+                           writer.AsmWrite(ApplyAsmSymbolRestrictions(Tai_datablock(hp).sym.name))
                          end
                        else
                          begin
@@ -992,7 +1026,39 @@ implementation
                      writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(gottpoff)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
                      writer.Asmln;
                    end;
+                 aitconst_tlsgd:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsgd)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tlsdesc:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsdesc)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
+                 aitconst_tpoff:
+                   begin
+                     if assigned(tai_const(hp).endsym) or (tai_const(hp).symofs<>0) then
+                       Internalerror(2019092805);
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tpoff)');
+                     writer.Asmln;
+                   end;
 {$endif cpu64bitaddr}
+                 aitconst_dtpoff:
+                   begin
+                     { writes the symbol name followed by the dtpoff relocation
+                       suffix used by the assembler of the selected target }
+{$ifdef arm}
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(tlsldo)');
+                     writer.Asmln;
+{$endif arm}
+{$ifdef x86_64}
+                     writer.AsmWrite(#9'.long'#9+tai_const(hp).sym.name+'@dtpoff');
+                     writer.Asmln;
+{$endif x86_64}
+{$ifdef i386}
+                     { the operator is spelled @dtpoff, exactly as in the x86_64 branch }
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'@dtpoff');
+                     writer.Asmln;
+{$endif i386}
+                   end;
                  aitconst_got:
                    begin
                      if tai_const(hp).symofs<>0 then
@@ -1075,8 +1141,7 @@ implementation
                          if (constdef in ait_unaligned_consts) and
                             (target_info.system in use_ua_sparc_systems) then
                            writer.AsmWrite(ait_ua_sparc_const2str[constdef])
-                         else if (constdef in ait_unaligned_consts) and
-                                 (target_info.system in use_ua_elf_systems) then
+                         else if (target_info.system in use_ua_elf_systems) then
                            writer.AsmWrite(ait_ua_elf_const2str[constdef])
                          { we can also have unaligned pointers in packed record
                            constants, which don't get translated into
@@ -1105,7 +1170,7 @@ implementation
                                else
                                  s:=tai_const(hp).sym.name;
                                if replaceforbidden then
-                                 s:=ReplaceForbiddenAsmSymbolChars(s);
+                                 s:=ApplyAsmSymbolRestrictions(s);
                                if tai_const(hp).value<>0 then
                                  s:=s+tostr_with_plus(tai_const(hp).value);
                              end
@@ -1207,12 +1272,12 @@ implementation
 {$endif arm}
                      writer.AsmWrite('.globl'#9);
                      if replaceforbidden then
-                       writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_label(hp).labsym.name))
+                       writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_label(hp).labsym.name))
                      else
                        writer.AsmWriteLn(tai_label(hp).labsym.name);
                    end;
                   if replaceforbidden then
-                    writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_label(hp).labsym.name))
+                    writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_label(hp).labsym.name))
                   else
                     writer.AsmWrite(tai_label(hp).labsym.name);
                   writer.AsmWriteLn(':');
@@ -1230,7 +1295,7 @@ implementation
                 begin
                   writer.AsmWrite('.globl'#9);
                   if replaceforbidden then
-                    writer.AsmWriteln(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name))
+                    writer.AsmWriteln(ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name))
                   else
                     writer.AsmWriteln(tai_symbol(hp).sym.name);
                   if (tai_symbol(hp).sym.bind=AB_PRIVATE_EXTERN) then
@@ -1265,14 +1330,14 @@ implementation
                        s:=#9'.llong .';
                        ch:='3';
                      end;
-                   writer.AsmWriteLn(#9'.csect '+ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name)+'[DS],'+ch);
-                   writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name)+':');
-                   writer.AsmWriteln(s+ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name)+', TOC[tc0], 0');
+                   writer.AsmWriteLn(#9'.csect '+ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name)+'[DS],'+ch);
+                   writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name)+':');
+                   writer.AsmWriteln(s+ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name)+', TOC[tc0], 0');
                    writer.AsmWriteln(#9'.csect .text[PR]');
                    if (tai_symbol(hp).is_global) then
-                     writer.AsmWriteLn('.globl .'+ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name))
+                     writer.AsmWriteLn('.globl .'+ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name))
                    else
-                     writer.AsmWriteLn('.lglobl .'+ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name));
+                     writer.AsmWriteLn('.lglobl .'+ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name));
                    { the dotted name is the name of the actual function entry }
                    writer.AsmWrite('.');
                  end
@@ -1293,9 +1358,9 @@ implementation
                  end;
                if replaceforbidden then
                  if not(tai_symbol(hp).has_value) then
-                   writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name + ':'))
+                   writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name + ':'))
                  else
-                   writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbol(hp).sym.name + '=' + tostr(tai_symbol(hp).value)))
+                   writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbol(hp).sym.name + '=' + tostr(tai_symbol(hp).value)))
                else if not(tai_symbol(hp).has_value) then
                  writer.AsmWriteLn(tai_symbol(hp).sym.name + ':')
                else
@@ -1315,13 +1380,13 @@ implementation
                if replaceforbidden then
                  begin
                    { avoid string truncation }
-                   writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                   writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_symbolpair(hp).sym^));
                    writer.AsmWrite(s);
-                   writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).value^));
+                   writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbolpair(hp).value^));
                    if tai_symbolpair(hp).kind=spk_set_global then
                      begin
                        writer.AsmWrite(#9'.globl ');
-                       writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                       writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbolpair(hp).sym^));
                      end;
                  end
                else
@@ -1350,7 +1415,7 @@ implementation
                      (tai_symbol_end(hp).sym.typ=AT_FUNCTION) then
                     writer.AsmWrite('.');
                   if replaceforbidden then
-                    writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbol_end(hp).sym.name))
+                    writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_symbol_end(hp).sym.name))
                   else
                     writer.AsmWrite(tai_symbol_end(hp).sym.name);
                   writer.AsmWrite(', '+s+' - ');
@@ -1359,7 +1424,7 @@ implementation
                      (tai_symbol_end(hp).sym.typ=AT_FUNCTION) then
                     writer.AsmWrite('.');
                   if replaceforbidden then
-                    writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbol_end(hp).sym.name))
+                    writer.AsmWriteLn(ApplyAsmSymbolRestrictions(tai_symbol_end(hp).sym.name))
                   else
                     writer.AsmWriteLn(tai_symbol_end(hp).sym.name);
                 end;
@@ -1434,7 +1499,7 @@ implementation
                if tai_directive(hp).name <>'' then
                  begin
                    if replaceforbidden then
-                     writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_directive(hp).name))
+                     writer.AsmWrite(ApplyAsmSymbolRestrictions(tai_directive(hp).name))
                    else
                      writer.AsmWrite(tai_directive(hp).name);
                  end;
@@ -1467,22 +1532,25 @@ implementation
 {$endif DISABLE_WIN64_SEH}
              end;
 
-           ait_varloc:
-             begin
-               if tai_varloc(hp).newlocationhi<>NR_NO then
-                 writer.AsmWrite(strpnew('Var '+tai_varloc(hp).varsym.realname+' located in register '+
-                   std_regname(tai_varloc(hp).newlocationhi)+':'+std_regname(tai_varloc(hp).newlocation)))
-               else
-                 writer.AsmWrite(strpnew('Var '+tai_varloc(hp).varsym.realname+' located in register '+
-                   std_regname(tai_varloc(hp).newlocation)));
-               writer.AsmLn;
-             end;
            ait_cfi:
              begin
                WriteCFI(tai_cfi_base(hp));
              end;
+           ait_eabi_attribute:
+             begin
+               case tai_eabi_attribute(hp).eattr_typ of
+                 eattrtype_dword:
+                   writer.AsmWrite(#9'.eabi_attribute '+tostr(tai_eabi_attribute(hp).tag)+','+tostr(tai_eabi_attribute(hp).value));
+                 eattrtype_ntbs:
+                   writer.AsmWrite(#9'.eabi_attribute '+tostr(tai_eabi_attribute(hp).tag)+',"'+tai_eabi_attribute(hp).valuestr^+'"');
+                 else
+                   Internalerror(2019100601);
+               end;
+               writer.AsmLn;
+             end;
            else
-             internalerror(2006012201);
+             if not WriteComments(hp) then
+               internalerror(2006012201);
          end;
          lasthp:=hp;
          hp:=tai(hp.next);
@@ -1512,7 +1580,7 @@ implementation
         if asminfo^.dollarsign='$' then
           writer.AsmWriteLn(s.name)
         else
-          writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(s.name))
+          writer.AsmWriteLn(ApplyAsmSymbolRestrictions(s.name))
       end;
 
 
@@ -1521,7 +1589,7 @@ implementation
         { on Windows/(PE)COFF, global symbols are hidden by default: global
           symbols that are not explicitly exported from an executable/library,
           become hidden }
-        if target_info.system in systems_windows then
+        if (target_info.system in (systems_windows+systems_wince)) then
           exit;
         if target_info.system in systems_darwin then
           writer.AsmWrite(#9'.private_extern ')
@@ -1530,7 +1598,7 @@ implementation
         if asminfo^.dollarsign='$' then
           writer.AsmWriteLn(sym.name)
         else
-          writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(sym.name))
+          writer.AsmWriteLn(ApplyAsmSymbolRestrictions(sym.name))
       end;
 
 
@@ -1957,7 +2025,8 @@ implementation
          sec_none (* sec_objc_protlist *),
          sec_none (* sec_stack *),
          sec_none (* sec_heap *),
-         sec_none (* gcc_except_table *)
+         sec_none (* gcc_except_table *),
+         sec_none (* sec_arm_attribute *)
         );
       begin
         Result := inherited SectionName (SecXTable [AType], AName, AOrder);

+ 10 - 16
compiler/aopt.pas

@@ -79,6 +79,7 @@ Unit aopt;
 
     uses
       cutils,
+      cprofile,
       globtype, globals,
       verbose,
       cpubase,
@@ -147,6 +148,7 @@ Unit aopt;
           p := BlockStart;
           While (P <> BlockEnd) Do
             Begin
+              prefetch(pointer(p.Next)^);
               Case p.typ Of
                 ait_Label:
                   begin
@@ -190,7 +192,6 @@ Unit aopt;
                       End
                     else if tai_regalloc(p).ratype=ra_dealloc then
                       Begin
-                        ExcludeRegFromUsedRegs(tai_regalloc(p).Reg,Regs);
                         hp1 := p;
                         hp2 := nil;
                         While Not(assigned(FindRegAlloc(tai_regalloc(p).Reg, tai(hp1.Next)))) And
@@ -231,7 +232,9 @@ Unit aopt;
                             AsmL.remove(p);
                             p.free;
                             p := hp1;
-                          end;
+                          end
+                        else
+                          ExcludeRegFromUsedRegs(tai_regalloc(p).Reg,Regs);
                       End
                   End
                 else
@@ -270,28 +273,16 @@ Unit aopt;
     Procedure TAsmOptimizer.Optimize;
       Var
         HP: tai;
-        pass: longint;
       Begin
-        pass:=0;
         BlockStart := tai(AsmL.First);
         pass_1;
         While Assigned(BlockStart) Do
           Begin
             if (cs_opt_peephole in current_settings.optimizerswitches) then
               begin
-                if pass = 0 then
-                  PrePeepHoleOpts;
-                { Peephole optimizations }
+                PrePeepHoleOpts;
                 PeepHoleOptPass1;
-                { Only perform them twice in the first pass }
-                if pass = 0 then
-                  PeepHoleOptPass1;
-              end;
-            { more peephole optimizations }
-            if (cs_opt_peephole in current_settings.optimizerswitches) then
-              begin
                 PeepHoleOptPass2;
-                { if pass = last_pass then }
                 PostPeepHoleOpts;
               end;
             { free memory }
@@ -345,6 +336,7 @@ Unit aopt;
         p:=BlockStart;
         while p<>BlockEnd Do
           begin
+            prefetch(pointer(p.Next)^);
             if SchedulerPass1Cpu(p) then
               continue;
             p:=tai(p.next);
@@ -387,12 +379,14 @@ Unit aopt;
       var
         p : TAsmOptimizer;
       begin
+        ResumeTimer(ct_aopt);
         p:=casmoptimizer.Create(AsmL);
         p.Optimize;
 {$ifdef DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
         p.Debug_InsertInstrRegisterDependencyInfo;
 {$endif DEBUG_INSTRUCTIONREGISTERDEPENDENCIES}
-        p.free
+        p.free;
+        StopTimer;
       end;
 
 

+ 62 - 27
compiler/aoptbase.pas

@@ -49,9 +49,9 @@ unit aoptbase;
         { returns true if register Reg is used by instruction p1 }
         Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;virtual;
         { returns true if register Reg occurs in operand op }
-        Function RegInOp(Reg: TRegister; const op: toper): Boolean;
+        class function RegInOp(Reg: TRegister; const op: toper): Boolean; static;
         { returns true if register Reg is used in the reference Ref }
-        Function RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
+        class function RegInRef(Reg: TRegister; Const Ref: TReference): Boolean; static;
 
         function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;virtual;
 
@@ -61,13 +61,13 @@ unit aoptbase;
         { gets the next tai object after current that contains info relevant }
         { to the optimizer in Next. If there is none, it returns false and   }
         { sets Next to nil                                                   }
-        class Function GetNextInstruction(Current: tai; Var Next: tai): Boolean;
-        { gets the previous tai object after current that contains info  }
-        { relevant to the optimizer in last. If there is none, it retuns }
-        { false and sets last to nil                                     }
-        Function GetLastInstruction(Current: tai; Var Last: tai): Boolean;
+        class function GetNextInstruction(Current: tai; out Next: tai): Boolean; static;
+        { gets the previous tai object before current that contains info  }
+        { relevant to the optimizer in last. If there is none, it returns }
+        { false and sets last to nil                                      }
+        class function GetLastInstruction(Current: tai; out Last: tai): Boolean; static;
 
-        function SkipEntryExitMarker(current: tai; var next: tai): boolean;
+        class function SkipEntryExitMarker(current: tai; out next: tai): boolean; static;
 
         { processor dependent methods }
 
@@ -104,10 +104,13 @@ unit aoptbase;
 
         { compares reg1 and reg2 having the same type and being the same super registers
           so the register size is neglected }
-        function SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;
+        class function SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean; static; {$ifdef USEINLINE}inline;{$endif}
+
+        { returns true if changing reg1 changes reg2 or vice versa }
+        class function RegistersInterfere(reg1,reg2 : TRegister) : Boolean; static; {$ifdef USEINLINE}inline;{$endif}
     end;
 
-    function labelCanBeSkipped(p: tai_label): boolean;
+    function labelCanBeSkipped(p: tai_label): boolean; {$ifdef USEINLINE}inline;{$endif}
 
   implementation
 
@@ -140,10 +143,10 @@ unit aoptbase;
     End;
 
 
-  Function TAOptBase.RegInOp(Reg: TRegister; const op: toper): Boolean;
+  class function TAOptBase.RegInOp(Reg: TRegister; const op: toper): Boolean;
     Begin
       Case op.typ Of
-        Top_Reg: RegInOp := SuperRegistersEqual(Reg,op.reg);
+        Top_Reg: RegInOp := RegistersInterfere(Reg,op.reg);
         Top_Ref: RegInOp := RegInRef(Reg, op.ref^);
         {$ifdef arm}
         Top_Shifterop: RegInOp := op.shifterop^.rs = Reg;
@@ -154,18 +157,18 @@ unit aoptbase;
     End;
 
 
-  Function TAOptBase.RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
+  class function TAOptBase.RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
   Begin { The result is true when Reg interferes with the base register of Ref,
           with its index register on targets that define cpurefshaveindexreg,
           or, on x86, when Reg is the explicit segment of Ref or the segment
           that is implied by the base register. }
-    RegInRef := SuperRegistersEqual(Ref.Base,Reg)
+    RegInRef := RegistersInterfere(Ref.Base,Reg)
 {$ifdef cpurefshaveindexreg}
-    Or SuperRegistersEqual(Ref.Index,Reg)
+    Or RegistersInterfere(Ref.Index,Reg)
 {$endif cpurefshaveindexreg}
 {$ifdef x86}
     or (Reg=Ref.segment)
     { if Ref.segment isn't set, the cpu uses implicitly ss or ds, depending on the base register }
     or ((Ref.segment=NR_NO) and (
-      ((Reg=NR_SS) and (SuperRegistersEqual(Ref.base,NR_EBP) or SuperRegistersEqual(Ref.base,NR_ESP))) or
-      ((Reg=NR_DS) and not(SuperRegistersEqual(Ref.base,NR_EBP) or SuperRegistersEqual(Ref.base,NR_ESP)))
+      ((Reg=NR_SS) and (RegistersInterfere(Ref.base,NR_EBP) or RegistersInterfere(Ref.base,NR_ESP))) or
+      ((Reg=NR_DS) and not(RegistersInterfere(Ref.base,NR_EBP) or RegistersInterfere(Ref.base,NR_ESP)))
     ))
 {$endif x86}
   End;
@@ -176,13 +179,13 @@ unit aoptbase;
   End;
 
 
-  function labelCanBeSkipped(p: tai_label): boolean; inline;
+  function labelCanBeSkipped(p: tai_label): boolean; {$ifdef USEINLINE}inline;{$endif}
   begin { a label can be skipped when its symbol is not used or its label type is not alt_jump }
     labelCanBeSkipped := not(p.labsym.is_used) or (p.labsym.labeltype<>alt_jump);
   end;
 
 
-  class Function TAOptBase.GetNextInstruction(Current: tai; Var Next: tai): Boolean;
+  class function TAOptBase.GetNextInstruction(Current: tai; out Next: tai): Boolean;
   Begin
     Repeat
       Current := tai(Current.Next);
@@ -195,7 +198,12 @@ unit aoptbase;
 {$endif cpudelayslot}
              ((Current.typ = ait_label) And
               labelCanBeSkipped(Tai_Label(Current)))) Do
-        Current := tai(Current.Next);
+        begin
+          { this won't help the current loop, but it helps when returning from GetNextInstruction
+            as the next entry is probably already in the cache }
+          prefetch(pointer(Current.Next)^);
+          Current := Tai(Current.Next);
+        end;
       If Assigned(Current) And
          (Current.typ = ait_Marker) And
          (Tai_Marker(Current).Kind = mark_NoPropInfoStart) Then
@@ -203,7 +211,12 @@ unit aoptbase;
           While Assigned(Current) And
                 ((Current.typ <> ait_Marker) Or
                  (Tai_Marker(Current).Kind <> mark_NoPropInfoEnd)) Do
-            Current := Tai(Current.Next);
+            begin
+              { this won't help the current loop, but it helps when returning from GetNextInstruction
+                as the next entry is probably already in the cache }
+              prefetch(pointer(Current.Next)^);
+              Current := Tai(Current.Next);
+            end;
         End;
     Until Not(Assigned(Current)) Or
           (Current.typ <> ait_Marker) Or
@@ -221,7 +234,7 @@ unit aoptbase;
         End;
   End;
 
-  Function TAOptBase.GetLastInstruction(Current: tai; Var Last: tai): Boolean;
+  class function TAOptBase.GetLastInstruction(Current: tai; out Last: tai): Boolean;
   Begin
     Repeat
       Current := Tai(Current.previous);
@@ -263,12 +276,12 @@ unit aoptbase;
   End;
 
 
-  function TAOptBase.SkipEntryExitMarker(current: tai; var next: tai): boolean;
+  class function TAOptBase.SkipEntryExitMarker(current: tai; out next: tai): boolean;
     begin
       result:=true;
+      next:=current;
       if current.typ<>ait_marker then
         exit;
-      next:=current;
       while GetNextInstruction(next,next) do
         begin
           if (next.typ<>ait_marker) or not(tai_marker(next).Kind in [mark_Position,mark_BlockStart]) then
@@ -316,12 +329,34 @@ unit aoptbase;
     end;
 
 
-  function TAOptBase.SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;
+  class function TAOptBase.SuperRegistersEqual(reg1,reg2 : TRegister) : Boolean;{$ifdef USEINLINE}inline;{$endif}
   Begin { True when reg1 and reg2 have the same register type and the same
           super register, so the register size is neglected. }
-    Result:=(getregtype(reg1) = getregtype(reg2)) and
-            (getsupreg(reg1) = getsupreg(Reg2));
+    { Do an optimized version of
+
+      Result:=(getregtype(reg1) = getregtype(reg2)) and
+      (getsupreg(reg1) = getsupreg(Reg2));
+
+      as SuperRegistersEqual is used a lot
+    }
+{$ifdef Z80}
+    { Z80 registers are indexed in an incompatible way (without R_SUBH), so it
+      needs a special check. }
+    Result:=super_registers_equal(reg1,reg2);
+{$else Z80}
+    Result:=(DWord(reg1) and $ff00ffff)=(DWord(reg2) and $ff00ffff); { NOTE(review): the mask clears bits 16..23 of both values before the compare - presumably the subregister field; confirm against the TRegister layout }
+{$endif Z80}
   end;
 
+
+  class function TAOptBase.RegistersInterfere(reg1,reg2 : TRegister) : Boolean; static; {$ifdef USEINLINE}inline;{$endif}
+    begin { Z80 builds delegate to registers_interfere; every other target
+            answers with SuperRegistersEqual(reg1,reg2) }
+{$ifdef Z80}
+      result:=registers_interfere(reg1,reg2);
+{$else Z80}
+      result:=SuperRegistersEqual(reg1,reg2);
+{$endif Z80}
+    end;
+
   { ******************* Processor dependent stuff *************************** }
 
   Function TAOptBase.RegMaxSize(Reg: TRegister): TRegister;

File diff suppressed because it is too large
+ 1041 - 129
compiler/aoptobj.pas


+ 2 - 2
compiler/aoptutils.pas

@@ -36,7 +36,7 @@ unit aoptutils;
 {$endif max_operands>2}
 
     { skips all labels and returns the next "real" instruction }
-    function SkipLabels(hp: tai; var hp2: tai): boolean;
+    function SkipLabels(hp: tai; out hp2: tai): boolean;
 
     { sets hp2 to hp and returns True if hp is not nil }
     function SetAndTest(const hp: tai; out hp2: tai): Boolean;
@@ -68,7 +68,7 @@ unit aoptutils;
 
 
     { skips all labels and returns the next "real" instruction }
-    function SkipLabels(hp: tai; var hp2: tai): boolean;
+    function SkipLabels(hp: tai; out hp2: tai): boolean;
       begin
         while assigned(hp.next) and
               (tai(hp.next).typ in SkipInstr + [ait_label,ait_align]) Do

+ 26 - 9
compiler/arm/aasmcpu.pas

@@ -723,7 +723,7 @@ implementation
           R_MMREGISTER :
             result:=taicpu.op_reg_ref(A_VLDR,r,ref);
           else
-            internalerror(200401041);
+            internalerror(2004010415);
         end;
       end;
 
@@ -741,7 +741,7 @@ implementation
           R_MMREGISTER :
             result:=taicpu.op_reg_ref(A_VSTR,r,ref);
           else
-            internalerror(200401041);
+            internalerror(2004010416);
         end;
       end;
 
@@ -866,6 +866,7 @@ implementation
             A_NEG,
             A_VABS,A_VADD,A_VCVT,A_VDIV,A_VLDR,A_VMOV,A_VMUL,A_VNEG,A_VSQRT,A_VSUB,
             A_VEOR,
+            A_VMRS,A_VMSR,
             A_MRS,A_MSR:
               if opnr=0 then
                 result:=operand_write
@@ -901,7 +902,9 @@ implementation
                 result := operand_read;
             //Thumb2
             A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV, A_MOVW, A_MOVT, A_MLS, A_BFI,
-            A_SMMLA,A_SMMLS:
+            A_QADD,
+            A_PKHTB,A_PKHBT,
+            A_SMMLA,A_SMMLS,A_SMUAD,A_SMUSD:
               if opnr in [0] then
                 result:=operand_write
               else
@@ -920,7 +923,10 @@ implementation
             A_STREX:
               result:=operand_write;
             else
-              internalerror(200403151);
+              begin
+                writeln(opcode);
+                internalerror(2004031502);
+              end;
           end;
       end;
 
@@ -1160,8 +1166,8 @@ implementation
                                             begin
                                               if (hp2.typ=ait_const) and (tai_const(hp2).sym=tai_const(hp).sym)
                                                 and (tai_const(hp2).value=tai_const(hp).value) and (tai(hp2.previous).typ=ait_label) and
-                                                { gottpoff symbols are PC relative, so we cannot reuse them }
-                                                (tai_const(hp2).consttype<>aitconst_gottpoff) then
+                                                { gottpoff, tlsgd and tlsdesc symbols are PC relative, so we cannot reuse them }
+                                                (not(tai_const(hp2).consttype in [aitconst_gottpoff,aitconst_tlsgd,aitconst_tlsdesc])) then
                                                 begin
                                                   with taicpu(curtai).oper[curop]^.ref^ do
                                                     begin
@@ -2228,6 +2234,7 @@ implementation
             { fpu_vfpv3_d16  } IF_VFPv2 or IF_VFPv3,
             { fpu_fpv4_s16   } IF_NONE,
             { fpu_vfpv4      } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
+            { fpu_vfpv4      } IF_VFPv2 or IF_VFPv3 or IF_VFPv4,
             { fpu_neon_vfpv4 } IF_VFPv2 or IF_VFPv3 or IF_VFPv4 or IF_NEON
           );
       begin
@@ -3051,13 +3058,23 @@ implementation
                 begin
                   currsym:=objdata.symbolref(oper[0]^.ref^.symbol);
 
-                  bytes:=bytes or (((oper[0]^.ref^.offset-8) shr 2) and $ffffff);
+                  { tlscall is not relative so ignore the offset }
+                  if oper[0]^.ref^.refaddr<>addr_tlscall then
+                    bytes:=bytes or (((oper[0]^.ref^.offset-8) shr 2) and $ffffff);
 
                   if (opcode<>A_BL) or (condition<>C_None) then
                     objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_24)
                   else
-                    objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_CALL);
-
+                    case oper[0]^.ref^.refaddr of
+                      addr_pic:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_ARM_CALL);
+                      addr_full:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_RELATIVE_CALL);
+                      addr_tlscall:
+                        objdata.writereloc(aint(bytes),4,currsym,RELOC_TLS_CALL);
+                      else
+                        Internalerror(2019092903);
+                    end;
                   exit;
                 end;
             end;

+ 34 - 9
compiler/arm/agarmgas.pas

@@ -49,6 +49,7 @@ unit agarmgas;
 
       TArmAppleGNUAssembler=class(TAppleGNUassembler)
         constructor CreateWithWriter(info: pasminfo; wr: TExternalAssemblerOutputFile; freewriter, smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
         procedure WriteExtraHeader; override;
       end;
 
@@ -107,6 +108,12 @@ unit agarmgas;
         case current_settings.fputype of
           fpu_soft:
             result:='-mfpu=softvfp '+result;
+          fpu_fpa:
+            result:='-mfpu=fpa '+result;
+          fpu_fpa10:
+            result:='-mfpu=fpa10 '+result;
+          fpu_fpa11:
+            result:='-mfpu=fpa11 '+result;
           fpu_vfpv2:
             result:='-mfpu=vfpv2 '+result;
           fpu_vfpv3:
@@ -115,6 +122,7 @@ unit agarmgas;
             result:='-mfpu=neon-vfpv3 '+result;
           fpu_vfpv3_d16:
             result:='-mfpu=vfpv3-d16 '+result;
+          fpu_fpv4_sp_d16,
           fpu_fpv4_s16:
             result:='-mfpu=fpv4-sp-d16 '+result;
           fpu_vfpv4:
@@ -160,6 +168,18 @@ unit agarmgas;
       end;
 
 
+    function TArmAppleGNUAssembler.MakeCmdLine: TCmdStr;
+      begin
+        result:=inherited MakeCmdLine;
+	if (asminfo^.id in [as_clang_gas,as_clang_asdarwin]) then
+          begin
+            if fputypestrllvm[current_settings.fputype] <> '' then
+              result:='-m'+fputypestrllvm[current_settings.fputype]+' '+result;
+            { Apple arm always uses softfp floating point ABI }
+            result:='-mfloat-abi=softfp '+result;
+          end;
+      end;
+
     procedure TArmAppleGNUAssembler.WriteExtraHeader;
       begin
         inherited WriteExtraHeader;
@@ -194,7 +214,9 @@ unit agarmgas;
                 if offset<>0 then
                   s:=s+tostr_with_plus(offset);
                 if refaddr=addr_pic then
-                  s:=s+'(PLT)';
+                  s:=s+'(PLT)'
+                else if refaddr=addr_tlscall then
+                  s:=s+'(tlscall)';
               end
             else
               begin
@@ -216,7 +238,7 @@ unit agarmgas;
                      else if shiftmode <> SM_None then
                        s:=s+', '+gas_shiftmode2str[shiftmode]+' #'+tostr(shiftimm);
                      if offset<>0 then
-                       Internalerror(2019012601);
+                       Internalerror(2019012602);
                   end
                 else if offset<>0 then
                   s:=s+', #'+tostr(offset);
@@ -404,7 +426,7 @@ unit agarmgas;
                      top_const:
                        s:=s+sep+tostr(taicpu(hp).oper[1]^.val);
                      else
-                       internalerror(200311292);
+                       internalerror(2003112903);
                    end;
                  end
                else
@@ -426,9 +448,10 @@ unit agarmgas;
             asmbin : 'as';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_arm_linux,system_arm_netbsd,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,
-                                 system_arm_embedded,system_arm_symbian,system_arm_android,system_arm_aros];
+                                 system_arm_embedded,system_arm_symbian,system_arm_android,system_arm_aros,system_arm_freertos];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: '$';
           );
@@ -439,9 +462,10 @@ unit agarmgas;
             idtxt  : 'AS-DARWIN';
             asmbin : 'as';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM -arch $ARCH';
-            supported_targets : [system_arm_darwin];
+            supported_targets : [system_arm_ios];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
             labelprefix : 'L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: '$';
           );
@@ -449,13 +473,14 @@ unit agarmgas;
 
        as_arm_clang_darwin_info : tasminfo =
           (
-            id     : as_clang;
+            id     : as_clang_asdarwin;
             idtxt  : 'CLANG';
             asmbin : 'clang';
-            asmcmd : '-c -o $OBJ $EXTRAOPT -arch $ARCH $DARWINVERSION -x assembler $ASM';
-            supported_targets : [system_arm_darwin];
-            flags : [af_needar,af_smartlink_sections,af_supports_dwarf];
+            asmcmd : '-x assembler -c -target $TRIPLET -o $OBJ $EXTRAOPT -x assembler $ASM';
+            supported_targets : [system_arm_ios];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_llvm];
             labelprefix : 'L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: '$';
           );

File diff suppressed because it is too large
+ 1398 - 1755
compiler/arm/aoptcpu.pas


+ 1 - 1
compiler/arm/armins.dat

@@ -259,7 +259,7 @@ fpureg,immshifter,memam2 \xA0\xC\x10\x2\x0              ARM32,FPA
 
 [CLZcc]
 reg32,reg32              \x80\xFA\xB0\xF0\x80           THUMB32,ARMv6T2
-reg32,reg32              \x32\x01\x6F\xF\x10            ARM32,ARMv4
+reg32,reg32              \x32\x01\x6F\xF\x10            ARM32,ARMv5T
 
 [CPS]
 immshifter               \x8F\xF3\xAF\x81\x00           THUMB32,ARMv6T2

+ 1 - 1
compiler/arm/armtab.inc

@@ -768,7 +768,7 @@
     ops     : 2;
     optypes : (ot_reg32,ot_reg32,ot_none,ot_none,ot_none,ot_none);
     code    : #50#1#111#15#16;
-    flags   : if_arm32 or if_armv4
+    flags   : if_arm32 or if_armv5t
   ),
   (
     opcode  : A_CPS;

+ 102 - 70
compiler/arm/cgcpu.pas

@@ -276,7 +276,7 @@ unit cgcpu;
       begin
         inherited init_register_allocators;
         { currently, we always save R14, so we can use it }
-        if (target_info.system<>system_arm_darwin) then
+        if (target_info.system<>system_arm_ios) then
             begin
               if assigned(current_procinfo) and (current_procinfo.framepointer<>NR_R11) then
                 rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
@@ -329,7 +329,7 @@ unit cgcpu;
           imm1, imm2: DWord;
        begin
           if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
-            internalerror(2002090902);
+            internalerror(2002090907);
           if is_shifter_const(a,imm_shift) then
             list.concat(taicpu.op_reg_const(A_MOV,reg,a))
           else if is_shifter_const(not(a),imm_shift) then
@@ -516,7 +516,7 @@ unit cgcpu;
         hsym:=tsym(procdef.parast.Find('self'));
         if not(assigned(hsym) and
           (hsym.typ=paravarsym)) then
-          internalerror(200305251);
+          internalerror(2003052503);
         paraloc:=tparavarsym(hsym).paraloc[callerside].location;
         while paraloc<>nil do
           with paraloc^ do
@@ -546,7 +546,7 @@ unit cgcpu;
                       end;
                   end
                 else
-                  internalerror(200309189);
+                  internalerror(2003091803);
               end;
               paraloc:=next;
             end;
@@ -1089,7 +1089,7 @@ unit cgcpu;
           OP_ROL:
             begin
               if not(size in [OS_32,OS_S32]) then
-                internalerror(2008072801);
+                internalerror(2008072804);
               { simulate ROL by ror'ing 32-value }
               tmpreg:=getintregister(list,OS_32);
               list.concat(taicpu.op_reg_reg_const(A_RSB,tmpreg,src1, 32));
@@ -1210,7 +1210,7 @@ unit cgcpu;
         if (ref.base=NR_NO) then
           begin
             if ref.shiftmode<>SM_None then
-              internalerror(2014020701);
+              internalerror(2014020707);
             ref.base:=ref.index;
             ref.index:=NR_NO;
           end;
@@ -1368,7 +1368,7 @@ unit cgcpu;
            OS_F32:
              oppostfix:=PF_None;
            else
-             InternalError(200308299);
+             InternalError(2003082912);
          end;
 
          if ((ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[tosize])) or
@@ -2084,7 +2084,7 @@ unit cgcpu;
              begin
                reference_reset(ref,4,[]);
                if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                 (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
+                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                  begin
                    if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                      begin
@@ -2115,14 +2115,16 @@ unit cgcpu;
                    begin
                      ref.index:=ref.base;
                      ref.base:=NR_NO;
-                     { FSTMX is deprecated on ARMv6 and later }
-                     {if (current_settings.cputype<cpu_armv6) then
-                       postfix:=PF_IAX
-                     else
-                       postfix:=PF_IAD;}
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                    end
+                 else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                   begin
+                     ref.index:=ref.base;
+                     ref.base:=NR_NO;
+                     if mmregs<>[] then
+                       list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFS,mmregs));
+                   end
                  else
                    internalerror(2019050923);
                end;
@@ -2176,7 +2178,7 @@ unit cgcpu;
                         }
                       end;
                 end;
-              else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                 begin
                   { restore vfp registers? }
                   { the *[0..31] is a hack to prevent that the compiler tries to save odd single-type registers,
@@ -2185,7 +2187,7 @@ unit cgcpu;
                   mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
                 end
               else
-                internalerror(2019050926);
+                internalerror(2019050908);
             end;
 
             if (firstfloatreg<>RS_NO) or
@@ -2193,7 +2195,7 @@ unit cgcpu;
               begin
                 reference_reset(ref,4,[]);
                 if (tg.direction*tcpuprocinfo(current_procinfo).floatregstart>=1023) or
-                   (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
+                   (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                   begin
                     if not is_shifter_const(tcpuprocinfo(current_procinfo).floatregstart,shift) then
                       begin
@@ -2223,13 +2225,15 @@ unit cgcpu;
                     begin
                       ref.index:=ref.base;
                       ref.base:=NR_NO;
-                      { FLDMX is deprecated on ARMv6 and later }
-                      {if (current_settings.cputype<cpu_armv6) then
-                        mmpostfix:=PF_IAX
-                      else
-                        mmpostfix:=PF_IAD;}
-                     if mmregs<>[] then
-                       list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
+                      if mmregs<>[] then
+                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
+                    end
+                  else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                    begin
+                      ref.index:=ref.base;
+                      ref.base:=NR_NO;
+                      if mmregs<>[] then
+                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFS,mmregs));
                     end
                   else
                     internalerror(2019050921);
@@ -2483,7 +2487,7 @@ unit cgcpu;
         indirection_done:=false;
         if assigned(ref.symbol) then
           begin
-            if (target_info.system=system_arm_darwin) and
+            if (target_info.system=system_arm_ios) and
                (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
               begin
                 tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
@@ -2493,6 +2497,17 @@ unit cgcpu;
               end
             else if ref.refaddr=addr_gottpoff then
               current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tlsgd then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tlsdesc then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
+            else if ref.refaddr=addr_tpoff then
+              begin
+                if assigned(ref.relsymbol) or (ref.offset<>0) then
+                  Internalerror(2019092804);
+
+                current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
+              end
             else if (cs_create_pic in current_settings.moduleswitches) then
               if (tf_pic_uses_got in target_info.flags) then
                 current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
@@ -2600,9 +2615,9 @@ unit cgcpu;
         paraloc1.init;
         paraloc2.init;
         paraloc3.init;
-        paramanager.getintparaloc(list,pd,1,paraloc1);
-        paramanager.getintparaloc(list,pd,2,paraloc2);
-        paramanager.getintparaloc(list,pd,3,paraloc3);
+        paramanager.getcgtempparaloc(list,pd,1,paraloc1);
+        paramanager.getcgtempparaloc(list,pd,2,paraloc2);
+        paramanager.getcgtempparaloc(list,pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -3090,11 +3105,12 @@ unit cgcpu;
         list.concat(instr);
         case instr.opcode of
           A_VMOV:
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
             add_move_instruction(instr);
           else
-            ;
+            { VCVT can generate an exception }
+            maybe_check_for_fpu_exception(list);
         end;
-        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3154,13 +3170,10 @@ unit cgcpu;
             end;
           end
         else
-          begin
-             handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
-          end;
+          handle_load_store(list,A_VLDR,PF_None,tmpmmreg,ref);
 
         if (tmpmmreg<>reg) then
           a_loadmm_reg_reg(list,fromsize,tosize,tmpmmreg,reg,shuffle);
-        maybe_check_for_fpu_exception(list);
       end;
 
 
@@ -3180,14 +3193,14 @@ unit cgcpu;
               tosize:=OS_F32;
               { since we are loading an integer, no conversion may be required }
               if (fromsize<>tosize) then
-                internalerror(2009112801);
+                internalerror(2009112802);
             end;
           OS_64,OS_S64:
             begin
               tosize:=OS_F64;
               { since we are loading an integer, no conversion may be required }
               if (fromsize<>tosize) then
-                internalerror(2009112901);
+                internalerror(2009112902);
             end;
           OS_F32,OS_F64:
             ;
@@ -3223,10 +3236,8 @@ unit cgcpu;
             end;
           end
         else
-          begin
-             handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
-          end;
-        maybe_check_for_fpu_exception(list);
+          handle_load_store(list,A_VSTR,PF_None,tmpmmreg,ref);
+        { VSTR cannot generate an FPU exception, VCVT is handled separately, so we do not need a check here }
       end;
 
 
@@ -3242,7 +3253,7 @@ unit cgcpu;
            not shufflescalar(shuffle) then
           internalerror(2009112516);
         list.concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg));
-        maybe_check_for_fpu_exception(list);
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3253,12 +3264,12 @@ unit cgcpu;
         if (fromsize<>OS_F32) then
           internalerror(2009112430);
         if not(tosize in [OS_32,OS_S32]) then
-          internalerror(2009112420);
+          internalerror(2009112409);
         if assigned(shuffle) and
            not shufflescalar(shuffle) then
           internalerror(2009112514);
         list.concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
-        maybe_check_for_fpu_exception(list);
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3339,10 +3350,13 @@ unit cgcpu;
 
     procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
       begin
-        list.concat(tai_regalloc.alloc(NR_R0,nil));
-        a_call_name(list,'fpc_read_tp',false);
-        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
-        list.concat(tai_regalloc.dealloc(NR_R0,nil));
+        if pi_needs_tls in current_procinfo.flags then
+          begin
+            list.concat(tai_regalloc.alloc(NR_R0,nil));
+            a_call_name(list,'fpc_read_tp',false);
+            a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
+            list.concat(tai_regalloc.dealloc(NR_R0,nil));
+          end;
       end;
 
 
@@ -3396,7 +3410,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
           internalerror(2009112405);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,mmreg,intreg.reglo,intreg.reghi));
-        cg.maybe_check_for_fpu_exception(list);
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3407,7 +3421,7 @@ unit cgcpu;
         if (mmsize<>OS_F64) then
           internalerror(2009112406);
         list.concat(taicpu.op_reg_reg_reg(A_VMOV,intreg.reglo,intreg.reghi,mmreg));
-        cg.maybe_check_for_fpu_exception(list);
+        { VMOV cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -3581,7 +3595,7 @@ unit cgcpu;
                   list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
                 end;
               else
-                internalerror(2003083101);
+                internalerror(2003083102);
             end;
             if size=OS_64 then
               begin
@@ -3620,7 +3634,7 @@ unit cgcpu;
                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
                 end;
               else
-                internalerror(2003083101);
+                internalerror(2003083104);
             end;
           end;
       end;
@@ -3962,7 +3976,7 @@ unit cgcpu;
           hr : treference;
        begin
           if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
-            internalerror(2002090902);
+            internalerror(2002090908);
           if is_thumb_imm(a) then
             list.concat(taicpu.op_reg_const(A_MOV,reg,a))
           else
@@ -3994,7 +4008,7 @@ unit cgcpu;
         hsym:=tsym(procdef.parast.Find('self'));
         if not(assigned(hsym) and
           (hsym.typ=paravarsym)) then
-          internalerror(200305251);
+          internalerror(2003052504);
         paraloc:=tparavarsym(hsym).paraloc[callerside].location;
         while paraloc<>nil do
           with paraloc^ do
@@ -4044,7 +4058,7 @@ unit cgcpu;
                       end;
                   end
                 else
-                  internalerror(200309189);
+                  internalerror(2003091804);
               end;
               paraloc:=next;
             end;
@@ -4131,7 +4145,7 @@ unit cgcpu;
           OP_ROL:
             begin
               if not(size in [OS_32,OS_S32]) then
-                internalerror(2008072801);
+                internalerror(2008072805);
               { simulate ROL by ror'ing 32-value }
               tmpreg:=getintregister(list,OS_32);
               a_load_const_reg(list,OS_32,32,tmpreg);
@@ -4213,7 +4227,7 @@ unit cgcpu;
             else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
               begin
                 if l1>32 then{roozbeh does this ever happen?}
-                  internalerror(200308296);
+                  internalerror(2003082903);
                 shifterop_reset(so);
                 so.shiftmode:=SM_LSL;
                 so.shiftimm:=l1;
@@ -4223,7 +4237,7 @@ unit cgcpu;
             else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
               begin
                 if l1>32 then{does this ever happen?}
-                  internalerror(201205181);
+                  internalerror(2012051802);
                 shifterop_reset(so);
                 so.shiftmode:=SM_LSL;
                 so.shiftimm:=l1;
@@ -4306,7 +4320,7 @@ unit cgcpu;
       begin
         inherited init_register_allocators;
         { currently, we save R14 always, so we can use it }
-        if (target_info.system<>system_arm_darwin) then
+        if (target_info.system<>system_arm_ios) then
           rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
               [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
                RS_R9,RS_R10,RS_R12,RS_R14],first_int_imreg,[])
@@ -4318,12 +4332,19 @@ unit cgcpu;
         rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
 
-        if FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype] then
+        if (FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype]) and
+          (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
                RS_D16,RS_D17,RS_D18,RS_D19,RS_D20,RS_D21,RS_D22,RS_D23,RS_D24,RS_D25,RS_D26,RS_D27,RS_D28,RS_D29,RS_D30,RS_D31,
                RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
               ],first_mm_imreg,[])
+        else if (FPUARM_HAS_32REGS in fpu_capabilities[current_settings.fputype]) then
+          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFS,
+              [RS_S0,RS_S1,RS_S2,RS_S3,RS_S4,RS_S5,RS_S6,RS_S7,
+               RS_S16,RS_S17,RS_S18,RS_S19,RS_S20,RS_S21,RS_S22,RS_S23,RS_S24,RS_S25,RS_S26,RS_S27,RS_S28,RS_S29,RS_S30,RS_S31,
+               RS_S8,RS_S9,RS_S10,RS_S11,RS_S12,RS_S13,RS_S14,RS_S15
+              ],first_mm_imreg,[])
         else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
           rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
               [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
@@ -4363,7 +4384,7 @@ unit cgcpu;
           hr : treference;
        begin
           if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
-            internalerror(2002090902);
+            internalerror(2002090909);
           if is_thumb32_imm(a) then
             list.concat(taicpu.op_reg_const(A_MOV,reg,a))
           else if is_thumb32_imm(not(a)) then
@@ -4410,7 +4431,7 @@ unit cgcpu;
            OS_S32:
              oppostfix:=PF_None;
            else
-             InternalError(200308299);
+             InternalError(2003082913);
          end;
          if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
            begin
@@ -4681,7 +4702,7 @@ unit cgcpu;
             else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
               begin
                 if l1>32 then{roozbeh does this ever happen?}
-                  internalerror(200308296);
+                  internalerror(2003082911);
                 shifterop_reset(so);
                 so.shiftmode:=SM_LSL;
                 so.shiftimm:=l1;
@@ -4691,7 +4712,7 @@ unit cgcpu;
             else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
               begin
                 if l1>32 then{does this ever happen?}
-                  internalerror(201205181);
+                  internalerror(2012051803);
                 shifterop_reset(so);
                 so.shiftmode:=SM_LSL;
                 so.shiftimm:=l1;
@@ -4753,7 +4774,7 @@ unit cgcpu;
            OP_ROL:
               begin
                 if not(size in [OS_32,OS_S32]) then
-                   internalerror(2008072801);
+                   internalerror(2008072806);
                 { simulate ROL by ror'ing 32-value }
                 tmpreg:=getintregister(list,OS_32);
                 list.concat(taicpu.op_reg_const(A_MOV,tmpreg,32));
@@ -5126,6 +5147,17 @@ unit cgcpu;
 
                 if ref.refaddr=addr_gottpoff then
                   current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tlsgd then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsgd,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tlsdesc then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_tlsdesc,ref.symbol,ref.relsymbol,ref.offset))
+                else if ref.refaddr=addr_tpoff then
+                  begin
+                    if assigned(ref.relsymbol) or (ref.offset<>0) then
+                      Internalerror(2019092807);
+
+                    current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_tpoff,ref.symbol));
+                  end
                 else
                   current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
 
@@ -5192,7 +5224,7 @@ unit cgcpu;
         if ((op in [A_LDF,A_STF,A_FLDS,A_FLDD,A_FSTS,A_FSTD]) or (op=A_VSTR) or (op=A_VLDR)) and (ref.index<>NR_NO) then
           begin
             if ref.shiftmode<>SM_none then
-              internalerror(200309121);
+              internalerror(2003091202);
             if tmpreg<>NR_NO then
               begin
                 if ref.base=tmpreg then
@@ -5206,7 +5238,7 @@ unit cgcpu;
                 else
                   begin
                     if ref.index<>tmpreg then
-                      internalerror(200403161);
+                      internalerror(2004031602);
                     if ref.signindex<0 then
                       list.concat(taicpu.op_reg_reg_reg(A_SUB,tmpreg,ref.base,tmpreg))
                     else
@@ -5238,7 +5270,7 @@ unit cgcpu;
             instr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);
             list.Concat(instr);
             add_move_instruction(instr);
-            maybe_check_for_fpu_exception(list);
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
           end
         else if (fromsize=OS_F64) and
           (tosize=OS_F64) then
@@ -5264,7 +5296,7 @@ unit cgcpu;
     procedure tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
       begin
         handle_load_store(list,A_VSTR,PF_None,reg,ref);
-        maybe_check_for_fpu_exception(list);
+        { VSTR cannot generate an FPU exception, so we do not need a check here }
       end;
 
 
@@ -5284,7 +5316,7 @@ unit cgcpu;
           (fromsize=OS_F32) then
           begin
             list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg));
-            maybe_check_for_fpu_exception(list);
+            { VMOV cannot generate an FPU exception, so we do not need a check here }
           end
         else
           internalerror(2012100814);
@@ -5345,7 +5377,7 @@ unit cgcpu;
               list.concat(taicpu.op_reg_reg(A_SBC,regdst.reghi,regsrc.reghi));
             end;
           else
-            internalerror(2003083101);
+            internalerror(2003083105);
         end;
       end;
 
@@ -5399,7 +5431,7 @@ unit cgcpu;
               list.concat(taicpu.op_reg_reg(A_SBC,reg.reghi,tmpreg));
             end;
           else
-            internalerror(2003083101);
+            internalerror(2003083106);
         end;
       end;
 

+ 36 - 9
compiler/arm/cpubase.pas

@@ -44,10 +44,6 @@ unit cpubase;
 
     type
       TAsmOp= {$i armop.inc}
-      {This is a bit of a hack, because there are more than 256 ARM Assembly Ops
-       But FPC currently can't handle more than 256 elements in a set.}
-      TCommonAsmOps = Set of A_None .. A_UADD16;
-
       { This should define the array of instructions as string }
       op2strtable=array[tasmop] of string[11];
 
@@ -56,6 +52,14 @@ unit cpubase;
       firstop = low(tasmop);
       { Last value of opcode enumeration  }
       lastop  = high(tasmop);
+      { Last value of opcode for TCommonAsmOps set below  }
+      LastCommonAsmOp = A_UADD16;
+
+
+    type
+      {This is a bit of a hack, because there are more than 256 ARM Assembly Ops
+       But FPC currently can't handle more than 256 elements in a set.}
+      TCommonAsmOps = Set of A_None .. LastCommonAsmOp;
 
 {*****************************************************************************
                                   Registers
@@ -113,9 +117,6 @@ unit cpubase;
 
       VOLATILE_INTREGISTERS_DARWIN = [RS_R0..RS_R3,RS_R9,RS_R12..RS_R14];
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
                           Instruction post fixes
 *****************************************************************************}
@@ -368,6 +369,9 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function is_pc(const r : tregister) : boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
@@ -446,7 +450,7 @@ unit cpubase;
             begin
               case getsubreg(reg) of
                 R_SUBFD,
-                R_SUBWHOLE:
+                R_SUBMMWHOLE:
                   result:=OS_F64;
                 R_SUBFS:
                   result:=OS_F32;
@@ -543,6 +547,26 @@ unit cpubase;
       end;
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal"). Returns True outright when c is C_None or conditions_equal(Subset, c) holds. }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);
+
+        { Not trivially contained: fall back to the explicit subset table below. Please update as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE, C_LE]);
+            C_LT:
+              Result := (c in [C_LE]);
+            C_GT:
+              Result := (c in [C_GE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
     function is_shifter_const(d : aint;var imm_shift : byte) : boolean;
       var
          i : longint;
@@ -615,6 +639,9 @@ unit cpubase;
           end;
       end;
     
+{$push}
+{ Disable range and overflow checking here }
+{$R-}{$Q-}        
     function is_continuous_mask(d : aword;var lsb, width: byte) : boolean;
       var
         msb : byte;
@@ -623,9 +650,9 @@ unit cpubase;
         msb:=BsrDword(d);
         
         width:=msb-lsb+1;
-        
         result:=(lsb<>255) and (msb<>255) and (aword(((1 shl (msb-lsb+1))-1) shl lsb) = d);
       end;
+{$pop}
 
 
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword) : boolean;

+ 22 - 6
compiler/arm/cpuelf.pas

@@ -28,7 +28,7 @@ interface
 implementation
 
   uses
-    globtype,cutils,cclasses,
+    globtype,globals,cutils,cclasses,
     verbose, elfbase,
     systems,aasmbase,ogbase,ogelf,assemble;
 
@@ -335,9 +335,24 @@ implementation
           result:=R_ARM_THM_CALL;
         RELOC_GOT32:
           result:=R_ARM_GOT_BREL;
+        RELOC_TPOFF:
+          if current_settings.tlsmodel=tlsm_initial_exec then
+            result:=R_ARM_TLS_IE32
+          else if current_settings.tlsmodel=tlsm_local_exec then
+            result:=R_ARM_TLS_LE32
+          else
+            Internalerror(2019092901);
+        RELOC_TLSGD:
+          result:=R_ARM_TLS_GD32;
+        RELOC_TLSDESC:
+          result:=R_ARM_TLS_GOTDESC;
+        RELOC_TLS_CALL:
+          result:=R_ARM_TLS_CALL;
+        RELOC_ARM_CALL:
+          result:=R_ARM_CALL;
+        RELOC_DTPOFF:
+          result:=R_ARM_TLS_LDO32;
       else
-        result:=0;
-        writeln(objrel.typ);
         InternalError(2012110602);
       end;
     end;
@@ -908,7 +923,7 @@ implementation
           else
             begin
               writeln(objreloc.ftype);
-              internalerror(200604014);
+              internalerror(2006040107);
             end;
           end
         else           { not relocsec.Used }
@@ -956,12 +971,13 @@ implementation
          idtxt  : 'ELF';
          asmbin : '';
          asmcmd : '';
-         supported_targets : [system_arm_embedded,system_arm_darwin,
+         supported_targets : [system_arm_embedded,system_arm_ios,
                               system_arm_linux,system_arm_netbsd,
                               system_arm_gba,system_arm_nds,
-                              system_arm_aros];
+                              system_arm_aros,system_arm_freertos];
          flags : [af_outputbinary,af_smartlink_sections,af_supports_dwarf];
          labelprefix : '.L';
+         labelmaxlen : -1;
          comment : '';
          dollarsign: '$';
        );

+ 44 - 26
compiler/arm/cpuinfo.pas

@@ -54,6 +54,9 @@ Type
        cpu_armv7r,
        cpu_armv7m,
        cpu_armv7em
+       { when new elements are added afterwards,
+         update class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas
+       }
       );
 
    tinstructionset = (is_thumb,is_arm);
@@ -70,17 +73,16 @@ Type
       fpu_vfpv3,
       fpu_neon_vfpv3,
       fpu_vfpv3_d16,
-      fpu_fpv4_s16,
+      fpu_fpv4_s16,     { same as fpu_fpv4_sp_d16, kept for backwards compatibility }
       fpu_vfpv4,
+      fpu_fpv4_sp_d16,  { 32 registers single precision, for load/store/move they can be accessed as 16 double registers }
       fpu_neon_vfpv4
-      { when new elements added afterwards, update also fpu_vfp_last below }
+      { when new elements are added afterwards, update
+        class procedure tarmnodeutils.InsertObjectInfo; in narmutil.pas }
      );
 
 Const
-   fpu_vfp_first = fpu_vfpv2;
-   fpu_vfp_last  = fpu_neon_vfpv4;
-
-  fputypestrllvm : array[tfputype] of string[14] = ('',
+  fputypestrllvm : array[tfputype] of string[15] = ('',
     '',
     '',
     '',
@@ -92,6 +94,7 @@ Const
     'fpu=vfpv3-d16',
     'fpu=vfpv4-s16',
     'fpu=vfpv4',
+    'fpu=fpv4-sp-d16',
     'fpu=neon-vfpv4'
   );
 
@@ -344,6 +347,9 @@ Type
       ct_stm32f756xe,
       ct_stm32f756xg,
 
+      ct_stm32g071rb,
+      ct_nucleog071rb,
+
       { TI - Fury Class - 64 K Flash, 16 K SRAM Devices }
       ct_lm3s1110,
       ct_lm3s1133,
@@ -504,6 +510,9 @@ Type
       ct_nrf52832_xxaa,
       ct_nrf52840_xxaa,
 
+      { Raspberry Pi 2 }
+      ct_raspi2,
+
       // generic Thumb2 target
       ct_thumb2bare
      );
@@ -563,7 +572,8 @@ Const
      'ARMV7EM'
    );
 
-   fputypestr : array[tfputype] of string[10] = ('',
+   fputypestr : array[tfputype] of string[11] = (
+     'NONE',
      'SOFT',
      'LIBGCC',
      'FPA',
@@ -575,6 +585,7 @@ Const
      'VFPV3_D16',
      'FPV4_S16',
      'VFPV4',
+     'FPV4_SP_D16',
      'NEON_VFPV4'
    );
 
@@ -811,8 +822,8 @@ Const
       (controllertypestr:'STM32F401RD';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00060000; srambase:$20000000; sramsize:$00018000),
       (controllertypestr:'STM32F401VD';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00060000; srambase:$20000000; sramsize:$00018000),
       (controllertypestr:'STM32F401CE';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
-      (controllertypestr:'STM32F401RE';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
-      (controllertypestr:'NUCLEOF401RE';    controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
+      (controllertypestr:'STM32F401RE';     controllerunitstr:'STM32F401XE';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
+      (controllertypestr:'NUCLEOF401RE';    controllerunitstr:'STM32F401XE';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
       (controllertypestr:'STM32F401VE';     controllerunitstr:'STM32F401XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00018000),
       (controllertypestr:'STM32F407VG';     controllerunitstr:'STM32F407XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00100000; srambase:$20000000; sramsize:$00020000),
       (controllertypestr:'DISCOVERYF407VG'; controllerunitstr:'STM32F407XX';      cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00100000; srambase:$20000000; sramsize:$00020000),
@@ -862,6 +873,9 @@ Const
       (controllertypestr:'STM32F756XE';     controllerunitstr:'STM32F756';        cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00080000; srambase:$20010000; sramsize:$00040000),
       (controllertypestr:'STM32F756XG';     controllerunitstr:'STM32F756';        cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$08000000; flashsize:$00100000; srambase:$20010000; sramsize:$00040000),
 
+      (controllertypestr:'STM32G071RB'         ; controllerunitstr:'STM32G071XX'         ; cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00009000),
+      (controllertypestr:'NUCLEOG071RB'        ; controllerunitstr:'STM32G071XX'         ; cputype:cpu_armv6m; fputype:fpu_soft; flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00009000),
+
       (controllertypestr:'LM3S1110';	controllerunitstr:'LM3FURY';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00010000;	srambase:$20000000;	sramsize:$00004000),
       (controllertypestr:'LM3S1133';	controllerunitstr:'LM3FURY';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00010000;	srambase:$20000000;	sramsize:$00004000),
       (controllertypestr:'LM3S1138';	controllerunitstr:'LM3FURY';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00010000;	srambase:$20000000;	sramsize:$00004000),
@@ -1020,6 +1034,9 @@ Const
       (controllertypestr:'NRF52832_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       (controllertypestr:'NRF52840_XXAA'; controllerunitstr:'NRF52'; cputype:cpu_armv7em; fputype:fpu_soft; flashbase:$00000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
       
+      { Raspberry Pi 2 }
+      (controllertypestr:'RASPI2'; controllerunitstr:'RASPI2'; cputype:cpu_armv7a; fputype:fpu_vfpv4; flashbase:$00000000; flashsize:$00000000; srambase:$00008000; sramsize:$10000000),
+
       { Bare bones }
       (controllertypestr:'THUMB2_BARE';	controllerunitstr:'THUMB2_BARE';	cputype:cpu_armv7m; fputype:fpu_soft; flashbase:$00000000;	flashsize:$00002000;	srambase:$20000000;	sramsize:$00000400)
     );
@@ -1030,12 +1047,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath,cs_opt_forcenostackframe];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
@@ -1060,9 +1077,9 @@ Const
    tfpuflags =
       (
         FPUARM_HAS_FPA,                { fpu is an fpa based FPU                                                               }
-        FPUARM_HAS_VFP_EXTENSION,      { fpu is a vfp extension                                                                }
+        FPUARM_HAS_VFP_EXTENSION,      { fpu is a vfp extension, it means at least single operation support                    }
         FPUARM_HAS_VFP_DOUBLE,         { vfp has double support                                                                }
-        FPUARM_HAS_VFP_SINGLE_ONLY,    { vfp has only single support, disjunct to FPUARM_HAS_VFP_DOUBLE, for error checking    }
+        FPUARM_HAS_VFP_DOUBLE_MOVLDST, { vfp has only single support, but MOV, LD, ST can be done on pairs as double           }
         FPUARM_HAS_32REGS,             { vfp has 32 regs, without this flag, 16 are assumed                                    }
         FPUARM_HAS_VMOV_CONST,         { vmov supports (some) real constants                                                   }
         FPUARM_HAS_EXCEPTION_TRAPPING, { vfp does exceptions trapping                                                          }
@@ -1094,19 +1111,20 @@ Const
      );
 
      fpu_capabilities : array[tfputype] of set of tfpuflags =
-       ( { fpu_none       } [],
-         { fpu_soft       } [],
-         { fpu_libgcc     } [],
-         { fpu_fpa        } [FPUARM_HAS_FPA],
-         { fpu_fpa10      } [FPUARM_HAS_FPA],
-         { fpu_fpa11      } [FPUARM_HAS_FPA],
-         { fpu_vfpv2      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE],
-         { fpu_vfpv3      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST],
-         { fpu_neon_vfpv3 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON],
-         { fpu_vfpv3_d16  } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VMOV_CONST],
-         { fpu_fpv4_s16   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_SINGLE_ONLY,FPUARM_HAS_VMOV_CONST],
-         { fpu_vfpv4      } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
-         { fpu_neon_vfpv4 } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON,FPUARM_HAS_FMA]
+       ( { fpu_none         } [],
+         { fpu_soft         } [],
+         { fpu_libgcc       } [],
+         { fpu_fpa          } [FPUARM_HAS_FPA],
+         { fpu_fpa10        } [FPUARM_HAS_FPA],
+         { fpu_fpa11        } [FPUARM_HAS_FPA],
+         { fpu_vfpv2        } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE],
+         { fpu_vfpv3        } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST],
+         { fpu_neon_vfpv3   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON],
+         { fpu_vfpv3_d16    } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_VMOV_CONST],
+         { fpu_fpv4_s16     } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_32REGS,FPUARM_HAS_VFP_DOUBLE_MOVLDST,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_vfpv4        } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_fpv4_sp_d16  } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_32REGS,FPUARM_HAS_VFP_DOUBLE_MOVLDST,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_FMA],
+         { fpu_neon_vfpv4   } [FPUARM_HAS_VFP_EXTENSION,FPUARM_HAS_VFP_DOUBLE,FPUARM_HAS_32REGS,FPUARM_HAS_VMOV_CONST,FPUARM_HAS_NEON,FPUARM_HAS_FMA]
        );
 
    { contains all CPU supporting any kind of thumb instruction set }

+ 2 - 1
compiler/arm/cpunode.pas

@@ -46,7 +46,8 @@ unit cpunode;
        narmcnv,
        narmcon,
        narmset,
-       narmmem
+       narmmem,
+       narmutil
 {$else}
        llvmnode
 {$endif}

+ 8 - 8
compiler/arm/cpupara.pas

@@ -40,7 +40,7 @@ unit cpupara;
           function get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
+          procedure getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
           function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
@@ -59,14 +59,14 @@ unit cpupara;
 
     uses
        verbose,systems,cutils,
-       defutil,symsym,symcpu,symtable,
+       defutil,symsym,symcpu,symtable,symutil,
        { PowerPC uses procinfo as well in cpupara, so this should not hurt }
        procinfo;
 
 
     function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
-        if (target_info.system<>system_arm_darwin) then
+        if (target_info.system<>system_arm_ios) then
           result:=VOLATILE_INTREGISTERS
         else
           result:=VOLATILE_INTREGISTERS_DARWIN;
@@ -94,7 +94,7 @@ unit cpupara;
       end;
 
 
-    procedure tcpuparamanager.getintparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+    procedure tcpuparamanager.getcgtempparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
         psym : tparavarsym;
@@ -149,7 +149,7 @@ unit cpupara;
                 getparaloc:=LOC_MMREGISTER
               else if (calloption in cdecl_pocalls) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
-                 (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last]) then
+                 (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
                 { the ARM eabi also allows passing VFP values via VFP registers,
                   but Mac OS X doesn't seem to do that and linux only does it if
                   built with the "-mfloat-abi=hard" option }
@@ -294,7 +294,7 @@ unit cpupara;
                   for i:=0 to trecorddef(def).symtable.SymList.count-1 do
                     begin
                       sym:=tsym(trecorddef(def).symtable.SymList[i]);
-                      if sym.typ<>fieldvarsym then
+                      if not is_normal_fieldvarsym(sym) then
                         continue;
                       { bitfield -> ignore }
                       if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
@@ -782,7 +782,7 @@ unit cpupara;
               end
             else if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last]) then
+               (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) then
               begin
                 case retcgsize of
                   OS_64,
@@ -915,7 +915,7 @@ unit cpupara;
               end;
           end
         else
-          internalerror(200410231);
+          internalerror(2004102306);
 
         create_funcretloc_info(p,side);
       end;

+ 1 - 1
compiler/arm/cpupi.pas

@@ -88,7 +88,7 @@ unit cpupi;
           end;
         if tg.direction = -1 then
           begin
-            if (target_info.system<>system_arm_darwin) then
+            if (target_info.system<>system_arm_ios) then
               { Non-Darwin, worst case: r4-r10,r11,r13,r14,r15 is saved -> -28-16, but we
                 always adjust the frame pointer to point to the first stored
                 register (= last register in list above) -> + 4 }

+ 7 - 0
compiler/arm/cputarg.pas

@@ -62,9 +62,16 @@ implementation
     {$ifndef NOTARGETBSD}
       ,t_bsd
     {$endif}
+    {$ifndef NOTARGETDARWIN}
+      ,t_darwin
+    {$endif}
     {$ifndef NOTARGETAROS}
       ,t_aros
     {$endif}
+    {$ifndef NOTARGETFREERTOS}
+      ,t_freertos
+    {$endif}
+
 
 {**************************************
              Assemblers

+ 1 - 1
compiler/arm/hlcgcpu.pas

@@ -106,7 +106,7 @@ implementation
         l : TAsmLabel;
       begin
         if (procdef.extnumber=$ffff) then
-          Internalerror(200006139);
+          Internalerror(2000061311);
         if GenerateThumbCode then
           begin
             reference_reset_base(href,voidpointertype,NR_R0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),ctempposinvalid,sizeof(pint),[]);

+ 9 - 67
compiler/arm/narmadd.pas

@@ -207,7 +207,7 @@ interface
             end;
           fpu_soft:
             { this case should be handled already by pass1 }
-            internalerror(200308252);
+            internalerror(2003082503);
           else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
             begin
               { force mmreg as location, left right doesn't matter
@@ -240,7 +240,7 @@ interface
                  location.register,left.location.register,right.location.register),pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
@@ -260,7 +260,7 @@ interface
                 slashn :
                   op:=A_VDIV;
                 else
-                  internalerror(2009111401);
+                  internalerror(2009111404);
               end;
 
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op, location.register,left.location.register,right.location.register), PF_F32));
@@ -326,7 +326,7 @@ interface
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VMRS,NR_APSR_nzcv,NR_FPSCR));
               location.resflags:=GetFpuResFlags;
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
@@ -576,77 +576,19 @@ interface
           end;
       end;
 
+
     function tarmaddnode.first_addfloat: tnode;
-      var
-        procname: string[31];
-        { do we need to reverse the result ? }
-        notnode : boolean;
-        fdef : tdef;
       begin
         result := nil;
-        notnode := false;
 
-        if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+        if (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype]) and
+           not(FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) then
           begin
             case tfloatdef(left.resultdef).floattype of
               s32real:
-                begin
-                  result:=nil;
-                  notnode:=false;
-                end;
+                ;
               s64real:
-                begin
-                  fdef:=search_system_type('FLOAT64').typedef;
-                  procname:='float64';
-
-                  case nodetype of
-                    addn:
-                      procname:=procname+'_add';
-                    muln:
-                      procname:=procname+'_mul';
-                    subn:
-                      procname:=procname+'_sub';
-                    slashn:
-                      procname:=procname+'_div';
-                    ltn:
-                      procname:=procname+'_lt';
-                    lten:
-                      procname:=procname+'_le';
-                    gtn:
-                      begin
-                        procname:=procname+'_lt';
-                        swapleftright;
-                      end;
-                    gten:
-                      begin
-                        procname:=procname+'_le';
-                        swapleftright;
-                      end;
-                    equaln:
-                      procname:=procname+'_eq';
-                    unequaln:
-                      begin
-                        procname:=procname+'_eq';
-                        notnode:=true;
-                      end;
-                    else
-                      CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),left.resultdef.typename,right.resultdef.typename);
-                  end;
-
-                  if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
-                    resultdef:=pasbool1type;
-                  result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
-                      ctypeconvnode.create_internal(right,fdef),
-                      ccallparanode.create(
-                        ctypeconvnode.create_internal(left,fdef),nil))),resultdef);
-
-                  left:=nil;
-                  right:=nil;
-
-                  { do we need to reverse the result }
-                  if notnode then
-                    result:=cnotnode.create(result);
-                end;
+                result:=first_addfloat_soft;
               else
                 internalerror(2019050933);
             end;

+ 1 - 1
compiler/arm/narmcal.pas

@@ -83,7 +83,7 @@ implementation
          (target_info.abi<>abi_eabihf) and
          (procdefinition.proccalloption<>pocall_hardfloat) and
          ((cs_fp_emulation in current_settings.moduleswitches) or
-          (current_settings.fputype in [fpu_vfp_first..fpu_vfp_last])) then
+          (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype])) then
         begin
           { keep the fpu values in integer registers for now, the code
             generator will move them to memory or an mmregister when necessary

+ 19 - 32
compiler/arm/narmcnv.pas

@@ -32,33 +32,17 @@ interface
        tarmtypeconvnode = class(tcgtypeconvnode)
          protected
            function first_int_to_real: tnode;override;
-           function first_real_to_real: tnode; override;
-         { procedure second_int_to_int;override; }
-         { procedure second_string_to_string;override; }
-         { procedure second_cstring_to_pchar;override; }
-         { procedure second_string_to_chararray;override; }
-         { procedure second_array_to_pointer;override; }
-         // function first_int_to_real: tnode; override;
-         { procedure second_pointer_to_array;override; }
-         { procedure second_chararray_to_string;override; }
-         { procedure second_char_to_string;override; }
+           function first_real_to_real: tnode;override;
            procedure second_int_to_real;override;
-         // procedure second_real_to_real;override;
-         { procedure second_cord_to_pointer;override; }
-         { procedure second_proc_to_procvar;override; }
-         { procedure second_bool_to_int;override; }
            procedure second_int_to_bool;override;
-         { procedure second_load_smallset;override;  }
-         { procedure second_ansistring_to_pchar;override; }
-         { procedure second_pchar_to_string;override; }
-         { procedure second_class_to_intf;override; }
-         { procedure second_char_to_char;override; }
        end;
 
 implementation
 
    uses
-      verbose,globtype,globals,symdef,aasmbase,aasmtai,aasmdata,symtable,
+      verbose,globtype,globals,
+      systems,
+      symdef,aasmbase,aasmtai,aasmdata,symtable,
       defutil,
       cgbase,cgutils,
       pass_1,pass_2,procinfo,ncal,
@@ -78,7 +62,8 @@ implementation
 {$ifdef cpufpemu}
           (current_settings.fputype=fpu_soft) or
 {$endif cpufpemu}
-          (FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype]) then
+          (not(FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) and
+           not(FPUARM_HAS_FPA in fpu_capabilities[current_settings.fputype])) then
           result:=inherited first_int_to_real
         else
           begin
@@ -117,17 +102,19 @@ implementation
               fpu_fpa10,
               fpu_fpa11:
                 expectloc:=LOC_FPUREGISTER;
-              fpu_vfp_first..fpu_vfp_last:
-                expectloc:=LOC_MMREGISTER;
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
+                expectloc:=LOC_MMREGISTER
               else
                 internalerror(2009112702);
             end;
           end;
       end;
 
+
     function tarmtypeconvnode.first_real_to_real: tnode;
       begin
-        if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+        if (current_settings.fputype=fpu_soft) and
+           not (target_info.system in systems_wince) then
           begin
             case tfloatdef(left.resultdef).floattype of
               s32real:
@@ -141,7 +128,7 @@ implementation
                       left:=nil;
                     end;
                   else
-                    internalerror(200610151);
+                    internalerror(2006101504);
                 end;
               s64real:
                 case tfloatdef(resultdef).floattype of
@@ -154,10 +141,10 @@ implementation
                       left:=nil;
                     end;
                   else
-                    internalerror(200610152);
+                    internalerror(2006101505);
                 end;
               else
-                internalerror(200610153);
+                internalerror(2006101506);
             end;
             left:=nil;
             firstpass(result);
@@ -236,7 +223,7 @@ implementation
                           end;
                       end;
                     else
-                      internalerror(200410031);
+                      internalerror(2004100307);
                   end;
               end;
             end;
@@ -255,13 +242,13 @@ implementation
                 location.register,left.location.register),
                 signedprec2vfppf[signed,location.size]));
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
               if (left.location.size<>OS_F32) then
-                internalerror(2009112703);
+                internalerror(2009112704);
               if left.location.size<>location.size then
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size)
               else
@@ -273,7 +260,7 @@ implementation
             end
           else
             { should be handled in pass 1 }
-            internalerror(2019050934);
+            internalerror(2019050909);
         end;
       end;
 
@@ -371,7 +358,7 @@ implementation
                 tbasecgarm(cg).cgsetflags:=false;
               end;
             else
-              internalerror(200311301);
+              internalerror(2003113002);
          end;
          { load flags to register }
          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));

+ 6 - 2
compiler/arm/narmcon.pas

@@ -55,7 +55,9 @@ interface
       begin
         result:=nil;
         if (FPUARM_HAS_VMOV_CONST in fpu_capabilities[current_settings.fputype]) and
-           IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+           IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) and
+           ((tfloatdef(resultdef).floattype=s32real) or
+            (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[init_settings.fputype])) then
            expectloc:=LOC_MMREGISTER
          else
            expectloc:=LOC_CREFERENCE;
@@ -76,7 +78,9 @@ interface
 
       begin
         if (FPUARM_HAS_VMOV_CONST in fpu_capabilities[current_settings.fputype]) and
-          IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+          IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) and
+           ((tfloatdef(resultdef).floattype=s32real) or
+            (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[init_settings.fputype])) then
           begin
             location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
             location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

+ 13 - 13
compiler/arm/narminl.pas

@@ -86,7 +86,12 @@ implementation
                  location.loc := LOC_FPUREGISTER;
                end;
             end;
-          fpu_vfp_first..fpu_vfp_last:
+          fpu_soft:
+            begin
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
+              location_copy(location,left.location);
+            end
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location_copy(location,left.location);
@@ -95,11 +100,6 @@ implementation
                  location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
                  location.loc := LOC_MMREGISTER;
                end;
-            end;
-          fpu_soft:
-            begin
-              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
-              location_copy(location,left.location);
             end
           else
             internalerror(2009111801);
@@ -125,7 +125,7 @@ implementation
                 expectloc:=LOC_FPUREGISTER;
               else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                 expectloc:=LOC_MMREGISTER
-              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
@@ -153,7 +153,7 @@ implementation
                 expectloc:=LOC_FPUREGISTER;
               else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                 expectloc:=LOC_MMREGISTER
-              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
@@ -181,7 +181,7 @@ implementation
                 expectloc:=LOC_FPUREGISTER;
               else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype] then
                 expectloc:=LOC_MMREGISTER
-              else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+              else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
                 begin
                   if tfloatdef(left.resultdef).floattype=s32real then
                     expectloc:=LOC_MMREGISTER
@@ -265,7 +265,7 @@ implementation
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register),pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
@@ -296,7 +296,7 @@ implementation
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register),pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
@@ -327,13 +327,13 @@ implementation
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register),pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype] then
             begin
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register), PF_F32));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
           else
-            internalerror(2009111402);
+            internalerror(2009111405);
         end;
       end;
 

+ 93 - 16
compiler/arm/narmld.pas

@@ -41,7 +41,7 @@ implementation
     uses
       globals,verbose,
       cgbase,cgobj,cgutils,
-      aasmdata,
+      aasmdata,aasmcpu,
       systems,
       symcpu,symdef,
       nld,
@@ -67,21 +67,98 @@ implementation
               begin
                 if not(pi_uses_threadvar in current_procinfo.flags) then
                   internalerror(2012012101);
-                current_asmdata.getjumplabel(l);
-                reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_DATA),-8,sizeof(AInt),[]);
-                href.refaddr:=addr_gottpoff;
-                href.relsymbol:=l;
-                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
-                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
-                cg.a_label(current_asmdata.CurrAsmList,l);
-                reference_reset(href,0,[]);
-                href.base:=NR_PC;
-                href.index:=hregister;
-                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
-                cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister);
-                location.reference.base:=current_procinfo.tlsoffset;
-                location.reference.index:=hregister;
-                handled:=true;
+                case current_settings.tlsmodel of
+                  tlsm_global_dynamic:
+                    begin
+{$ifdef use_tls_dialect_gnu}
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),-8,sizeof(AInt),[]);
+                      href.refaddr:=addr_tlsgd;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,hregister,NR_PC,NR_R0);
+                      cg.g_call(current_asmdata.CurrAsmList,'__tls_get_addr');
+                      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_R0,hregister);
+                      reference_reset(location.reference,location.reference.alignment,location.reference.volatility);
+                      location.reference.base:=hregister;
+{$else use_tls_dialect_gnu}
+                      { On arm, we use the gnu2 tls dialect. It has the advantage that it can be relaxed (optimized) by the linker;
+                        this is not possible with the gnu tls dialect.
+
+                        gnu2 is proposed and documented in
+                          Glauber de Oliveira Costa, Alexandre Oliva: Speeding Up Thread-Local Storage Access in Dynamic Libraries in the ARM platform, 2006.
+                          Link: https://www.fsfla.org/~lxoliva/writeups/TLS/paper-lk2006.pdf
+                      }
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),0,sizeof(AInt),[]);
+                      href.refaddr:=addr_tlsdesc;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,NR_R0);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+
+                      { we have to go the ugly way so we can set addr_tlscall }
+                      cg.allocallcpuregisters(current_asmdata.CurrAsmList);
+                      cg.a_call_name(current_asmdata.CurrAsmList,gvs.mangledname,false);
+                      with taicpu(current_asmdata.CurrAsmList.Last) do
+                        begin
+                          if opcode<>A_BL then
+                            Internalerror(2019092902);
+                          oper[0]^.ref^.refaddr:=addr_tlscall;
+                        end;
+                      cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
+
+                      cg.getcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R0);
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_R0,hregister);
+                      reference_reset(location.reference,location.reference.alignment,location.reference.volatility);
+                      location.reference.base:=current_procinfo.tlsoffset;
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+{$endif use_tls_dialect_gnu}
+                      handled:=true;
+                    end;
+                  tlsm_initial_exec:
+                    begin
+                      current_asmdata.getjumplabel(l);
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),-8,sizeof(AInt),[]);
+                      href.refaddr:=addr_tpoff;
+                      href.relsymbol:=l;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      cg.a_label(current_asmdata.CurrAsmList,l);
+                      reference_reset(href,0,[]);
+                      href.base:=NR_PC;
+                      href.index:=hregister;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister);
+                      location.reference.base:=current_procinfo.tlsoffset;
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+                      handled:=true;
+                    end;
+                  tlsm_local_exec:
+                    begin
+                      reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_TLS),0,sizeof(AInt),[]);
+                      href.refaddr:=addr_tpoff;
+                      hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                      reference_reset(href,0,[]);
+                      location.reference.base:=current_procinfo.tlsoffset;
+                      include(current_procinfo.flags,pi_needs_tls);
+                      location.reference.index:=hregister;
+                      handled:=true;
+                    end;
+                  else
+                    Internalerror(2019092806);
+                end;
               end;
           end;
 

+ 11 - 19
compiler/arm/narmmat.pas

@@ -54,7 +54,7 @@ implementation
       globtype,compinnr,
       cutils,verbose,globals,constexp,
       aasmbase,aasmcpu,aasmtai,aasmdata,
-      defutil,
+      defutil,systems,
       symtype,symconst,symtable,
       cgbase,cgobj,hlcgobj,cgutils,
       pass_2,procinfo,
@@ -315,12 +315,9 @@ implementation
       var
         tmpreg : TRegister;
       begin
-        { if the location is LOC_JUMP, we do the secondpass after the
-          labels are allocated
-        }
+        secondpass(left);
         if not handle_locjump then
           begin
-            secondpass(left);
             case left.location.loc of
               LOC_FLAGS :
                 begin
@@ -358,17 +355,9 @@ implementation
         procname: string[31];
         fdef : tdef;
       begin
-        if (current_settings.fputype=fpu_soft) and
-           (left.resultdef.typ=floatdef) then
-          begin
-            result:=nil;
-            firstpass(left);
-            expectloc:=LOC_REGISTER;
-            exit;
-          end;
-
-        if not(FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[current_settings.fputype]) or
-          (tfloatdef(resultdef).floattype=s32real) then
+        if (FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[current_settings.fputype]) or
+           (target_info.system = system_arm_wince) or
+           is_single(resultdef) then
           exit(inherited pass_1);
 
         result:=nil;
@@ -376,7 +365,10 @@ implementation
         if codegenerror then
           exit;
 
-        if (left.resultdef.typ=floatdef) then
+        { if we get here and VFP support is on, there is no 64 bit VFP operation support available,
+          so in this case the software version needs to be called }
+        if (left.resultdef.typ=floatdef) and ((current_settings.fputype=fpu_soft) or
+          (FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[current_settings.fputype])) then
           begin
             case tfloatdef(resultdef).floattype of
               s64real:
@@ -428,7 +420,7 @@ implementation
                 OS_64:
                   cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_XOR,OS_32,tcgint($80000000),location.registerhi);
               else
-                internalerror(2014033101);
+                internalerror(2014033103);
               end;
             end
           else if FPUARM_HAS_VFP_DOUBLE in fpu_capabilities[init_settings.fputype] then
@@ -447,7 +439,7 @@ implementation
                 location.register,left.location.register), pf));
               cg.maybe_check_for_fpu_exception(current_asmdata.CurrAsmList);
             end
-          else if FPUARM_HAS_VFP_SINGLE_ONLY in fpu_capabilities[init_settings.fputype] then
+          else if FPUARM_HAS_VFP_EXTENSION in fpu_capabilities[init_settings.fputype] then
             begin
               hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;

+ 332 - 0
compiler/arm/narmutil.pas

@@ -0,0 +1,332 @@
+{
+    Copyright (c) 2019 by Florian Klämpfl
+
+    ARM version of some node tree helper routines
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit narmutil;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    cclasses,ngenutil;
+
+  type
+    tarmnodeutils = class(tnodeutils)
+      class procedure InsertObjectInfo; override;
+      class procedure insert_init_final_table(entries: tfplist); override;
+    end;
+
+
+  implementation
+
+    uses
+      verbose,
+      systems,
+      globals,
+      cpuinfo,cpubase,
+      cgbase,cgutils,
+      aasmbase,aasmdata,aasmtai,aasmcpu,
+      symdef;
+
+    const
+      Tag_File = 1;
+      Tag_Section = 2;
+      Tag_Symbol = 3;
+      Tag_CPU_raw_name = 4;
+      Tag_CPU_name = 5;
+      Tag_CPU_arch = 6;
+      Tag_CPU_arch_profile = 7;
+      Tag_ARM_ISA_use = 8;
+      Tag_THUMB_ISA_use = 9;
+      Tag_FP_Arch = 10;
+      Tag_WMMX_arch = 11;
+      Tag_Advanced_SIMD_arch = 12;
+      Tag_PCS_config = 13;
+      Tag_ABI_PCS_R9_use = 14;
+      Tag_ABI_PCS_RW_data = 15;
+      Tag_ABI_PCS_RO_data = 16;
+      Tag_ABI_PCS_GOT_use = 17;
+      Tag_ABI_PCS_wchar_t = 18;
+      Tag_ABI_FP_rounding = 19;
+      Tag_ABI_FP_denormal = 20;
+      Tag_ABI_FP_exceptions = 21;
+      Tag_ABI_FP_user_exceptions = 22;
+      Tag_ABI_FP_number_model = 23;
+      Tag_ABI_align_needed = 24;
+      Tag_ABI_align8_preserved = 25;
+      Tag_ABI_enum_size = 26;
+      Tag_ABI_HardFP_use = 27;
+      Tag_ABI_VFP_args = 28;
+      Tag_ABI_WMMX_args = 29;
+      Tag_ABI_optimization_goals = 30;
+      Tag_ABI_FP_optimization_goals = 31;
+      Tag_compatiblity = 32;
+      Tag_CPU_unaligned_access = 34;
+      Tag_FP_HP_extension = 36;
+      Tag_ABI_FP_16bit_format = 38;
+      Tag_MPextension_use = 42;
+      Tag_DIV_use = 44;
+      Tag_nodefaults = 64;
+      Tag_also_compatible_with = 65;
+      Tag_conformance = 67;
+      Tag_T2EE_use = 66;
+      Tag_Virtualization_use = 68;
+
+    class procedure tarmnodeutils.InsertObjectInfo;
+      begin
+        inherited InsertObjectInfo;
+        { write eabi attributes to object file? }
+        if (target_info.system in [system_arm_linux]) and (target_info.abi in [abi_eabihf,abi_eabi]) then
+          begin
+            case current_settings.cputype of
+              cpu_armv3:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,''));
+                end;
+              cpu_armv4:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,1));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'4'));
+                end;
+              cpu_armv4t:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,2));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'4T'));
+                end;
+              cpu_armv5t:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,3));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5T'));
+                end;
+              cpu_armv5te:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,4));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5TE'));
+                end;
+              cpu_armv5tej:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,5));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'5TEJ'));
+                end;
+              cpu_armv6:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,6));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6'));
+                end;
+              cpu_armv6k:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,9));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6K'));
+                end;
+              cpu_armv6t2:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,8));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'T2'));
+                end;
+              cpu_armv6z:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,7));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6Z'));
+                end;
+              cpu_armv6m:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,11));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'6-M'));
+                end;
+              cpu_armv7:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,0));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7'));
+                end;
+              cpu_armv7a:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$41));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-A'));
+                end;
+              cpu_armv7r:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$52));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-R'));
+                end;
+              cpu_armv7m:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,10));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$4D));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7-M'));
+                end;
+              cpu_armv7em:
+                begin
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch,13));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_arch_profile,$4D));
+                  current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_CPU_name,'7E-M'));
+                end;
+              else
+                Internalerror(2019100602);
+            end;
+            case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc,
+              fpu_fpa,
+              fpu_fpa10,
+              fpu_fpa11:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,0));
+              fpu_vfpv2:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,2));
+              fpu_vfpv3,
+              fpu_neon_vfpv3:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,3));
+              fpu_vfpv3_d16:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,4));
+              fpu_fpv4_sp_d16,
+              fpu_fpv4_s16:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,6));
+              fpu_vfpv4,
+              fpu_neon_vfpv4:
+                current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_FP_Arch,5));
+              { else not needed anymore PM 2020/04/13
+                Internalerror(2019100603); }
+            end;
+            if FPUARM_HAS_FMA in fpu_capabilities[current_settings.fputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,2))
+            else if FPUARM_HAS_NEON in fpu_capabilities[current_settings.fputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,1))
+            else
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_Advanced_SIMD_arch,0));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ARM_ISA_use,1));
+            if CPUARM_HAS_THUMB2 in cpu_capabilities[current_settings.cputype] then
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_THUMB_ISA_use,2))
+            else
+              current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_THUMB_ISA_use,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_VFP_args,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_denormal,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_exceptions,1));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_FP_number_model,3));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_align_needed,0));
+            current_asmdata.asmlists[al_start].Concat(tai_eabi_attribute.create(Tag_ABI_align8_preserved,1));
+            { gcc typically writes more attributes, such as enum size, wchar size and optimization goal; however, these
+              are normally not module-global in FPC }
+          end;
+      end;
+
+    class procedure tarmnodeutils.insert_init_final_table(entries:tfplist);
+
+      procedure genentry(list : TAsmList);
+        var
+          ref: treference;
+        begin
+          if GenerateThumbCode then
+            list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]))
+          else
+            begin
+              reference_reset(ref,4,[]);
+              ref.index:=NR_STACK_POINTER_REG;
+              ref.addressmode:=AM_PREINDEXED;
+              list.concat(setoppostfix(taicpu.op_ref_regset(A_STM,ref,R_INTREGISTER,R_SUBWHOLE,[RS_R14]),PF_FD));
+            end;
+        end;
+
+      procedure genexit(list : TAsmList);
+        var
+          ref: treference;
+        begin
+          if GenerateThumbCode then
+            list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R15]))
+          else
+            begin
+              reference_reset(ref,4,[]);
+              ref.index:=NR_STACK_POINTER_REG;
+              ref.addressmode:=AM_PREINDEXED;
+              list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,[RS_R15]),PF_FD));
+            end;
+        end;
+
+      var
+        initList, finalList, header: TAsmList;
+        entry : pinitfinalentry;
+        i : longint;
+      begin
+        if not(tf_init_final_units_by_calls in target_info.flags) then
+          begin
+            inherited insert_init_final_table(entries);
+            exit;
+          end;
+        initList:=TAsmList.create;
+        finalList:=TAsmList.create;
+
+        genentry(finalList);
+        genentry(initList);
+
+        for i:=0 to entries.count-1 do
+          begin
+            entry:=pinitfinalentry(entries[i]);
+            if entry^.finifunc<>'' then
+              finalList.Concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(entry^.finifunc,AT_FUNCTION)));
+            if entry^.initfunc<>'' then
+              initList.Concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(entry^.initfunc,AT_FUNCTION)));
+          end;
+
+        genexit(finalList);
+        genexit(initList);
+
+        header:=TAsmList.create;
+        new_section(header, sec_code, 'FPC_INIT_FUNC_TABLE', 1);
+        header.concat(tai_symbol.Createname_global('FPC_INIT_FUNC_TABLE',AT_FUNCTION,0,voidcodepointertype));
+
+        initList.insertList(header);
+        header.free;
+
+        current_asmdata.AsmLists[al_procedures].concatList(initList);
+
+        header:=TAsmList.create;
+        new_section(header, sec_code, 'FPC_FINALIZE_FUNC_TABLE', 1);
+        header.concat(tai_symbol.Createname_global('FPC_FINALIZE_FUNC_TABLE',AT_FUNCTION,0,voidcodepointertype));
+
+        finalList.insertList(header);
+        header.free;
+
+        current_asmdata.AsmLists[al_procedures].concatList(finalList);
+
+        initList.Free;
+        finalList.Free;
+
+        inherited insert_init_final_table(entries);
+      end;
+
+  begin
+    cnodeutils:=tarmnodeutils;
+  end.
+

+ 1 - 1
compiler/arm/raarmgas.pas

@@ -988,7 +988,7 @@ Unit raarmgas;
                        OPR_REFERENCE :
                          inc(oper.opr.ref.offset,l);
                        else
-                         internalerror(200309202);
+                         internalerror(2003092021);
                      end;
                    end
                end;

+ 2 - 2
compiler/arm/rgcpu.pas

@@ -491,7 +491,7 @@ unit rgcpu;
             tmpref.index:=hreg;
 
             if spilltemp.index<>NR_NO then
-              internalerror(200401263);
+              internalerror(2004012601);
 
             helplist.concat(spilling_create_load(tmpref,tempreg));
             if getregtype(tempreg)=R_INTREGISTER then
@@ -545,7 +545,7 @@ unit rgcpu;
             helplist.concat(taicpu.op_reg_ref(A_LDR,hreg,tmpref));
 
             if spilltemp.index<>NR_NO then
-              internalerror(200401263);
+              internalerror(2004012602);
 
             reference_reset_base(tmpref,current_procinfo.framepointer,0,ctempposinvalid,sizeof(pint),[]);
             tmpref.index:=hreg;

+ 51 - 0
compiler/arm/tripletcpu.pas

@@ -0,0 +1,51 @@
+{
+    Copyright (c) 2020 by Jonas Maebe
+
+    Construct the cpu part of the triplet
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit tripletcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  globtype;
+
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+
+implementation
+
+uses
+  globals, cutils, systems, cpuinfo;
+
+{ Builds the cpu component of the target triplet for the given triplet
+  style, based on the current cpu type, instruction set and endianness. }
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+  var
+    cpuname: ansistring;
+    thumbforllvm: boolean;
+  begin
+    cpuname:=lower(cputypestr[current_settings.cputype]);
+    { for llvm-style triplets, the first three characters of the cpu name
+      are replaced with 'thumb' when thumb code is being generated }
+    thumbforllvm:=(tripletstyle=triplet_llvm) and
+                  (current_settings.instructionset=is_thumb);
+    if thumbforllvm then
+      cpuname:='thumb'+copy(cpuname,4,255);
+    { big endian targets get a 'be' suffix }
+    if target_info.endian=endian_big then
+      cpuname:=cpuname+'be';
+    result:=cpuname;
+  end;
+
+
+end.
+

+ 1275 - 0
compiler/armgen/aoptarm.pas

@@ -0,0 +1,1275 @@
+{
+    Copyright (c) 1998-2020 by Jonas Maebe and Florian Klaempfl, members of the Free Pascal
+    Development Team
+
+    This unit implements an assembler optimizer object shared by ARM and AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+Unit aoptarm;
+
+{$i fpcdefs.inc}
+
+{ $define DEBUG_PREREGSCHEDULER}
+{ $define DEBUG_AOPTCPU}
+
+Interface
+
+uses
+  cgbase, cgutils, cpubase, aasmtai, aasmcpu,aopt, aoptobj;
+
+Type
+  { Although ARM and AArch64 do not look very similar at first glance,
+    several optimizations can be shared between both }
+  TARMAsmOptimizer = class(TAsmOptimizer)
+    { Inserts s as an assembler comment before p when DEBUG_AOPTCPU is
+      defined; otherwise it is an empty stub }
+    procedure DebugMsg(const s : string; p : tai);
+
+    { Tries to fold "mov regX,reg0" (movp) into the instruction p that loads
+      reg0; returns true if movp was removed. optimizer only labels the
+      debug message }
+    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
+    { Peephole optimizations for a register-to-register mov p; on success
+      p might no longer be a mov instruction }
+    function RedundantMovProcess(var p: tai; var hp1: tai): boolean;
+    { Searches forward from Current for the next instruction that involves
+      reg; returns true and sets Next when one is found }
+    function GetNextInstructionUsingReg(Current: tai; out Next: tai; reg: TRegister): Boolean;
+
+    { Peephole handlers for the named instruction; see the implementations
+      for the patterns that are handled }
+    function OptPass1UXTB(var p: tai): Boolean;
+    function OptPass1UXTH(var p: tai): Boolean;
+    function OptPass1SXTB(var p: tai): Boolean;
+    function OptPass1SXTH(var p: tai): Boolean;
+    function OptPass1And(var p: tai): Boolean; virtual;
+  End;
+
+  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
+{$ifdef AARCH64}
+  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
+{$endif AARCH64}
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
+
+  function RefsEqual(const r1, r2: treference): boolean;
+
+  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+
+Implementation
+
+  uses
+    cutils,verbose,globtype,globals,
+    systems,
+    cpuinfo,
+    cgobj,procinfo,
+    aasmbase,aasmdata;
+
+
+{$ifdef DEBUG_AOPTCPU}
+  { Debug variant: places s as an assembler comment directly before p }
+  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);
+    var
+      note : tai_comment;
+    begin
+      note:=tai_comment.Create(strpnew(s));
+      asml.insertbefore(note, p);
+    end;
+{$else DEBUG_AOPTCPU}
+  { Non-debug variant: empty stub, declared inline }
+  procedure TARMAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
+    begin
+    end;
+{$endif DEBUG_AOPTCPU}
+
+  { Returns true if instr is an instruction whose opcode, condition and
+    postfix are members of the given sets; an empty set accepts anything.
+    Only opcodes up to LastCommonAsmOp can match a non-empty op set. }
+  function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
+    var
+      ins : taicpu;
+    begin
+      result:=false;
+      if instr.typ<>ait_instruction then
+        exit;
+      ins:=taicpu(instr);
+      { the range check must come first: op only covers the common opcodes }
+      if (op<>[]) and
+         ((ins.opcode>LastCommonAsmOp) or not(ins.opcode in op)) then
+        exit;
+      if (cond<>[]) and not(ins.condition in cond) then
+        exit;
+      result:=(postfix=[]) or (ins.oppostfix in postfix);
+    end;
+
+
+  { Returns true if instr is an instruction with opcode op whose condition
+    and postfix are members of the given sets; an empty set accepts anything. }
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
+    var
+      ins : taicpu;
+    begin
+      result:=false;
+      if instr.typ<>ait_instruction then
+        exit;
+      ins:=taicpu(instr);
+      if ins.opcode<>op then
+        exit;
+      if (cond<>[]) and not(ins.condition in cond) then
+        exit;
+      result:=(postfix=[]) or (ins.oppostfix in postfix);
+    end;
+
+
+{$ifdef AARCH64}
+  { Returns true if instr is an instruction whose opcode is one of ops and
+    whose postfix is a member of postfix (an empty postfix set accepts any
+    postfix). }
+  function MatchInstruction(const instr: tai; const ops : array of TAsmOp; const postfix: TOpPostfixes): boolean;
+  var
+    idx : longint;
+  begin
+    result:=false;
+    if instr.typ <> ait_instruction then
+      exit;
+    { the postfix test does not depend on the opcode, so it is done once }
+    if (postfix <> []) and not(taicpu(instr).oppostfix in postfix) then
+      exit;
+    for idx:=low(ops) to high(ops) do
+      if taicpu(instr).opcode = ops[idx] then
+        begin
+          result:=true;
+          exit;
+        end;
+  end;
+{$endif AARCH64}
+
+  { Returns true if instr is an instruction with opcode op whose postfix is
+    a member of postfix (an empty postfix set accepts any postfix). }
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
+    begin
+      result:=false;
+      if instr.typ<>ait_instruction then
+        exit;
+      if taicpu(instr).opcode<>op then
+        exit;
+      result:=(postfix=[]) or (taicpu(instr).oppostfix in postfix);
+    end;
+
+
+  { Returns true if oper is a register operand holding exactly reg. }
+  function MatchOperand(const oper: TOper; const reg: TRegister): boolean; inline;
+    begin
+      if oper.typ<>top_reg then
+        result:=false
+      else
+        result:=oper.reg=reg;
+    end;
+
+
+  { Returns true if r1 and r2 describe the same memory reference. References
+    with a non-empty volatility set never compare equal. }
+  function RefsEqual(const r1, r2: treference): boolean;
+    begin
+      result:=false;
+      { volatile references are never treated as equal }
+      if (r1.volatility<>[]) or (r2.volatility<>[]) then
+        exit;
+      { registers, scaling and offset }
+      if (r1.offset<>r2.offset) or
+         (r1.base<>r2.base) or
+         (r1.index<>r2.index) or
+         (r1.scalefactor<>r2.scalefactor) then
+        exit;
+      { symbols and relocation kind }
+      if (r1.symbol<>r2.symbol) or
+         (r1.refaddr<>r2.refaddr) or
+         (r1.relsymbol<>r2.relsymbol) then
+        exit;
+{$ifdef ARM}
+      if r1.signindex<>r2.signindex then
+        exit;
+{$endif ARM}
+      { shift and addressing mode }
+      if (r1.shiftimm<>r2.shiftimm) or
+         (r1.addressmode<>r2.addressmode) or
+         (r1.shiftmode<>r2.shiftmode) then
+        exit;
+      result:=true;
+    end;
+
+
+  { Returns true if both operands have the same type and the same contents.
+    Only constants, registers, condition codes, real constants and
+    references are compared; any other operand type yields false. }
+  function MatchOperand(const oper1: TOper; const oper2: TOper): boolean; inline;
+    begin
+      result:=false;
+      if oper1.typ<>oper2.typ then
+        exit;
+      case oper1.typ of
+        top_reg:
+          result:=oper1.reg=oper2.reg;
+        top_const:
+          result:=oper1.val=oper2.val;
+        top_ref:
+          result:=RefsEqual(oper1.ref^,oper2.ref^);
+        top_conditioncode:
+          result:=oper1.cc=oper2.cc;
+        top_realconst:
+          result:=oper1.val_real=oper2.val_real;
+        else
+          result:=false;
+      end;
+    end;
+
+
+  { Walks forward from Current and returns true, with Next set to it, for the
+    first instruction that involves reg. The walk stops after the first
+    instruction unless cs_opt_level3 is enabled, and also stops at
+    non-instructions, calls/jumps and (on ARM) instructions modifying PC. }
+  function TARMAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
+    Out Next: tai; reg: TRegister): Boolean;
+    begin
+      Result:=false;
+      Next:=Current;
+      while GetNextInstruction(Next,Next) do
+        begin
+          if RegInInstruction(reg,Next) then
+            begin
+              { Found something }
+              Result:=true;
+              exit;
+            end;
+
+          { decide whether it is allowed to look any further }
+          if not(cs_opt_level3 in current_settings.optimizerswitches) or
+            (Next.typ<>ait_instruction) or
+            is_calljmp(taicpu(Next).opcode)
+{$ifdef ARM}
+            or RegModifiedByInstruction(NR_PC,Next)
+{$endif ARM}
+            then
+            break;
+        end;
+    end;
+
+
+  { Tries to fold the register move movp into the instruction p:
+      <op> reg0,...
+      mov  reg2,reg0
+    becomes
+      <op> reg2,...
+    This is only done if movp is an unshifted mov with the same condition as
+    p, p really loads a new value into reg0, and a deallocation of reg0 is
+    found after movp. optimizer is only used to label the debug message.
+    Returns true if movp was removed (and freed). }
+  function TARMAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
+    var
+      alloc,
+      dealloc : tai_regalloc;
+      hp1 : tai;
+    begin
+      Result:=false;
+      if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
+        { We can't optimize if there is a shiftop }
+        (taicpu(movp).ops=2) and
+        MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
+        { don't mess with moves to fp }
+        (taicpu(movp).oper[0]^.reg<>current_procinfo.framepointer) and
+        { the destination register of the mov must not be used between p and movp }
+        not(RegUsedBetween(taicpu(movp).oper[0]^.reg,p,movp)) and
+{$ifdef ARM}
+        { PC should be changed only by moves }
+        (taicpu(movp).oper[0]^.reg<>NR_PC) and
+        { cb[n]z are thumb instructions which require specific registers, with no wide forms }
+        (taicpu(p).opcode<>A_CBZ) and
+        (taicpu(p).opcode<>A_CBNZ) and
+        { There is a special requirement for MUL and MLA, oper[0] and oper[1] are not allowed to be the same }
+        not (
+          (taicpu(p).opcode in [A_MLA, A_MUL]) and
+          (taicpu(p).oper[1]^.reg = taicpu(movp).oper[0]^.reg) and
+          (current_settings.cputype < cpu_armv6)
+        ) and
+{$endif ARM}
+        { Take care to only do this for instructions which REALLY load to the first register.
+          Otherwise
+            str reg0, [reg1]
+            mov reg2, reg0
+          will be optimized to
+            str reg2, [reg1]
+        }
+        RegLoadedWithNewValue(taicpu(p).oper[0]^.reg, p) then
+        begin
+          { the old destination of p must be released after movp, otherwise
+            its value might still be needed }
+          dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(movp.Next));
+          if assigned(dealloc) then
+            begin
+              DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
+              result:=true;
+
+              { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
+                and remove it if possible }
+              asml.Remove(dealloc);
+              alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.previous));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  alloc.free;
+                  dealloc.free;
+                end
+              else
+                { no matching allocation found: keep the deallocation, but
+                  move it directly after p }
+                asml.InsertAfter(dealloc,p);
+
+              { try to move the allocation of the target register }
+              GetLastInstruction(movp,hp1);
+              alloc:=FindRegAlloc(taicpu(movp).oper[0]^.reg,tai(hp1.Next));
+              if assigned(alloc) then
+                begin
+                  asml.Remove(alloc);
+                  asml.InsertBefore(alloc,p);
+                  { adjust used regs }
+                  IncludeRegInUsedRegs(taicpu(movp).oper[0]^.reg,UsedRegs);
+                end;
+
+              { finally get rid of the mov }
+              taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              { Remove preindexing and postindexing for LDR in some cases.
+                For example:
+                  ldr reg2,[reg1, xxx]!
+                  mov reg1,reg2
+                must be translated to:
+                  ldr reg1,[reg1, xxx]
+
+                Preindexing must be removed there, since the same register is used as the base and as the target.
+                Such case is not allowed for ARM CPU and produces crash. }
+              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
+                and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
+              then
+                taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
+              asml.remove(movp);
+              movp.free;
+            end;
+        end;
+    end;
+
+
+  { Peephole optimizations for a register-to-register mov p without postfix.
+    hp1 is an instruction following p that was located by the caller
+    (NOTE(review): presumably the next instruction; confirm with callers).
+
+    Two groups of patterns are handled:
+     - the mov is merged into a data-processing instruction hp1 that reads
+       the mov destination (p is freed and replaced by hp1);
+     - otherwise the mov source is forwarded into following ldr/str
+       references and mov instructions, and p is removed once its
+       destination is no longer needed.
+
+    Returns true only when p itself was replaced or removed; rewrites of
+    later instructions alone leave the result false. hp1 is set to nil if
+    the instruction it pointed to was freed. }
+  function TARMAsmOptimizer.RedundantMovProcess(var p: tai; var hp1: tai):boolean;
+    var
+      I: Integer;
+      current_hp, next_hp: tai;
+      LDRChange: Boolean;
+    begin
+      Result:=false;
+      {
+        change
+        mov r1, r0
+        add r1, r1, #1
+        to
+        add r1, r0, #1
+
+        Todo: Make it work for mov+cmp too
+
+        CAUTION! If this one is successful p might not be a mov instruction anymore!
+      }
+      if (taicpu(p).ops = 2) and
+         (taicpu(p).oper[1]^.typ = top_reg) and
+         (taicpu(p).oppostfix = PF_NONE) then
+        begin
+
+          if
+            MatchInstruction(hp1, [A_ADD, A_ADC,
+{$ifdef ARM}
+                                   A_RSB, A_RSC,
+{$endif ARM}
+                                   A_SUB, A_SBC,
+                                   A_AND, A_BIC, A_EOR, A_ORR, A_MOV, A_MVN],
+                             [taicpu(p).condition], []) and
+            { MOV and MVN might only have 2 ops }
+            (taicpu(hp1).ops >= 2) and
+            MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg) and
+            (taicpu(hp1).oper[1]^.typ = top_reg) and
+            (
+              (taicpu(hp1).ops = 2) or
+              (taicpu(hp1).oper[2]^.typ in [top_reg, top_const, top_shifterop])
+            ) and
+{$ifdef AARCH64}
+            (taicpu(p).oper[1]^.reg<>NR_SP) and
+{$endif AARCH64}
+            not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+            begin
+              { When we get here we still don't know if the registers match }
+              for I:=1 to 2 do
+                {
+                  If the first loop was successful p will be replaced with hp1.
+                  The checks will still be ok, because all required information
+                  will also be in hp1 then.
+                }
+                if (taicpu(hp1).ops > I) and
+                   MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[I]^.reg)
+{$ifdef ARM}
+                   { prevent certain combinations on thumb(2), this is only a safe approximation }
+                   and (not(GenerateThumbCode or GenerateThumb2Code) or
+                    ((getsupreg(taicpu(p).oper[1]^.reg)<>RS_R13) and
+                     (getsupreg(taicpu(p).oper[1]^.reg)<>RS_R15)))
+{$endif ARM}
+
+                then
+                  begin
+                    DebugMsg('Peephole RedundantMovProcess done', hp1);
+                    taicpu(hp1).oper[I]^.reg := taicpu(p).oper[1]^.reg;
+                    { p equals hp1 if an earlier iteration already replaced
+                      it; the mov must only be freed once }
+                    if p<>hp1 then
+                      begin
+                        asml.remove(p);
+                        p.free;
+                        p:=hp1;
+                        Result:=true;
+                      end;
+                  end;
+
+              if Result then Exit;
+            end
+          { Change:                   Change:
+              mov     r1, r0            mov     r1, r0
+              ...                       ...
+              ldr/str r2, [r1, etc.]    mov     r2, r1
+            To:                       To:
+              ldr/str r2, [r0, etc.]    mov     r2, r0
+          }
+          else if (taicpu(p).condition = C_None) and (taicpu(p).oper[1]^.typ = top_reg)
+{$ifdef ARM}
+            and not (getsupreg(taicpu(p).oper[0]^.reg) in [RS_PC, RS_R14, RS_STACK_POINTER_REG])
+            and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_PC)
+            { Thumb does not support references with base and index one being SP }
+            and (not(GenerateThumbCode) or (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG))
+{$endif ARM}
+{$ifdef AARCH64}
+            and (getsupreg(taicpu(p).oper[0]^.reg) <> RS_STACK_POINTER_REG)
+{$endif AARCH64}
+            then
+            begin
+              current_hp := p;
+              { TmpUsedRegs tracks register usage while scanning ahead, so
+                UsedRegs itself keeps describing the state at p }
+              TransferUsedRegs(TmpUsedRegs);
+
+              { Search local instruction block }
+              while GetNextInstruction(current_hp, next_hp) and (next_hp <> BlockEnd) and (next_hp.typ = ait_instruction) do
+                begin
+                  UpdateUsedRegs(TmpUsedRegs, tai(current_hp.Next));
+                  { LDRChange records whether next_hp was modified in this iteration }
+                  LDRChange := False;
+
+                  if (taicpu(next_hp).opcode in [A_LDR,A_STR]) and (taicpu(next_hp).ops = 2) then
+                    begin
+
+                      { Change the registers from r1 to r0 }
+                      if (taicpu(next_hp).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) and
+{$ifdef ARM}
+                        { This optimisation conflicts with something and raises
+                          an access violation - needs further investigation. [Kit] }
+                        (taicpu(next_hp).opcode <> A_LDR) and
+{$endif ARM}
+                        { Don't mess around with the base register if the
+                          reference is pre- or post-indexed }
+                        (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_OFFSET) then
+                        begin
+                          taicpu(next_hp).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
+                          LDRChange := True;
+                        end;
+
+                      if taicpu(next_hp).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
+                        begin
+                          taicpu(next_hp).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
+                          LDRChange := True;
+                        end;
+
+                      if LDRChange then
+                        DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 1)', next_hp);
+
+                      { Drop out if we're dealing with pre-indexed references }
+                      if (taicpu(next_hp).oper[1]^.ref^.addressmode = AM_PREINDEXED) and
+                        (
+                          RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) or
+                          RegInRef(taicpu(p).oper[1]^.reg, taicpu(next_hp).oper[1]^.ref^)
+                        ) then
+                        begin
+                          { Remember to update register allocations }
+                          if LDRChange then
+                            AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
+
+                          Break;
+                        end;
+
+                      { The register being stored can be potentially changed (as long as it's not the stack pointer) }
+                      if (taicpu(next_hp).opcode = A_STR) and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
+                        MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
+                        begin
+                          DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovLdr2Ldr 2)', next_hp);
+                          taicpu(next_hp).oper[0]^.reg := taicpu(p).oper[1]^.reg;
+                          LDRChange := True;
+                        end;
+
+                      if LDRChange and (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) then
+                        begin
+                          AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
+                          { the mov itself can go if its destination is either
+                            overwritten by this ldr or not used afterwards }
+                          if (taicpu(p).oppostfix = PF_None) and
+                            (
+                              (
+                                (taicpu(next_hp).opcode = A_LDR) and
+                                MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg)
+                              ) or
+                              not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs)
+                            ) and
+                            { Double-check to see if the old registers were actually
+                              changed (e.g. if the super registers matched, but not
+                              the sizes, they won't be changed). }
+                            (
+                              (taicpu(next_hp).opcode = A_LDR) or
+                              not RegInOp(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[0]^)
+                            ) and
+                            not RegInRef(taicpu(p).oper[0]^.reg, taicpu(next_hp).oper[1]^.ref^) then
+                            begin
+                              DebugMsg('Peephole Optimization: RedundantMovProcess 2a done', p);
+                              RemoveCurrentP(p);
+                              Result := True;
+                              Exit;
+                            end;
+                        end;
+                    end
+                  else if (taicpu(next_hp).opcode = A_MOV) and (taicpu(next_hp).oppostfix = PF_None) and
+                    (taicpu(next_hp).ops = 2) then
+                    begin
+                      if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[0]^.reg) then
+                        begin
+                          { Found another mov that writes entirely to the register }
+                          if RegUsedBetween(taicpu(p).oper[0]^.reg, p, next_hp) then
+                            begin
+                              { Register was used beforehand }
+                              if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[1]^.reg) then
+                                begin
+                                  { This MOV is exactly the same as the first one.
+                                    Since none of the registers have changed value
+                                    at this point, we can remove it. }
+                                  DebugMsg('Peephole Optimization: RedundantMovProcess 3a done', next_hp);
+
+                                  if (next_hp = hp1) then
+                                    { Don't let hp1 become a dangling pointer }
+                                    hp1 := nil;
+
+                                  asml.Remove(next_hp);
+                                  next_hp.Free;
+
+                                  { We still have the original p, so we can continue optimising;
+                                   if it was -O2 or below, this instruction appeared immediately
+                                   after the first MOV, so we're technically not looking more
+                                   than one instruction ahead after it's removed! [Kit] }
+                                  Continue;
+                                end
+                              else
+                                { Register changes value - drop out }
+                                Break;
+                            end;
+
+                          { We can delete the first MOV (only if the second MOV is unconditional) }
+{$ifdef ARM}
+                          if (taicpu(p).oppostfix = PF_None) and
+                            (taicpu(next_hp).condition = C_None) then
+{$endif ARM}
+                            begin
+                              DebugMsg('Peephole Optimization: RedundantMovProcess 2b done', p);
+                              RemoveCurrentP(p);
+                              Result := True;
+                            end;
+                          Exit;
+                        end
+                      else if MatchOperand(taicpu(next_hp).oper[1]^, taicpu(p).oper[0]^.reg) then
+                        begin
+                          if MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg)
+                            { Be careful - if the entire register is not used, removing this
+                              instruction will leave the unused part uninitialised }
+{$ifdef AARCH64}
+                            and (getsubreg(taicpu(p).oper[1]^.reg) = R_SUBQ)
+{$endif AARCH64}
+                            then
+                            begin
+                              { Instruction will become mov r1,r1 }
+                              DebugMsg('Peephole Optimization: Mov2None 2 done', next_hp);
+
+                              { Allocate r1 between the instructions; not doing
+                                so may cause problems when removing superfluous
+                                MOVs later (i38055) }
+                              AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
+
+                              if (next_hp = hp1) then
+                                { Don't let hp1 become a dangling pointer }
+                                hp1 := nil;
+
+                              asml.Remove(next_hp);
+                              next_hp.Free;
+                              Continue;
+                            end;
+
+                          { Change the old register (checking the first operand again
+                            forces it to be left alone if the full register is not
+                            used, lest mov w1,w1 gets optimised out by mistake. [Kit] }
+{$ifdef AARCH64}
+                          if not MatchOperand(taicpu(next_hp).oper[0]^, taicpu(p).oper[1]^.reg) then
+{$endif AARCH64}
+                            begin
+                              DebugMsg('Peephole Optimization: ' + std_regname(taicpu(p).oper[0]^.reg) + ' = ' + std_regname(taicpu(p).oper[1]^.reg) + ' (MovMov2Mov 2)', next_hp);
+                              taicpu(next_hp).oper[1]^.reg := taicpu(p).oper[1]^.reg;
+                              AllocRegBetween(taicpu(p).oper[1]^.reg, p, next_hp, UsedRegs);
+
+                              { If this was the only reference to the old register,
+                                then we can remove the original MOV now }
+
+                              if (taicpu(p).oppostfix = PF_None) and
+                                { A bit of a hack - sometimes registers aren't tracked properly, so do not
+                                  remove if the register was apparently not allocated when its value is
+                                  first set at the MOV command (this is especially true for the stack
+                                  register). [Kit] }
+                                (getsupreg(taicpu(p).oper[1]^.reg) <> RS_STACK_POINTER_REG) and
+                                RegInUsedRegs(taicpu(p).oper[0]^.reg, UsedRegs) and
+                                not RegUsedAfterInstruction(taicpu(p).oper[0]^.reg, next_hp, TmpUsedRegs) then
+                                begin
+                                  DebugMsg('Peephole Optimization: RedundantMovProcess 2c done', p);
+                                  RemoveCurrentP(p);
+                                  Result := True;
+                                  Exit;
+                                end;
+                            end;
+                        end;
+                    end;
+
+                  { On low optimisation settings, don't search more than one instruction ahead }
+                  if not(cs_opt_level3 in current_settings.optimizerswitches) or
+                    { Stop at procedure calls and jumps }
+                    is_calljmp(taicpu(next_hp).opcode) or
+                    { If the read register has changed value, or the MOV
+                      destination register has been used, drop out }
+                    RegInInstruction(taicpu(p).oper[0]^.reg, next_hp) or
+                    RegModifiedByInstruction(taicpu(p).oper[1]^.reg, next_hp) then
+                    Break;
+
+                  current_hp := next_hp;
+                end;
+            end;
+        end;
+    end;
+
+
+  { Peephole optimizations for the uxtb instruction p. The patterns below are
+    tried in order and at most one of them is applied. Returns true if the
+    code was changed; when p itself is removed, p is set to the instruction
+    that followed it. }
+  function TARMAsmOptimizer.OptPass1UXTB(var p : tai) : Boolean;
+    var
+      hp1, hp2: tai;
+    begin
+      Result:=false;
+      {
+        change
+        uxtb reg2,reg1
+        strb reg2,[...]
+        dealloc reg2
+        to
+        strb reg1,[...]
+      }
+      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
+        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+        { the reference in strb must not use reg2 }
+        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxtbStrb2Strb done', p);
+          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+          { fetch the successor before p is freed }
+          GetNextInstruction(p,hp2);
+          asml.remove(p);
+          p.free;
+          p:=hp2;
+          result:=true;
+        end
+      {
+        change
+        uxtb reg2,reg1
+        uxth reg3,reg2
+        dealloc reg2
+        to
+        uxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
+        (taicpu(hp1).ops = 2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxtbUxth2Uxtb done', p);
+          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+          asml.remove(hp1);
+          hp1.free;
+          result:=true;
+        end
+      {
+        change
+        uxtb reg2,reg1
+        uxtb reg3,reg2
+        dealloc reg2
+        to
+        uxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
+        (taicpu(hp1).ops = 2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
+          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+          asml.remove(hp1);
+          hp1.free;
+          result:=true;
+        end
+      {
+        change
+        uxtb reg2,reg1
+        and reg3,reg2,#0x*FF
+        dealloc reg2
+        to
+        uxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=3) and
+        (taicpu(hp1).oper[2]^.typ=top_const) and
+        { the mask must keep all of the low 8 bits }
+        ((taicpu(hp1).oper[2]^.val and $FF)=$FF) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxtbAndImm2Uxtb done', p);
+          { turn the and into the uxtb and drop the original uxtb }
+          taicpu(hp1).opcode:=A_UXTB;
+          taicpu(hp1).ops:=2;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p,hp2);
+          asml.remove(p);
+          p.free;
+          p:=hp2;
+          result:=true;
+        end
+      { no specific pattern matched: try to fold a following mov of the result into p }
+      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+        RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
+        Result:=true;
+    end;
+
+
+  { Peephole pass 1 for an UXTH instruction at p.
+
+    Tries, in order, to fold the zero extension into the next instruction that
+    uses its destination register (strh, a second uxth, or an and with a mask
+    covering the low 16 bits) and finally falls back to RemoveSuperfluousMove.
+
+    Returns true when the instruction list was changed.  In every folding
+    branch p itself is removed and p is set to the instruction that followed
+    it. }
+  function TARMAsmOptimizer.OptPass1UXTH(var p : tai) : Boolean;
+    var
+      hp1: tai;
+    begin
+      Result:=false;
+      {
+        change
+        uxth reg2,reg1
+        strh reg2,[...]
+        dealloc reg2
+        to
+        strh reg1,[...]
+      }
+      { the opcode passed here is p's own opcode, so this call can only
+        reject p because of its condition or postfix
+        (assuming MatchInstruction compares exactly these three fields) }
+      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { the reference in strh must not use reg2 }
+        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UXTHStrh2Strh done', p);
+          { store reg1 directly ... }
+          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+          { ... and drop the uxth; hp1 is reused to hold p's successor }
+          GetNextInstruction(p, hp1);
+          asml.remove(p);
+          p.free;
+          p:=hp1;
+          result:=true;
+        end
+      {
+        change
+        uxth reg2,reg1
+        uxth reg3,reg2
+        dealloc reg2
+        to
+        uxth reg3,reg1
+      }
+      else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxthUxth2Uxth done', p);
+          { reg1 is now read at hp1, so it is allocated over the range p..hp1 }
+          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
+          { hp1 already matched A_UXTH above; the opcode is simply re-assigned }
+          taicpu(hp1).opcode:=A_UXTH;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p, hp1);
+          asml.remove(p);
+          p.free;
+          p:=hp1;
+          result:=true;
+        end
+      {
+        change
+        uxth reg2,reg1
+        and reg3,reg2,#65535
+        dealloc reg2
+        to
+        uxth reg3,reg1
+      }
+      else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=3) and
+        (taicpu(hp1).oper[2]^.typ=top_const) and
+        { any mask with all of the low 16 bits set is accepted }
+        ((taicpu(hp1).oper[2]^.val and $FFFF)=$FFFF) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole UxthAndImm2Uxth done', p);
+          { turn the and into a two operand uxth reading reg1 }
+          taicpu(hp1).opcode:=A_UXTH;
+          taicpu(hp1).ops:=2;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p, hp1);
+          asml.remove(p);
+          p.free;
+          p:=hp1;
+          result:=true;
+        end
+      { last resort: let RemoveSuperfluousMove look at p and the next user of reg2 }
+      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+           RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
+        Result:=true;
+    end;
+
+
+  { Peephole pass 1 for an SXTB instruction at p.
+
+    Tries, in order, to fold the sign extension into the next instruction that
+    uses its destination register (strb, sxth, a second sxtb, or an and with
+    #0xFF) and finally falls back to RemoveSuperfluousMove.
+
+    Returns true when the instruction list was changed.  When p itself is
+    removed, p is set to the instruction that followed it; when the second
+    instruction is removed instead, p is left pointing at the rewritten sxtb. }
+  function TARMAsmOptimizer.OptPass1SXTB(var p : tai) : Boolean;
+    var
+      hp1, hp2: tai;
+    begin
+      Result:=false;
+      {
+        change
+        sxtb reg2,reg1
+        strb reg2,[...]
+        dealloc reg2
+        to
+        strb reg1,[...]
+      }
+      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
+        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+        { the reference in strb must not use reg2 }
+        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxtbStrb2Strb done', p);
+          { store reg1 directly and drop the sxtb }
+          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p,hp2);
+          asml.remove(p);
+          p.free;
+          p:=hp2;
+          result:=true;
+        end
+      {
+        change
+        sxtb reg2,reg1
+        sxth reg3,reg2
+        dealloc reg2
+        to
+        sxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
+        (taicpu(hp1).ops = 2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxtbSxth2Sxtb done', p);
+          { p now writes reg3, so reg3 is allocated over the range p..hp1 }
+          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+          asml.remove(hp1);
+          hp1.free;
+          result:=true;
+        end
+      {
+        change
+        sxtb reg2,reg1
+        sxtb reg3,reg2
+        dealloc reg2
+        to
+        sxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_SXTB, [C_None], [PF_None]) and
+        (taicpu(hp1).ops = 2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxtbSxtb2Sxtb done', p);
+          AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+          taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+          asml.remove(hp1);
+          hp1.free;
+          result:=true;
+        end
+      {
+        change
+        sxtb reg2,reg1
+        and reg3,reg2,#0xFF
+        dealloc reg2
+        to
+        uxtb reg3,reg1
+      }
+      else if MatchInstruction(p, A_SXTB, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=3) and
+        (taicpu(hp1).oper[2]^.typ=top_const) and
+        { The mask must be exactly $FF: after a sign extension the bits above
+          bit 7 may be set, so a wider mask (e.g. $FFFF) would keep copies of
+          the sign bit and the result would not be equal to uxtb. }
+        (taicpu(hp1).oper[2]^.val=$FF) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxtbAndImm2Uxtb done', p);
+          { turn the and into a two operand uxtb reading reg1 }
+          taicpu(hp1).opcode:=A_UXTB;
+          taicpu(hp1).ops:=2;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p,hp2);
+          asml.remove(p);
+          p.free;
+          p:=hp2;
+          result:=true;
+        end
+      { last resort: let RemoveSuperfluousMove look at p and the next user of reg2 }
+      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+           RemoveSuperfluousMove(p, hp1, 'SxtbMov2Data') then
+        Result:=true;
+    end;
+
+
+  { Peephole pass 1 for an SXTH instruction at p.
+
+    Tries, in order, to fold the sign extension into the next instruction that
+    uses its destination register (strh, a second sxth, or an and with
+    #0xFFFF) and finally falls back to RemoveSuperfluousMove.
+
+    Returns true when the instruction list was changed.  In every folding
+    branch p itself is removed and p is set to the instruction that followed
+    it. }
+  function TARMAsmOptimizer.OptPass1SXTH(var p : tai) : Boolean;
+    var
+      hp1: tai;
+    begin
+      Result:=false;
+      {
+        change
+        sxth reg2,reg1
+        strh reg2,[...]
+        dealloc reg2
+        to
+        strh reg1,[...]
+      }
+      if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { the reference in strh must not use reg2 }
+        not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SXTHStrh2Strh done', p);
+          { store reg1 directly and drop the sxth; hp1 is reused for p's successor }
+          taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p, hp1);
+          asml.remove(p);
+          p.free;
+          p:=hp1;
+          result:=true;
+        end
+      {
+        change
+        sxth reg2,reg1
+        sxth reg3,reg2
+        dealloc reg2
+        to
+        sxth reg3,reg1
+      }
+      else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_SXTH, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=2) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxthSxth2Sxth done', p);
+          { reg1 is now read at hp1, so it is allocated over the range p..hp1 }
+          AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
+          taicpu(hp1).opcode:=A_SXTH;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p, hp1);
+          asml.remove(p);
+          p.free;
+          p:=hp1;
+          result:=true;
+        end
+      {
+        change
+        sxth reg2,reg1
+        and reg3,reg2,#65535
+        dealloc reg2
+        to
+        uxth reg3,reg1
+      }
+      else if MatchInstruction(p, A_SXTH, [C_None], [PF_None]) and
+        (taicpu(p).ops=2) and
+        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+        MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
+        (taicpu(hp1).ops=3) and
+        (taicpu(hp1).oper[2]^.typ=top_const) and
+        { The mask must be exactly $FFFF: after a sign extension the bits
+          above bit 15 may be set, so a wider mask (e.g. $FFFFFF) would keep
+          copies of the sign bit and the result would not be equal to uxth. }
+        (taicpu(hp1).oper[2]^.val=$FFFF) and
+        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+        RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+        { reg1 must not be modified in between }
+        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole SxthAndImm2Uxth done', p);
+          { turn the and into a two operand uxth reading reg1 }
+          taicpu(hp1).opcode:=A_UXTH;
+          taicpu(hp1).ops:=2;
+          taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+          GetNextInstruction(p, hp1);
+          asml.remove(p);
+          p.free;
+          p:=hp1;
+          result:=true;
+        end
+      { last resort: let RemoveSuperfluousMove look at p and the next user of reg2 }
+      else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+           RemoveSuperfluousMove(p, hp1, 'SxthMov2Data') then
+        Result:=true;
+    end;
+
+
+  { Peephole pass 1 for an AND instruction at p.
+
+    For the form "and reg2,reg1,const" it tries, in order:
+      - merging with a following and that consumes reg2 (AndAnd2And /
+        AndAnd2Mov0),
+      - dropping the and in front of a strb when the mask keeps the low byte,
+      - merging with a following uxtb/uxth or sxtb/sxth,
+      - simplifying "and 2^n-1" followed by lsl (and optionally lsr/asr).
+    Independently of the operand form it finally tries RemoveSuperfluousMove.
+
+    Returns true when the instruction list was changed.  When p itself is
+    removed, p is set to the instruction that followed it. }
+  function TARMAsmOptimizer.OptPass1And(var p : tai) : Boolean;
+    var
+      hp1, hp2: tai;
+      i: longint;
+    begin
+      Result:=false;
+      {
+        optimize
+        and reg2,reg1,const1
+        ...
+      }
+      if (taicpu(p).ops>2) and
+         (taicpu(p).oper[1]^.typ = top_reg) and
+         (taicpu(p).oper[2]^.typ = top_const) then
+        begin
+          {
+            change
+            and reg2,reg1,const1
+            ...
+            and reg3,reg2,const2
+            to
+            and reg3,reg1,(const1 and const2)
+          }
+          if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+          MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_None]) and
+          RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+          (taicpu(hp1).oper[2]^.typ = top_const)
+{$ifdef AARCH64}
+          { on AArch64 the merged mask must be encodable for the register
+            size in use, unless it is zero (then a mov #0 is emitted) }
+          and ((((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBQ) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_64)) or
+               ((getsubreg(taicpu(p).oper[0]^.reg)=R_SUBL) and is_shifter_const(taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val,OS_32))
+          ) or
+          ((taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0))
+{$endif AARCH64}
+          then
+            begin
+              { variant 1: keep p, make it write reg3 and remove hp1;
+                only possible when reg3 is not used between p and hp1 }
+              if not(RegUsedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) then
+                begin
+                  DebugMsg('Peephole AndAnd2And done', p);
+                  AllocRegBetween(taicpu(hp1).oper[0]^.reg,p,hp1,UsedRegs);
+                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
+                    begin
+                      DebugMsg('Peephole AndAnd2Mov0 1 done', p);
+                      taicpu(p).opcode:=A_MOV;
+                      taicpu(p).ops:=2;
+                      taicpu(p).loadConst(1,0);
+                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
+                      { hp1 is removed below, so the zero has to be written to
+                        hp1's destination (reg3); without this reg3 would be
+                        left unset whenever reg3<>reg2 }
+                      taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+                    end
+                  else
+                    begin
+                      DebugMsg('Peephole AndAnd2And 1 done', p);
+                      taicpu(p).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
+                      taicpu(p).oppostfix:=taicpu(hp1).oppostfix;
+                      taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+                    end;
+                  asml.remove(hp1);
+                  hp1.free;
+                  Result:=true;
+                  exit;
+                end
+              { variant 2: keep hp1, make it read reg1 and remove p;
+                only possible when reg1 is not used between p and hp1 }
+              else if not(RegUsedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+                begin
+                  if (taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val)=0 then
+                    begin
+                      DebugMsg('Peephole AndAnd2Mov0 2 done', hp1);
+                      taicpu(hp1).opcode:=A_MOV;
+                      taicpu(hp1).loadConst(1,0);
+                      taicpu(hp1).ops:=2;
+                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
+                    end
+                  else
+                    begin
+                      DebugMsg('Peephole AndAnd2And 2 done', hp1);
+                      AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
+                      taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
+                      taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
+                      taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                    end;
+                  GetNextInstruction(p, hp1);
+                  RemoveCurrentP(p);
+                  p:=hp1;
+                  Result:=true;
+                  exit;
+                end;
+            end
+          {
+            change
+            and reg2,reg1,$xxxxxxFF
+            strb reg2,[...]
+            dealloc reg2
+            to
+            strb reg1,[...]
+          }
+          else if ((taicpu(p).oper[2]^.val and $FF) = $FF) and
+            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
+            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+            MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
+            assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
+            { the reference in strb must not use reg2 }
+            not(RegInRef(taicpu(p).oper[0]^.reg,taicpu(hp1).oper[1]^.ref^)) and
+            { reg1 must not be modified in between }
+            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+            begin
+              DebugMsg('Peephole AndStrb2Strb done', p);
+{$ifdef AARCH64}
+              { on AArch64 the stored register is rebuilt from reg1's super
+                register with sub register R_SUBD }
+              taicpu(hp1).loadReg(0,newreg(R_INTREGISTER,getsupreg(taicpu(p).oper[1]^.reg),R_SUBD));
+{$else AARCH64}
+              taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+{$endif AARCH64}
+              AllocRegBetween(taicpu(p).oper[1]^.reg,p,hp1,UsedRegs);
+              RemoveCurrentP(p);
+              result:=true;
+              exit;
+            end
+          {
+            change
+            and reg2,reg1,255
+            uxtb/uxth reg3,reg2
+            dealloc reg2
+            to
+            and reg3,reg1,x
+          }
+          else if ((taicpu(p).oper[2]^.val and $ffffff00)=0) and
+            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
+            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+            MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
+            (taicpu(hp1).ops = 2) and
+            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+            { reg1 must not be modified in between }
+            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+            begin
+              DebugMsg('Peephole AndUxt2And done', p);
+              { the extension becomes the and itself, reading reg1 }
+              taicpu(hp1).opcode:=A_AND;
+              taicpu(hp1).ops:=3;
+              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
+              GetNextInstruction(p,hp1);
+              asml.remove(p);
+              p.Free;
+              p:=hp1;
+              result:=true;
+              exit;
+            end
+          { same as above for sxtb/sxth: the mask must additionally leave
+            bit 7 clear, so the sign extension cannot change the value }
+          else if ((taicpu(p).oper[2]^.val and $ffffff80)=0) and
+            MatchInstruction(p, A_AND, [C_None], [PF_None]) and
+            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+            MatchInstruction(hp1, [A_SXTB,A_SXTH], [C_None], [PF_None]) and
+            (taicpu(hp1).ops = 2) and
+            RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
+            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+            { reg1 must not be modified in between }
+            not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
+            begin
+              DebugMsg('Peephole AndSxt2And done', p);
+              taicpu(hp1).opcode:=A_AND;
+              taicpu(hp1).ops:=3;
+              taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+              taicpu(hp1).loadconst(2,taicpu(p).oper[2]^.val);
+              GetNextInstruction(p,hp1);
+              asml.remove(p);
+              p.Free;
+              p:=hp1;
+              result:=true;
+              exit;
+            end
+          {
+            from
+            and reg1,reg0,2^n-1
+            mov reg2,reg1, lsl imm1
+            (mov reg3,reg2, lsr/asr imm1)
+            remove either the and or the lsl/xsr sequence if possible
+          }
+
+          { the "< high(int64)" test keeps val+1 from overflowing;
+            i receives the exponent n when val+1 is a power of two }
+          else if (taicpu(p).oper[2]^.val < high(int64)) and
+            cutils.ispowerof2(taicpu(p).oper[2]^.val+1,i) and
+            GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+            MatchInstruction(hp1, A_MOV, [taicpu(p).condition], [PF_None]) and
+            (taicpu(hp1).ops=3) and
+            MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+            (taicpu(hp1).oper[2]^.typ = top_shifterop) and
+{$ifdef ARM}
+            (taicpu(hp1).oper[2]^.shifterop^.rs = NR_NO) and
+{$endif ARM}
+            (taicpu(hp1).oper[2]^.shifterop^.shiftmode=SM_LSL) and
+            RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) then
+            begin
+              {
+                and reg1,reg0,2^n-1
+                mov reg2,reg1, lsl imm1
+                mov reg3,reg2, lsr/asr imm1
+                =>
+                and reg1,reg0,2^n-1
+                if lsr and 2^n-1>=imm1 or asr and 2^n-1>imm1
+              }
+              { NOTE(review): the search below looks for the next user of p's
+                destination (reg1) while the operand test that follows expects
+                hp1's destination (reg2) - confirm that this is intended.
+                NOTE(review): the bounds below assume a register width of 32
+                bits. }
+              if GetNextInstructionUsingReg(hp1,hp2,taicpu(p).oper[0]^.reg) and
+                MatchInstruction(hp2, A_MOV, [taicpu(p).condition], [PF_None]) and
+                (taicpu(hp2).ops=3) and
+                MatchOperand(taicpu(hp2).oper[1]^, taicpu(hp1).oper[0]^.reg) and
+                (taicpu(hp2).oper[2]^.typ = top_shifterop) and
+{$ifdef ARM}
+                (taicpu(hp2).oper[2]^.shifterop^.rs = NR_NO) and
+{$endif ARM}
+                (taicpu(hp2).oper[2]^.shifterop^.shiftmode in [SM_ASR,SM_LSR]) and
+                (taicpu(hp1).oper[2]^.shifterop^.shiftimm=taicpu(hp2).oper[2]^.shifterop^.shiftimm) and
+                RegEndOfLife(taicpu(hp1).oper[0]^.reg,taicpu(hp2)) and
+                ((i<32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) or
+                ((i=32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
+                 (taicpu(hp2).oper[2]^.shifterop^.shiftmode=SM_LSR))) then
+                begin
+                  DebugMsg('Peephole AndLslXsr2And done', p);
+                  { the and writes reg3 directly, both shifts go away }
+                  taicpu(p).oper[0]^.reg:=taicpu(hp2).oper[0]^.reg;
+                  asml.Remove(hp1);
+                  asml.Remove(hp2);
+                  hp1.free;
+                  hp2.free;
+                  result:=true;
+                  exit;
+                end
+              {
+                and reg1,reg0,2^n-1
+                mov reg2,reg1, lsl imm1
+                =>
+                mov reg2,reg0, lsl imm1
+                if n>32-imm1
+              }
+              else if (i>32-taicpu(hp1).oper[2]^.shifterop^.shiftimm) and
+                      not(RegModifiedBetween(taicpu(p).oper[1]^.reg, p, hp1)) then
+                begin
+                  DebugMsg('Peephole AndLsl2Lsl done', p);
+                  { the shift reads reg0 directly, the and goes away }
+                  taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[1]^.reg;
+                  GetNextInstruction(p, hp1);
+                  asml.Remove(p);
+                  p.free;
+                  p:=hp1;
+                  result:=true;
+                  exit;
+                end
+            end;
+        end;
+      {
+        change
+        and reg1, ...
+        mov reg2, reg1
+        to
+        and reg2, ...
+      }
+      if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+         (taicpu(p).ops>=3) and
+         RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
+        Result:=true;
+    end;
+
+end.
+

+ 2 - 2
compiler/armgen/armpara.pas

@@ -43,7 +43,7 @@ type
 implementation
 
   uses
-    symconst,symdef,symsym,defutil;
+    symconst,symdef,symsym,symutil,defutil;
 
 
   function tarmgenparamanager.is_hfa(p: tdef; out basedef: tdef): boolean;
@@ -106,7 +106,7 @@ implementation
             for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
               begin
                 sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
-                if sym.typ<>fieldvarsym then
+                if not is_normal_fieldvarsym(sym) then
                   continue;
                 if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
                   exit

+ 221 - 27
compiler/assemble.pas

@@ -153,6 +153,7 @@ interface
         procedure WriteSourceLine(hp: tailineinfo);
         procedure WriteTempalloc(hp: tai_tempalloc);
         procedure WriteRealConstAsBytes(hp: tai_realconst; const dbdir: string; do_line: boolean);
+        function WriteComments(var hp: tai): boolean;
         function single2str(d : single) : string; virtual;
         function double2str(d : double) : string; virtual;
         function extended2str(e : extended) : string; virtual;
@@ -264,7 +265,7 @@ Implementation
 {$endif FPC_SOFT_FPUX80}
 {$endif}
       cscript,fmodule,verbose,
-      cpuinfo,
+      cpubase,cpuinfo,triplet,
       aasmcpu;
 
     var
@@ -586,7 +587,7 @@ Implementation
         index: longint;
       begin
         MaybeAddLinePostfix;
-        if (cs_link_on_target in current_settings.globalswitches) then
+        if (cs_assemble_on_target in current_settings.globalswitches) then
           newline:=@target_info.newline
         else
           newline:=@source_info.newline;
@@ -622,7 +623,7 @@ Implementation
           compiler itself, especially on hardware with slow disk I/O.
           Consider this as a poor man's pipe on Amiga, because real pipe handling
           would be much more complex and error prone to implement. (KB) }
-        if (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) then
+        if (([cs_asm_extern,cs_asm_leave,cs_assemble_on_target] * current_settings.globalswitches) = []) then
          begin
           { try to have an unique name for the .s file }
           tempFileName:=HexStr(GetProcessID shr 4,7)+ExtractFileName(owner.AsmFileName);
@@ -744,8 +745,8 @@ Implementation
       begin
 {$ifdef hasunix}
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
-                (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and
-                ((asminfo^.id in [as_gas,as_ggas,as_darwin,as_powerpc_xcoff,as_clang,as_solaris_as]));
+                (([cs_asm_extern,cs_asm_leave,cs_assemble_on_target] * current_settings.globalswitches) = []) and
+                ((asminfo^.id in [as_gas,as_ggas,as_darwin,as_powerpc_xcoff,as_clang_gas,as_clang_llvm,as_solaris_as]));
 {$else hasunix}
         DoPipe:=false;
 {$endif}
@@ -822,16 +823,20 @@ Implementation
       var
         asfound : boolean;
         UtilExe  : string;
+        asmbin : TCmdStr;
       begin
         asfound:=false;
-        if cs_link_on_target in current_settings.globalswitches then
+        asmbin:=asminfo^.asmbin;
+        if (af_llvm in asminfo^.flags) then
+          asmbin:=asmbin+llvmutilssuffix;
+        if cs_assemble_on_target in current_settings.globalswitches then
          begin
-           { If linking on target, don't add any path PM }
-           FindAssembler:=utilsprefix+ChangeFileExt(asminfo^.asmbin,target_info.exeext);
+           { If assembling on target, don't add any path PM }
+           FindAssembler:=utilsprefix+ChangeFileExt(asmbin,target_info.exeext);
            exit;
          end
         else
-         UtilExe:=utilsprefix+ChangeFileExt(asminfo^.asmbin,source_info.exeext);
+         UtilExe:=utilsprefix+ChangeFileExt(asmbin,source_info.exeext);
         if lastas<>ord(asminfo^.id) then
          begin
            lastas:=ord(asminfo^.id);
@@ -925,18 +930,14 @@ Implementation
 
       begin
         result:=asminfo^.asmcmd;
-        { for Xcode 7.x and later }
-        if MacOSXVersionMin<>'' then
-          Replace(result,'$DARWINVERSION','-mmacosx-version-min='+MacOSXVersionMin)
-        else if iPhoneOSVersionMin<>'' then
-          Replace(result,'$DARWINVERSION','-miphoneos-version-min='+iPhoneOSVersionMin)
-        else
-          Replace(result,'$DARWINVERSION','');
+        if af_llvm in target_asm.flags then
+          Replace(result,'$TRIPLET',targettriplet(triplet_llvm))
 {$ifdef arm}
-        if (target_info.system=system_arm_darwin) then
-          Replace(result,'$ARCH',lower(cputypestr[current_settings.cputype]));
+        else if (target_info.system=system_arm_ios) then
+          Replace(result,'$ARCH',lower(cputypestr[current_settings.cputype]))
 {$endif arm}
-        if (cs_link_on_target in current_settings.globalswitches) then
+        ;
+        if (cs_assemble_on_target in current_settings.globalswitches) then
          begin
            Replace(result,'$ASM',maybequoted(ScriptFixFileName(AsmFileName)));
            Replace(result,'$OBJ',maybequoted(ScriptFixFileName(ObjFileName)));
@@ -945,7 +946,7 @@ Implementation
          begin
 {$ifdef hasunix}
           if DoPipe then
-            if asminfo^.id<>as_clang then
+            if not(asminfo^.id in [as_clang_gas,as_clang_asdarwin,as_clang_llvm]) then
               Replace(result,'$ASM','')
             else
               Replace(result,'$ASM','-')
@@ -1139,7 +1140,7 @@ Implementation
 	      else if sizeof(tai_realconst(hp).value.s80val) = sizeof(single) then
 	        eextended:=float32_to_floatx80(float32(single(tai_realconst(hp).value.s80val)))
 	      else
-	        internalerror(2017091901);
+	        internalerror(2017091902);
               pdata:=@eextended;
             end;
 {$pop}
@@ -1195,6 +1196,58 @@ Implementation
       end;
 
 
+    { Writes hp to the assembler output as a comment line when it is one of
+      the informational entry types handled here (ait_comment, ait_regalloc,
+      ait_tempalloc, ait_varloc).
+
+      Returns true when hp.typ was one of these types - even if the matching
+      switch is off and nothing was written - and false for any other type.
+      For ait_regalloc, hp may be moved forward: consecutive regalloc entries
+      with the same ratype are merged into one line and hp ends up on the last
+      entry that was consumed. }
+    function TExternalAssembler.WriteComments(var hp: tai): boolean;
+      begin
+        result:=true;
+        case hp.typ of
+          ait_comment :
+            Begin
+              writer.AsmWrite(asminfo^.comment);
+              writer.AsmWritePChar(tai_comment(hp).str);
+              writer.AsmLn;
+            End;
+
+          ait_regalloc :
+            begin
+              if (cs_asm_regalloc in current_settings.globalswitches) then
+                begin
+                  writer.AsmWrite(#9+asminfo^.comment+'Register ');
+                  { emit a comma separated list of registers for as long as
+                    the next entry is a regalloc of the same ratype }
+                  repeat
+                    writer.AsmWrite(std_regname(Tai_regalloc(hp).reg));
+                    if (hp.next=nil) or
+                       (tai(hp.next).typ<>ait_regalloc) or
+                       (tai_regalloc(hp.next).ratype<>tai_regalloc(hp).ratype) then
+                      break;
+                    hp:=tai(hp.next);
+                    writer.AsmWrite(',');
+                  until false;
+                  writer.AsmWrite(' ');
+                  { all merged entries share one ratype, so the last one can name it }
+                  writer.AsmWriteLn(regallocstr[tai_regalloc(hp).ratype]);
+                end;
+            end;
+
+          ait_tempalloc :
+            begin
+              if (cs_asm_tempalloc in current_settings.globalswitches) then
+                WriteTempalloc(tai_tempalloc(hp));
+            end;
+
+          ait_varloc:
+            begin
+              { ait_varloc is present here only when register allocation is not done ( -sr option ) }
+              { with a high location register set, both registers are printed as hi:lo }
+              if tai_varloc(hp).newlocationhi<>NR_NO then
+                writer.AsmWriteLn(asminfo^.comment+'Var '+tai_varloc(hp).varsym.realname+' located in register '+
+                  std_regname(tai_varloc(hp).newlocationhi)+':'+std_regname(tai_varloc(hp).newlocation))
+              else
+                writer.AsmWriteLn(asminfo^.comment+'Var '+tai_varloc(hp).varsym.realname+' located in register '+
+                  std_regname(tai_varloc(hp).newlocation));
+            end;
+          else
+            { not a comment-like entry: the caller has to handle it }
+            result:=false;
+        end;
+      end;
+
+
     procedure TExternalAssembler.WriteTree(p:TAsmList);
       begin
       end;
@@ -1558,6 +1611,7 @@ Implementation
         objsym,
         objsymend : TObjSymbol;
         cpu: tcputype;
+        eabi_section, TmpSection: TObjSection;
       begin
         while assigned(hp) do
          begin
@@ -1615,9 +1669,11 @@ Implementation
                                     (objsym.objsection<>ObjData.CurrObjSec) then
                                    InternalError(200404124);
                                end
+{$push} {$R-}{$Q-}
                              else
                                Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                            end;
+{$pop}
                        end;
                    end;
                  ObjData.alloc(tai_const(hp).size);
@@ -1670,7 +1726,10 @@ Implementation
                end;
              ait_section:
                begin
-                 ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).name^,Tai_section(hp).secorder);
+                 if Tai_section(hp).sectype=sec_user then
+                   ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).secflags,Tai_section(hp).secprogbits,Tai_section(hp).name^,Tai_section(hp).secorder)
+                 else
+                   ObjData.CreateSection(Tai_section(hp).sectype,Tai_section(hp).name^,Tai_section(hp).secorder);
                  Tai_section(hp).sec:=ObjData.CurrObjSec;
                end;
              ait_symbol :
@@ -1694,6 +1753,28 @@ Implementation
              ait_cutobject :
                if SmartAsm then
                 break;
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   begin
+                     TmpSection:=ObjData.CurrObjSec;
+                     ObjData.CreateSection(sec_arm_attribute,[],SPB_ARM_ATTRIBUTES,'',secorder_default);
+                     eabi_section:=ObjData.CurrObjSec;
+                     ObjData.setsection(TmpSection);
+                   end;
+                 if eabi_section.Size=0 then
+                   eabi_section.alloc(16);
+                 eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).tag));
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).value));
+                   eattrtype_ntbs:
+                     eabi_section.alloc(Length(tai_eabi_attribute(hp).valuestr^)+1);
+                   else
+                     Internalerror(2019100701);
+                 end;
+               end;
              else
                ;
            end;
@@ -1708,6 +1789,7 @@ Implementation
         objsym,
         objsymend : TObjSymbol;
         cpu: tcputype;
+        eabi_section: TObjSection;
       begin
         while assigned(hp) do
          begin
@@ -1762,15 +1844,23 @@ Implementation
                    begin
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
-                     if objsymend.objsection<>objsym.objsection then
+                     if Tai_const(hp).consttype in [aitconst_gottpoff,aitconst_tlsgd,aitconst_tlsdesc] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092801);
+                         Tai_const(hp).value:=objsymend.address-ObjData.CurrObjSec.Size+Tai_const(hp).symofs;
+                       end
+                     else if objsymend.objsection<>objsym.objsection then
                        begin
                          if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) or
                             (objsym.objsection<>ObjData.CurrObjSec) then
                            internalerror(200905042);
                        end
+{$push} {$R-}{$Q-}
                      else
                        Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                    end;
+{$pop}
                  if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) then
                    Tai_const(hp).fixsize;
                  ObjData.alloc(tai_const(hp).size);
@@ -1843,6 +1933,23 @@ Implementation
                      internalerror(2010011102);
                  end;
                end;
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   Internalerror(2019100702);
+                 if eabi_section.Size=0 then
+                   eabi_section.alloc(16);
+                 eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).tag));
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     eabi_section.alloc(LengthUleb128(tai_eabi_attribute(hp).value));
+                   eattrtype_ntbs:
+                     eabi_section.alloc(Length(tai_eabi_attribute(hp).valuestr^)+1);
+                   else
+                     Internalerror(2019100703);
+                 end;
+               end;
              else
                ;
            end;
@@ -1875,6 +1982,10 @@ Implementation
         ccomp : comp;
         tmp    : word;
         cpu: tcputype;
+        ddword : dword;
+        eabi_section: TObjSection;
+        s: String;
+        TmpDataPos: TObjSectionOfs;
       begin
         fillchar(zerobuf,sizeof(zerobuf),0);
         fillchar(objsym,sizeof(objsym),0);
@@ -1951,7 +2062,7 @@ Implementation
 		       else if sizeof(tai_realconst(hp).value.s80val) = sizeof(single) then
 			 eextended:=float32_to_floatx80(float32(single(tai_realconst(hp).value.s80val)))
 		       else
-			 internalerror(2017091901);
+			 internalerror(2017091903);
                        pdata:=@eextended;
                      end;
            {$pop}
@@ -1981,8 +2092,29 @@ Implementation
                      objsym:=Objdata.SymbolRef(tai_const(hp).sym);
                      objsymend:=Objdata.SymbolRef(tai_const(hp).endsym);
                      relative_reloc:=(objsym.objsection<>objsymend.objsection);
-                     Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
+                     if Tai_const(hp).consttype in [aitconst_gottpoff] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092802);
+                         Tai_const(hp).value:=objsymend.address-ObjData.CurrObjSec.Size+Tai_const(hp).symofs;
+                       end
+                     else if Tai_const(hp).consttype in [aitconst_tlsgd,aitconst_tlsdesc] then
+                       begin
+                         if objsymend.objsection<>ObjData.CurrObjSec then
+                           Internalerror(2019092803);
+                         Tai_const(hp).value:=ObjData.CurrObjSec.Size-objsymend.address+Tai_const(hp).symofs;
+                       end
+                     else if objsymend.objsection<>objsym.objsection then
+                       begin
+                         if (Tai_const(hp).consttype in [aitconst_uleb128bit,aitconst_sleb128bit]) or
+                            (objsym.objsection<>ObjData.CurrObjSec) then
+                           internalerror(2019010301);
+                       end
+                     else
+{$push} {$R-}{$Q-}
+                       Tai_const(hp).value:=objsymend.address-objsym.address+Tai_const(hp).symofs;
                    end;
+{$pop}
                  case tai_const(hp).consttype of
                    aitconst_64bit,
                    aitconst_32bit,
@@ -2003,7 +2135,7 @@ Implementation
                    aitconst_rva_symbol :
                      begin
                        { PE32+? }
-                       if target_info.system=system_x86_64_win64 then
+                       if target_info.system in systems_peoptplus then
                          ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_RVA)
                        else
                          ObjData.writereloc(Tai_const(hp).symofs,sizeof(pint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_RVA);
@@ -2035,7 +2167,23 @@ Implementation
 {$ifdef arm}
                    aitconst_got:
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOT32);
+{                   aitconst_gottpoff:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TPOFF); }
+                   aitconst_tpoff:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TPOFF);
+                   aitconst_tlsgd:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TLSGD);
+                   aitconst_tlsdesc:
+                     begin
+                       { must be a relative symbol, thus value being valid }
+                       if not(assigned(tai_const(hp).sym)) or not(assigned(tai_const(hp).endsym)) then
+                         Internalerror(2019092904);
+                       ObjData.writereloc(Tai_const(hp).value,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_TLSDESC);
+                     end;
 {$endif arm}
+                   aitconst_dtpoff:
+                     { so far, the size of dtpoff is fixed to 4 bytes }
+                     ObjData.writereloc(Tai_const(hp).symofs,4,Objdata.SymbolRef(tai_const(hp).sym),RELOC_DTPOFF);
                    aitconst_gotoff_symbol:
                      ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOTOFF);
                    aitconst_uleb128bit,
@@ -2135,6 +2283,52 @@ Implementation
              ait_seh_directive :
                tai_seh_directive(hp).generate_code(objdata);
 {$endif DISABLE_WIN64_SEH}
+             ait_eabi_attribute :
+               begin
+                 eabi_section:=ObjData.findsection('.ARM.attributes');
+                 if not(assigned(eabi_section)) then
+                   Internalerror(2019100704);
+                 if eabi_section.Size=0 then
+                   begin
+                     s:='A';
+                     eabi_section.write(s[1],1);
+                     ddword:=eabi_section.Size-1;
+                     eabi_section.write(ddword,4);
+                     s:='aeabi'#0;
+                     eabi_section.write(s[1],6);
+                     s:=#1;
+                     eabi_section.write(s[1],1);
+                     ddword:=eabi_section.Size-1-4-6-1;
+                     eabi_section.write(ddword,4);
+                   end;
+                 leblen:=EncodeUleb128(tai_eabi_attribute(hp).tag,lebbuf,0);
+                 eabi_section.write(lebbuf,leblen);
+
+                 case tai_eabi_attribute(hp).eattr_typ of
+                   eattrtype_dword:
+                     begin
+                       leblen:=EncodeUleb128(tai_eabi_attribute(hp).value,lebbuf,0);
+                       eabi_section.write(lebbuf,leblen);
+                     end;
+                   eattrtype_ntbs:
+                     begin
+                       s:=tai_eabi_attribute(hp).valuestr^+#0;
+                       eabi_section.write(s[1],Length(s));
+                     end
+                   else
+                     Internalerror(2019100705);
+                 end;
+                 { update size of attributes section, write directly to the dyn. arrays as
+                   we do not increase the size of section }
+                 TmpDataPos:=eabi_section.Data.Pos;
+                 eabi_section.Data.seek(1);
+                 ddword:=eabi_section.Size-1;
+                 eabi_section.Data.write(ddword,4);
+                 eabi_section.Data.seek(12);
+                 ddword:=eabi_section.Size-1-4-6;
+                 eabi_section.Data.write(ddword,4);
+                 eabi_section.Data.Seek(TmpDataPos);
+               end;
              else
                ;
            end;
@@ -2391,7 +2585,7 @@ Implementation
       var
         asmkind: tasm;
       begin
-        for asmkind in [as_gas,as_ggas,as_darwin] do
+        for asmkind in [as_gas,as_ggas,as_darwin,as_clang_gas,as_clang_asdarwin] do
           if assigned(asminfos[asmkind]) and
              (target_info.system in asminfos[asmkind]^.supported_targets) then
             begin

+ 114 - 30
compiler/avr/aasmcpu.pas

@@ -108,7 +108,7 @@ uses
 
     { replaces cond. branches by rjmp/jmp and the inverse cond. branch if needed
       and transforms special instructions to valid instruction encodings }
-    procedure finalizeavrcode(list : TAsmList);
+    function finalizeavrcode(list : TAsmList) : Boolean;
 
 implementation
 
@@ -355,7 +355,7 @@ implementation
             else
               result:=taicpu.op_reg_ref(A_LD,r,ref);
           else
-            internalerror(200401041);
+            internalerror(2004010413);
         end;
       end;
 
@@ -374,7 +374,7 @@ implementation
             else
               result:=taicpu.op_ref_reg(A_ST,ref,r);
           else
-            internalerror(200401041);
+            internalerror(2004010414);
         end;
       end;
 
@@ -396,15 +396,42 @@ implementation
       end;
 
 
-    procedure finalizeavrcode(list : TAsmList);
+    function finalizeavrcode(list : TAsmList) : Boolean;
       var
         CurrOffset : longint;
-        curtai : tai;
+        curtai, firstinstruction: tai;
         again : boolean;
         l : tasmlabel;
         inasmblock : Boolean;
+
+      procedure remove_instruction; { nested helper: turns firstinstruction into an operand-less A_SLEEP and deletes every later list entry except labels and symbol-end markers }
+        var
+          i: Integer;
+          hp: tai; { holds the successor while the current entry is unlinked and freed }
+        begin
+          taicpu(firstinstruction).opcode:=A_SLEEP; { reuse the first instruction object instead of inserting a new one }
+          for i:=0 to taicpu(firstinstruction).opercnt-1 do
+            taicpu(firstinstruction).freeop(i); { freeop is called for every operand index of the old instruction }
+          taicpu(firstinstruction).opercnt:=0;
+          taicpu(firstinstruction).ops:=0; { both operand counters are reset so the rewritten instruction carries no operands }
+          firstinstruction:=tai(firstinstruction.Next); { from here on firstinstruction is used as a plain list cursor }
+          while assigned(firstinstruction) do { runs to the end of list, so firstinstruction is nil on exit }
+            begin
+              if firstinstruction.typ in [ait_symbol_end,ait_label] then
+                firstinstruction:=tai(firstinstruction.Next) { labels and symbol-end entries stay in the list }
+              else
+                begin
+                  hp:=tai(firstinstruction.Next); { read Next before the entry is freed }
+                  list.Remove(firstinstruction);
+                  firstinstruction.free; { NOTE(review): this also frees the enclosing routine's curtai when curtai is not the first instruction -- confirm the caller does not dereference curtai afterwards }
+                  firstinstruction:=hp;
+                end;
+            end;
+        end;
+
       begin
         again:=true;
+        Result:=true;
         while again do
           begin
             again:=false;
@@ -439,39 +466,96 @@ implementation
 
             curtai:=tai(list.first);
             inasmblock:=false;
+            firstinstruction:=nil;
             while assigned(curtai) do
               begin
                 case curtai.typ of
                   ait_instruction:
-                    case taicpu(curtai).opcode of
-                      A_BRxx:
-                        if (taicpu(curtai).oper[0]^.typ=top_ref) and ((taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>64) or
-                          (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<-63)) then
+                    begin
+                      if not(assigned(firstinstruction)) then
+                        firstinstruction:=curtai;
+                      case taicpu(curtai).opcode of
+                        A_BRxx:
+                          if (taicpu(curtai).oper[0]^.typ=top_ref) and ((taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>64) or
+                            (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<-63)) then
+                            begin
+                              if inasmblock then
+                                Message(asmw_e_brxx_out_of_range)
+                              else
+                                begin
+                                  current_asmdata.getjumplabel(l);
+                                  list.insertafter(tai_label.create(l),curtai);
+                                  if CPUAVR_HAS_JMP_CALL in cpu_capabilities[current_settings.cputype] then
+                                    list.insertafter(taicpu.op_sym(A_JMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai)
+                                  else
+                                    list.insertafter(taicpu.op_sym(A_RJMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai);
+                                  taicpu(curtai).oper[0]^.ref^.symbol:=l;
+                                  taicpu(curtai).condition:=inverse_cond(taicpu(curtai).condition);
+                                  again:=true;
+                                end;
+                            end;
+                        A_JMP:
+                          { replace JMP by RJMP? ...
+                            ... but do not mess with asm block }
+                          if not(inasmblock) and (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<=2048) and
+                          (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>=-2047) and
+                          { jmps to function go outside the currently considered scope, so do not mess with them.
+                            Those are generated by the peephole optimizer from call/ret sequences }
+                          not(taicpu(curtai).oper[0]^.ref^.symbol.typ=AT_FUNCTION) then
                           begin
-                            if inasmblock then
-                              Message(asmw_e_brxx_out_of_range)
-                            else
+                            taicpu(curtai).opcode:=A_RJMP;
+                            again:=true;
+                          end;
+                        A_STS:
+                          begin
+                            if current_settings.cputype in [cpu_avrtiny, cpu_avr1] then
+                              with taicpu(curtai).oper[0]^ do
+                                if (ref^.base=NR_NO) and (ref^.index=NR_NO) and (ref^.symbol=nil) and (ref^.offset<$40) then
+                                  begin
+                                    taicpu(curtai).opcode:=A_OUT;
+                                    taicpu(curtai).loadconst(0,ref^.offset);
+                                  end
+                                else if current_settings.cputype=cpu_avr1 then
+                                  begin
+                                    remove_instruction;
+                                    result:=false;
+                                  end;
+                          end;
+                        A_LDS:
+                          begin
+                            if current_settings.cputype in [cpu_avrtiny, cpu_avr1] then
+                              with taicpu(curtai).oper[1]^ do
+                                if (ref^.base=NR_NO) and (ref^.index=NR_NO) and (ref^.symbol=nil) and (ref^.offset<$40) then
+                                  begin
+                                    taicpu(curtai).opcode:=A_IN;
+                                    taicpu(curtai).loadconst(1,ref^.offset)
+                                  end
+                                else if current_settings.cputype=cpu_avr1 then
+                                  begin
+                                    remove_instruction;
+                                    result:=false;
+                                  end;
+                          end;
+                        A_SBIW,
+                        A_MULS,
+                        A_ICALL,
+                        A_IJMP,
+                        A_STD,
+                        A_LD,
+                        A_LDD,
+                        A_ST,
+                        A_ROR,
+                        A_POP,
+                        A_PUSH:
+                          begin
+                            { certain cpu types do not support some instructions, so replace them }
+                            if current_settings.cputype=cpu_avr1 then
                               begin
-                                current_asmdata.getjumplabel(l);
-                                list.insertafter(tai_label.create(l),curtai);
-                                list.insertafter(taicpu.op_sym(A_JMP,taicpu(curtai).oper[0]^.ref^.symbol),curtai);
-                                taicpu(curtai).oper[0]^.ref^.symbol:=l;
-                                taicpu(curtai).condition:=inverse_cond(taicpu(curtai).condition);
-                                again:=true;
+                                remove_instruction;
+                                result:=false;
                               end;
                           end;
-                      A_JMP:
-                        { replace JMP by RJMP? ...
-                          ... but do not mess with asm block }
-                        if not(inasmblock) and (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset<=2048) and
-                        (taicpu(curtai).InsOffset-taicpu(curtai).oper[0]^.ref^.symbol.offset>=-2047) and
-                        { jmps to function go outside the currently considered scope, so do not mess with them.
-                          Those are generated by the peephole optimizer from call/ret sequences }
-                        not(taicpu(curtai).oper[0]^.ref^.symbol.typ=AT_FUNCTION) then
-                        begin
-                          taicpu(curtai).opcode:=A_RJMP;
-                          again:=true;
-                        end;
+                      end;
                     end;
                   ait_marker:
                     case tai_marker(curtai).Kind of

+ 4 - 5
compiler/avr/agavrgas.pas

@@ -93,7 +93,7 @@ unit agavrgas;
               //   internalerror(200308293);
   {$endif extdebug}
               if index<>NR_NO then
-                internalerror(2011021701)
+                internalerror(2011021707)
               else if base<>NR_NO then
                 begin
                   if addressmode=AM_PREDRECEMENT then
@@ -122,7 +122,7 @@ unit agavrgas;
               else if assigned(symbol) or (offset<>0) then
                 begin
                   if assigned(symbol) then
-                    s:=ReplaceForbiddenAsmSymbolChars(symbol.name);
+                    s:=ApplyAsmSymbolRestrictions(symbol.name);
 
                   if s='' then
                     s:=tostr(offset)
@@ -154,8 +154,6 @@ unit agavrgas;
       function getopstr(const o:toper) : string;
         var
           hs : string;
-          first : boolean;
-          r : tsuperregister;
         begin
           case o.typ of
             top_reg:
@@ -165,7 +163,7 @@ unit agavrgas;
             top_ref:
               if o.ref^.refaddr=addr_full then
                 begin
-                  hs:=ReplaceForbiddenAsmSymbolChars(o.ref^.symbol.name);
+                  hs:=ApplyAsmSymbolRestrictions(o.ref^.symbol.name);
                   if o.ref^.offset>0 then
                     hs:=hs+'+'+tostr(o.ref^.offset)
                   else if o.ref^.offset<0 then
@@ -216,6 +214,7 @@ unit agavrgas;
             supported_targets : [system_avr_embedded];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
+            labelmaxlen : -1;
             comment : '# ';
             dollarsign: 's';
           );

+ 94 - 29
compiler/avr/aoptcpu.pas

@@ -258,7 +258,6 @@ Implementation
 
     var
       hp1, hp2, hp3: tai;
-      s: string;
     begin
       result:=false;
 
@@ -318,9 +317,9 @@ Implementation
               GetNextInstruction(p, hp1) and
               ((MatchInstruction(hp1, A_CP) and
                 (((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[0]^.reg) and
-                  (taicpu(hp1).oper[1]^.reg = NR_R1)) or
+                  (taicpu(hp1).oper[1]^.reg = GetDefaultZeroReg)) or
                  ((taicpu(p).oper[0]^.reg = taicpu(hp1).oper[1]^.reg) and
-                  (taicpu(hp1).oper[0]^.reg = NR_R1) and
+                  (taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and
                   (taicpu(p).opcode in [A_ADC,A_ADD,A_AND,A_ANDI,A_ASR,A_COM,A_EOR,
                                         A_LSL,A_LSR,
                                         A_OR,A_ORI,A_ROL,A_ROR,A_SUB,A_SBI])))) or
@@ -358,7 +357,7 @@ Implementation
                 // If we compare to the same value we are masking then invert the comparison
                 if (taicpu(hp1).opcode=A_CPI) or
                   { sub/sbc with reverted? }
-                  ((taicpu(hp1).oper[0]^.reg = NR_R1) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
+                  ((taicpu(hp1).oper[0]^.reg = GetDefaultZeroReg) and (taicpu(p).opcode in [A_SUB,A_SBI])) then
                   taicpu(hp2).condition:=inverse_cond(taicpu(hp2).condition);
 
                 asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
@@ -377,14 +376,14 @@ Implementation
                   begin
                     { turn
                       ldi reg0, imm
-                      cp/mov reg1, reg0
+                      <op> reg1, reg0
                       dealloc reg0
                       into
-                      cpi/ldi reg1, imm
+                      <op>i reg1, imm
                     }
                     if MatchOpType(taicpu(p),top_reg,top_const) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
-                       MatchInstruction(hp1,[A_CP,A_MOV],2) and
+                       MatchInstruction(hp1,[A_CP,A_MOV,A_AND,A_SUB],2) and
                        (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
                        MatchOpType(taicpu(hp1),top_reg,top_reg) and
                        (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
@@ -392,6 +391,8 @@ Implementation
                        not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) then
                       begin
                         TransferUsedRegs(TmpUsedRegs);
+                        UpdateUsedRegs(TmpUsedRegs,tai(p.next));
+                        UpdateUsedRegs(TmpUsedRegs,tai(hp1.next));
                         if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
                           begin
                             case taicpu(hp1).opcode of
@@ -399,6 +400,10 @@ Implementation
                                 taicpu(hp1).opcode:=A_CPI;
                               A_MOV:
                                 taicpu(hp1).opcode:=A_LDI;
+                              A_AND:
+                                taicpu(hp1).opcode:=A_ANDI;
+                              A_SUB:
+                                taicpu(hp1).opcode:=A_SUBI;
                               else
                                 internalerror(2016111901);
                             end;
@@ -415,9 +420,9 @@ Implementation
                                 dealloc.Free;
                               end;
 
-                            DebugMsg('Peephole LdiMov/Cp2Ldi/Cpi performed', p);
+                            DebugMsg('Peephole LdiOp2Opi performed', p);
 
-                            RemoveCurrentP(p);
+                            result:=RemoveCurrentP(p);
                           end;
                       end;
                   end;
@@ -427,13 +432,21 @@ Implementation
                     (getsupreg(taicpu(p).oper[0]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[0]^.ref^.index)=RS_NO) and
                     (taicpu(p).oper[0]^.ref^.addressmode=AM_UNCHANGED) and
-                    (taicpu(p).oper[0]^.ref^.offset>=32) and
-                    (taicpu(p).oper[0]^.ref^.offset<=95) then
+                    (((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[0]^.ref^.offset>=0) and
+                      (taicpu(p).oper[0]^.ref^.offset<=63)) or
+                     (not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[0]^.ref^.offset>=32) and
+                      (taicpu(p).oper[0]^.ref^.offset<=95))) then
                     begin
                       DebugMsg('Peephole Sts2Out performed', p);
 
                       taicpu(p).opcode:=A_OUT;
-                      taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
+                      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
+                        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset)
+                      else
+                        taicpu(p).loadconst(0,taicpu(p).oper[0]^.ref^.offset-32);
+                      result:=true;
                     end;
                 A_LDS:
                   if (taicpu(p).oper[1]^.ref^.symbol=nil) and
@@ -441,13 +454,22 @@ Implementation
                     (getsupreg(taicpu(p).oper[1]^.ref^.base)=RS_NO) and
                     (getsupreg(taicpu(p).oper[1]^.ref^.index)=RS_NO) and
                     (taicpu(p).oper[1]^.ref^.addressmode=AM_UNCHANGED) and
-                    (taicpu(p).oper[1]^.ref^.offset>=32) and
-                    (taicpu(p).oper[1]^.ref^.offset<=95) then
+                    (((CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[1]^.ref^.offset>=0) and
+                      (taicpu(p).oper[1]^.ref^.offset<=63)) or
+                     (not(CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype]) and
+                      (taicpu(p).oper[1]^.ref^.offset>=32) and
+                      (taicpu(p).oper[1]^.ref^.offset<=95))) then
                     begin
                       DebugMsg('Peephole Lds2In performed', p);
 
                       taicpu(p).opcode:=A_IN;
-                      taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
+                      if CPUAVR_NOMEMMAPPED_REGS in cpu_capabilities[current_settings.cputype] then
+                        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset)
+                      else
+                        taicpu(p).loadconst(1,taicpu(p).oper[1]^.ref^.offset-32);
+
+                      result:=true;
                     end;
                 A_IN:
                     if GetNextInstruction(p,hp1) then
@@ -653,18 +675,19 @@ Implementation
                   end;
                 A_ADD:
                   begin
-                    if (taicpu(p).oper[1]^.reg=NR_R1) and
+                    if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
                     GetNextInstruction(p, hp1) and
                     MatchInstruction(hp1,A_ADC) then
                     begin
                       DebugMsg('Peephole AddAdc2Add performed', p);
 
-                      result:=RemoveCurrentP(p);
+                      RemoveCurrentP(p, hp1);
+                      Result := True;
                     end;
                   end;
                 A_SUB:
                   begin
-                    if (taicpu(p).oper[1]^.reg=NR_R1) and
+                    if (taicpu(p).oper[1]^.reg=GetDefaultZeroReg) and
                     GetNextInstruction(p, hp1) and
                     MatchInstruction(hp1,A_SBC) then
                     begin
@@ -672,7 +695,8 @@ Implementation
 
                       taicpu(hp1).opcode:=A_SUB;
 
-                      result:=RemoveCurrentP(p);
+                      RemoveCurrentP(p, hp1);
+                      Result := True;
                     end;
                   end;
                 A_CLR:
@@ -719,7 +743,7 @@ Implementation
                       begin
                         DebugMsg('Peephole ClrAdc2Adc performed', p);
 
-                        taicpu(hp1).oper[1]^.reg:=NR_R1;
+                        taicpu(hp1).oper[1]^.reg:=GetDefaultZeroReg;
 
                         alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
                         dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
@@ -762,7 +786,8 @@ Implementation
                        GetNextInstruction(hp2,hp3) and
                        MatchInstruction(hp3,A_POP) then
                       begin
-                       if (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
+                       if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
+                         (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
                          ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
                          (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
                          ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
@@ -774,9 +799,23 @@ Implementation
 
                            taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
 
-                           RemoveCurrentP(p);
-                           RemoveCurrentP(p);
-                           result:=RemoveCurrentP(p);
+                           { We're removing 3 consecutive instructions.  Remove hp1
+                             and hp2 manually instead of calling RemoveCurrentP
+                             as this means we won't be calling UpdateUsedRegs 3 times }
+                           asml.Remove(hp1);
+                           hp1.Free;
+
+                           asml.Remove(hp2);
+                           hp2.Free;
+
+                           { By removing p last, we've guaranteed that p.Next is
+                             valid (storing it prior to removing the instructions
+                             may result in a dangling pointer if hp1 immediately
+                             follows p), and because hp1, hp2 and hp3 came from
+                             sequential calls to GetNextInstruction, it is
+                             guaranteed that UpdateUsedRegs will stop at hp3. [Kit] }
+                           RemoveCurrentP(p, hp3);
+                           Result := True;
                          end
                        else
                          begin
@@ -862,7 +901,8 @@ Implementation
                           not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
                           begin
                             DebugMsg('Peephole Mov2Nop performed', p);
-                            result:=RemoveCurrentP(p);
+                            RemoveCurrentP(p, hp1);
+                            Result := True;
                             exit;
                           end;
                       end;
@@ -1063,20 +1103,45 @@ Implementation
                       mov rX,...
                       mov rX,...
                     }
-                    else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) then
+                    else if GetNextInstruction(p,hp1) and MatchInstruction(hp1,A_MOV) and
+                      { test the condition here as well, not only in the while loop, otherwise the MovMov2Mov 2 case below might be overlooked }
+                      MatchInstruction(hp1,A_MOV) and
+                      MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) then
                       while MatchInstruction(hp1,A_MOV) and
                             MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
                             { don't remove the first mov if the second is a mov rX,rX }
                             not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) do
                         begin
-                          DebugMsg('Peephole MovMov2Mov performed', p);
+                          DebugMsg('Peephole MovMov2Mov 1 performed', p);
 
-                          result:=RemoveCurrentP(p);
+                          RemoveCurrentP(p,hp1);
+                          Result := True;
 
                           GetNextInstruction(hp1,hp1);
                           if not assigned(hp1) then
                             break;
-                        end;
+                        end
+                    {
+                      This removes the second mov from
+                      mov rX,rY
+
+                      ...
+
+                      mov rX,rY
+
+                      if rX and rY are not modified in-between
+                    }
+                    else if GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[1]^.reg) and
+                      MatchInstruction(hp1,A_MOV) and
+                      MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^) and
+                      MatchOperand(taicpu(p).oper[1]^, taicpu(hp1).oper[1]^) and
+                      not(RegModifiedBetween(taicpu(p).oper[0]^.reg,p,hp1)) then
+                      begin
+                        DebugMsg('Peephole MovMov2Mov 2 performed', p);
+                        AllocRegBetween(taicpu(p).oper[0]^.reg,p,hp1,UsedRegs);
+                        RemoveInstruction(hp1);
+                        Result := True;
+                      end;
                   end;
                 A_SBIC,
                 A_SBIS:

+ 8 - 7
compiler/avr/ccpuinnr.inc

@@ -12,10 +12,11 @@
 
  **********************************************************************}
 
-  in_avr_cli = fpc_in_cpu_first,
-  in_avr_sei = fpc_in_cpu_first+1,
-  in_avr_wdr = fpc_in_cpu_first+2,
-  in_avr_sleep = fpc_in_cpu_first+3,
-  in_avr_nop = fpc_in_cpu_first+4,
-  in_avr_save = fpc_in_cpu_first+5,
-  in_avr_restore = fpc_in_cpu_first+6
+  in_avr_cli = in_cpu_first,
+  in_avr_sei = in_cpu_first+1,
+  in_avr_wdr = in_cpu_first+2,
+  in_avr_sleep = in_cpu_first+3,
+  in_avr_nop = in_cpu_first+4,
+  in_avr_save = in_cpu_first+5,
+  in_avr_restore = in_cpu_first+6
+

File diff suppressed because it is too large
+ 318 - 197
compiler/avr/cgcpu.pas


+ 46 - 7
compiler/avr/cpubase.pas

@@ -111,8 +111,6 @@ unit cpubase;
       first_mm_supreg    = RS_INVALID;
       first_mm_imreg     = 0;
 
-      regnumber_count_bsstart = 32;
-
       regnumber_table : array[tregisterindex] of tregister = (
         {$i ravrnum.inc}
       );
@@ -128,9 +126,6 @@ unit cpubase;
       VOLATILE_INTREGISTERS = [RS_R0,RS_R1,RS_R18..RS_R27,RS_R30,RS_R31];
       VOLATILE_FPUREGISTERS = [];
 
-    type
-      totherregisterset = set of tregisterindex;
-
 {*****************************************************************************
                                 Conditions
 *****************************************************************************}
@@ -232,8 +227,8 @@ unit cpubase;
 *****************************************************************************}
 
       { Stack pointer register }
-      NR_STACK_POINTER_REG = NR_R13;
-      RS_STACK_POINTER_REG = RS_R13;
+      NR_STACK_POINTER_REG = NR_INVALID;
+      RS_STACK_POINTER_REG = RS_INVALID;
       { Frame pointer register }
       RS_FRAME_POINTER_REG = RS_R28;
       NR_FRAME_POINTER_REG = NR_R28;
@@ -303,6 +298,9 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+
     function dwarf_reg(r:tregister):byte;
     function dwarf_reg_no_error(r:tregister):shortint;
     function eh_return_data_regno(nr: longint): longint;
@@ -310,6 +308,9 @@ unit cpubase;
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
 
+    function GetDefaultTmpReg : TRegister;
+    function GetDefaultZeroReg : TRegister;
+
   implementation
 
     uses
@@ -413,6 +414,24 @@ unit cpubase;
       end;
 
 
+    { Checks if Subset is a subset of c (e.g. "less than" is a subset of "less than or equal") }
+    function condition_in(const Subset, c: TAsmCond): Boolean;
+      begin
+        Result := (c = C_None) or conditions_equal(Subset, c);  { True at once when c is C_None or both conditions are equal }
+
+        { Only the subset pairs listed below are recognised; please extend as necessary. [Kit] }
+        if not Result then
+          case Subset of
+            C_EQ:
+              Result := (c in [C_GE]);
+            C_LT:
+              Result := (c in [C_NE]);
+            else
+              Result := False;
+          end;
+      end;
+
+
     function rotl(d : dword;b : byte) : dword;
       begin
          result:=(d shr (32-b)) or (d shl b);
@@ -429,11 +448,13 @@ unit cpubase;
         result:=reg;
       end;
 
+
     function dwarf_reg_no_error(r:tregister):shortint;
       begin
         result:=regdwarf_table[findreg_by_number(r)];  { map r to its table index via findreg_by_number, then return that regdwarf_table entry }
       end;
 
+
     function eh_return_data_regno(nr: longint): longint;
       begin
         result:=-1;
@@ -446,4 +467,22 @@ unit cpubase;
       end;
 
 
+    function GetDefaultTmpReg: TRegister;  { returns NR_R16 or NR_R0 depending on the selected CPU type }
+      begin
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          Result:=NR_R16  { CPU type flagged CPUAVR_16_REGS - presumably R0..R15 are not available there; TODO confirm }
+        else
+          Result:=NR_R0;
+      end;
+
+
+    function GetDefaultZeroReg: TRegister;  { returns NR_R17 or NR_R1 depending on the selected CPU type }
+      begin
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          Result:=NR_R17  { CPU type flagged CPUAVR_16_REGS - presumably R0..R15 are not available there; TODO confirm }
+        else
+          Result:=NR_R1;
+      end;
+
+
 end.

+ 442 - 284
compiler/avr/cpuinfo.pas

@@ -37,6 +37,7 @@ Type
    { possible supported processors for this target }
    tcputype =
       (cpu_none,
+       cpu_avrtiny,
        cpu_avr1,
        cpu_avr2,
        cpu_avr25,
@@ -46,7 +47,8 @@ Type
        cpu_avr4,
        cpu_avr5,
        cpu_avr51,
-       cpu_avr6
+       cpu_avr6,
+       cpu_avrxmega3
       );
 
    tfputype =
@@ -60,150 +62,225 @@ Type
 
       ct_avrsim,
 
-      ct_atmega645,
-      ct_atmega165a,
-      ct_attiny44a,
-      ct_atmega649a,
-      ct_atmega32u4,
-      ct_attiny26,
-      ct_at90usb1287,
+      ct_at90can32,
+      ct_at90can64,
+      ct_at90can128,
+      ct_at90pwm1,
+      ct_at90pwm2b,
+      ct_at90pwm3b,
+      ct_at90pwm81,
       ct_at90pwm161,
-      ct_attiny48,
-      ct_atmega168p,
-      ct_attiny10,
-      ct_attiny84a,
+      ct_at90pwm216,
+      ct_at90pwm316,
       ct_at90usb82,
-      ct_attiny2313,
-      ct_attiny461,
-      ct_atmega3250pa,
-      ct_atmega3290a,
-      ct_atmega165p,
-      ct_attiny43u,
       ct_at90usb162,
-      ct_atmega16u4,
-      ct_attiny24a,
-      ct_atmega88p,
-      ct_attiny88,
-      ct_atmega6490p,
-      ct_attiny40,
-      ct_atmega324p,
-      ct_attiny167,
-      ct_atmega328,
-      ct_attiny861,
-      ct_attiny85,
-      ct_atmega64m1,
-      ct_atmega645p,
-      ct_atmega8u2,
-      ct_atmega329a,
-      ct_atmega8a,
-      ct_atmega324pa,
-      ct_atmega32hvb,
-      ct_at90pwm316,
-      ct_at90pwm3b,
       ct_at90usb646,
-      ct_attiny20,
-      ct_atmega16,
-      ct_atmega48a,
-      ct_attiny24,
-      ct_atmega644,
-      ct_atmega1284,
+      ct_at90usb647,
+      ct_at90usb1286,
+      ct_at90usb1287,
       ct_ata6285,
-      ct_at90can64,
-      ct_atmega48,
-      ct_at90can32,
-      ct_attiny9,
-      ct_attiny87,
-      ct_atmega1281,
-      ct_at90pwm216,
-      ct_atmega3250a,
-      ct_atmega88a,
-      ct_atmega128rfa1,
-      ct_atmega3290pa,
-      ct_at90pwm81,
-      ct_atmega325p,
-      ct_attiny84,
-      ct_atmega328p,
-      ct_attiny13a,
+      ct_ata6286,
       ct_atmega8,
-      ct_atmega1284p,
+      ct_atmega8a,
+      ct_atmega8hva,
+      ct_atmega8u2,
+      ct_atmega16,
+      ct_atmega16a,
+      ct_atmega16hva,
+      ct_atmega16hvb,
+      ct_atmega16hvbrevb,
+      ct_atmega16m1,
       ct_atmega16u2,
-      ct_attiny45,
-      ct_atmega3250,
-      ct_atmega329,
+      ct_atmega16u4,
+      ct_atmega32,
       ct_atmega32a,
-      ct_attiny5,
-      ct_at90can128,
-      ct_atmega6490,
-      ct_atmega8515,
+      ct_atmega32c1,
+      ct_atmega32hvb,
+      ct_atmega32hvbrevb,
+      ct_atmega32m1,
+      ct_atmega32u2,
+      ct_atmega32u4,
+      ct_atmega48,
+      ct_atmega48a,
+      ct_atmega48p,
+      ct_atmega48pa,
+      ct_atmega48pb,
+      ct_atmega64,
+      ct_atmega64a,
+      ct_atmega64c1,
+      ct_atmega64hve2,
+      ct_atmega64m1,
+      ct_atmega64rfr2,
+      ct_atmega88,
+      ct_atmega88a,
+      ct_atmega88p,
       ct_atmega88pa,
-      ct_atmega168a,
+      ct_atmega88pb,
       ct_atmega128,
-      ct_at90usb1286,
-      ct_atmega164pa,
-      ct_attiny828,
-      ct_atmega88,
-      ct_atmega645a,
-      ct_atmega3290p,
-      ct_atmega644p,
-      ct_atmega164a,
-      ct_attiny4313,
-      ct_atmega162,
-      ct_atmega32c1,
       ct_atmega128a,
-      ct_atmega324a,
-      ct_attiny13,
-      ct_atmega2561,
+      ct_atmega128rfa1,
+      ct_atmega128rfr2,
+      ct_atmega162,
+      ct_atmega164a,
+      ct_atmega164p,
+      ct_atmega164pa,
+      ct_atmega165a,
+      ct_atmega165p,
+      ct_atmega165pa,
+      ct_atmega168,
+      ct_atmega168a,
+      ct_atmega168p,
+      ct_atmega168pa,
+      ct_atmega168pb,
       ct_atmega169a,
-      ct_attiny261,
-      ct_atmega644a,
-      ct_atmega3290,
-      ct_atmega64a,
       ct_atmega169p,
-      ct_atmega2560,
-      ct_atmega32,
-      ct_attiny861a,
-      ct_attiny28,
-      ct_atmega48p,
-      ct_atmega8535,
-      ct_atmega168pa,
-      ct_atmega16m1,
-      ct_atmega16hvb,
-      ct_atmega164p,
+      ct_atmega169pa,
+      ct_atmega256rfr2,
+      ct_atmega324a,
+      ct_atmega324p,
+      ct_atmega324pa,
+      ct_atmega324pb,
+      ct_atmega325,
       ct_atmega325a,
+      ct_atmega325p,
+      ct_atmega325pa,
+      ct_atmega328,
+      ct_atmega328p,
+      ct_atmega328pb,
+      ct_atmega329,
+      ct_atmega329a,
+      ct_atmega329p,
+      ct_atmega329pa,
+      ct_atmega406,
       ct_atmega640,
+      ct_atmega644,
+      ct_atmega644a,
+      ct_atmega644p,
+      ct_atmega644pa,
+      ct_atmega644rfr2,
+      ct_atmega645,
+      ct_atmega645a,
+      ct_atmega645p,
+      ct_atmega649,
+      ct_atmega649a,
+      ct_atmega649p,
+      ct_atmega808,
+      ct_atmega809,
+      ct_atmega1280,
+      ct_atmega1281,
+      ct_atmega1284,
+      ct_atmega1284p,
+      ct_atmega1284rfr2,
+      ct_atmega1608,
+      ct_atmega1609,
+      ct_atmega2560,
+      ct_atmega2561,
+      ct_atmega2564rfr2,
+      ct_atmega3208,
+      ct_atmega3209,
+      ct_atmega3250,
+      ct_atmega3250a,
+      ct_atmega3250p,
+      ct_atmega3250pa,
+      ct_atmega3290,
+      ct_atmega3290a,
+      ct_atmega3290p,
+      ct_atmega3290pa,
+      ct_atmega4808,
+      ct_atmega4809,
       ct_atmega6450,
-      ct_atmega329p,
-      ct_ata6286,
-      ct_at90usb647,
-      ct_atmega168,
+      ct_atmega6450a,
+      ct_atmega6450p,
+      ct_atmega6490,
       ct_atmega6490a,
-      ct_atmega32m1,
-      ct_atmega64c1,
-      ct_atmega32u2,
+      ct_atmega6490p,
+      ct_atmega8515,
+      ct_atmega8535,
       ct_attiny4,
-      ct_atmega644pa,
-      ct_at90pwm1,
+      ct_attiny5,
+      ct_attiny9,
+      ct_attiny10,
+      ct_attiny11,
+      ct_attiny12,
+      ct_attiny13,
+      ct_attiny13a,
+      ct_attiny15,
+      ct_attiny20,
+      ct_attiny24,
+      ct_attiny24a,
+      ct_attiny25,
+      ct_attiny26,
+      ct_attiny28,
+      ct_attiny40,
+      ct_attiny43u,
       ct_attiny44,
-      ct_atmega325pa,
-      ct_atmega6450a,
-      ct_attiny2313a,
-      ct_atmega329pa,
+      ct_attiny44a,
+      ct_attiny45,
+      ct_attiny48,
+      ct_attiny84,
+      ct_attiny84a,
+      ct_attiny85,
+      ct_attiny87,
+      ct_attiny88,
+      ct_attiny102,
+      ct_attiny104,
+      ct_attiny167,
+      ct_attiny202,
+      ct_attiny204,
+      ct_attiny212,
+      ct_attiny214,
+      ct_attiny261,
+      ct_attiny261a,
+      ct_attiny402,
+      ct_attiny404,
+      ct_attiny406,
+      ct_attiny412,
+      ct_attiny414,
+      ct_attiny416,
+      ct_attiny416auto,
+      ct_attiny417,
+      ct_attiny441,
+      ct_attiny461,
       ct_attiny461a,
-      ct_atmega6450p,
-      ct_atmega64,
-      ct_atmega165pa,
-      ct_atmega16a,
-      ct_atmega649,
-      ct_atmega1280,
-      ct_at90pwm2b,
-      ct_atmega649p,
-      ct_atmega3250p,
-      ct_atmega48pa,
+      ct_attiny804,
+      ct_attiny806,
+      ct_attiny807,
+      ct_attiny814,
+      ct_attiny816,
+      ct_attiny817,
+      ct_attiny828,
+      ct_attiny841,
+      ct_attiny861,
+      ct_attiny861a,
+      ct_attiny1604,
+      ct_attiny1606,
+      ct_attiny1607,
+      ct_attiny1614,
+      ct_attiny1616,
+      ct_attiny1617,
+      ct_attiny1624,
+      ct_attiny1626,
+      ct_attiny1627,
       ct_attiny1634,
-      ct_atmega325,
-      ct_atmega169pa,
-      ct_attiny261a,
-      ct_attiny25
+      ct_attiny2313,
+      ct_attiny2313a,
+      ct_attiny3214,
+      ct_attiny3216,
+      ct_attiny3217,
+      ct_attiny4313,
+      // Controller board aliases
+      ct_arduinoleonardo,
+      ct_arduinomega,
+      ct_arduinomicro,
+      ct_arduinonano,
+      ct_arduinonanoevery,
+      ct_arduinouno,
+      ct_atmega256rfr2xpro,
+      ct_atmega324pbxpro,
+      ct_atmega1284pxplained,
+      ct_atmega4809xpro,
+      ct_attiny817xpro,
+      ct_attiny3217xpro
      );
 
    tcontrollerdatatype = record
@@ -234,7 +311,8 @@ Const
      pocall_softfloat
    ];
 
-   cputypestr : array[tcputype] of string[5] = ('',
+   cputypestr : array[tcputype] of string[9] = ('',
+     'AVRTINY',
      'AVR1',
      'AVR2',
      'AVR25',
@@ -244,7 +322,8 @@ Const
      'AVR4',
      'AVR5',
      'AVR51',
-     'AVR6'
+     'AVR6',
+     'AVRXMEGA3'
    );
 
    fputypestr : array[tfputype] of string[6] = (
@@ -282,150 +361,225 @@ Const
         eeprombase:0;
         eepromsize:4096;
         )
-        ,(controllertypestr:'ATMEGA645'; controllerunitstr:'ATMEGA645'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA165A'; controllerunitstr:'ATMEGA165A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY44A'; controllerunitstr:'ATTINY44A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA649A'; controllerunitstr:'ATMEGA649A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32U4'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY26'; controllerunitstr:'ATTINY26'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'AT90USB1287'; controllerunitstr:'AT90USB1287'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM161'; controllerunitstr:'AT90PWM161'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY48'; controllerunitstr:'ATTINY48'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:256; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA168P'; controllerunitstr:'ATMEGA168P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY10'; controllerunitstr:'ATTINY10'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATTINY84A'; controllerunitstr:'ATTINY84A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90USB82'; controllerunitstr:'AT90USB82'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY2313'; controllerunitstr:'ATTINY2313'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATTINY461'; controllerunitstr:'ATTINY461'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA3250PA'; controllerunitstr:'ATMEGA3250PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA3290A'; controllerunitstr:'ATMEGA3290A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA165P'; controllerunitstr:'ATMEGA165P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY43U'; controllerunitstr:'ATTINY43U'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'AT90USB162'; controllerunitstr:'AT90USB162'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16U4'; controllerunitstr:'ATMEGA16U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1280; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY24A'; controllerunitstr:'ATTINY24A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA88P'; controllerunitstr:'ATMEGA88P'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY88'; controllerunitstr:'ATTINY88'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA6490P'; controllerunitstr:'ATMEGA6490P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY40'; controllerunitstr:'ATTINY40'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:64; sramsize:256; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA324P'; controllerunitstr:'ATMEGA324P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY167'; controllerunitstr:'ATTINY167'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA328'; controllerunitstr:'ATMEGA328'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY861'; controllerunitstr:'ATTINY861'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY85'; controllerunitstr:'ATTINY85'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA64M1'; controllerunitstr:'ATMEGA64M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA645P'; controllerunitstr:'ATMEGA645P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA8U2'; controllerunitstr:'ATMEGA8U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA329A'; controllerunitstr:'ATMEGA329A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA8A'; controllerunitstr:'ATMEGA8A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA324PA'; controllerunitstr:'ATMEGA324PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA32HVB'; controllerunitstr:'ATMEGA32HVB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'AT90PWM316'; controllerunitstr:'AT90PWM316'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90PWM3B'; controllerunitstr:'AT90PWM3B'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'AT90USB646'; controllerunitstr:'AT90USB646'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY20'; controllerunitstr:'ATTINY20'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:64; sramsize:128; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA16'; controllerunitstr:'ATMEGA16'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA48A'; controllerunitstr:'ATMEGA48A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATTINY24'; controllerunitstr:'ATTINY24'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA644'; controllerunitstr:'ATMEGA644'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA1284'; controllerunitstr:'ATMEGA1284'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATA6285'; controllerunitstr:'ATA6285'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:320)
-        ,(controllertypestr:'AT90CAN64'; controllerunitstr:'AT90CAN64'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA48'; controllerunitstr:'ATMEGA48'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'AT90CAN32'; controllerunitstr:'AT90CAN32'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY9'; controllerunitstr:'ATTINY9'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATTINY87'; controllerunitstr:'ATTINY87'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA1281'; controllerunitstr:'ATMEGA1281'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM216'; controllerunitstr:'AT90PWM216'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA3250A'; controllerunitstr:'ATMEGA3250A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA88A'; controllerunitstr:'ATMEGA88A'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA128RFA1'; controllerunitstr:'ATMEGA128RFA1'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA3290PA'; controllerunitstr:'ATMEGA3290PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'AT90PWM81'; controllerunitstr:'AT90PWM81'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:256; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA325P'; controllerunitstr:'ATMEGA325P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY84'; controllerunitstr:'ATTINY84'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA328P'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY13A'; controllerunitstr:'ATTINY13A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:96; sramsize:64; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA8'; controllerunitstr:'ATMEGA8'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA1284P'; controllerunitstr:'ATMEGA1284P'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA16U2'; controllerunitstr:'ATMEGA16U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY45'; controllerunitstr:'ATTINY45'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA3250'; controllerunitstr:'ATMEGA3250'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA329'; controllerunitstr:'ATMEGA329'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA32A'; controllerunitstr:'ATMEGA32A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:96; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY5'; controllerunitstr:'ATTINY5'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:512; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'AT90CAN128'; controllerunitstr:'AT90CAN128'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA6490'; controllerunitstr:'ATMEGA6490'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA8515'; controllerunitstr:'ATMEGA8515'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA88PA'; controllerunitstr:'ATMEGA88PA'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA168A'; controllerunitstr:'ATMEGA168A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA128'; controllerunitstr:'ATMEGA128'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90USB1286'; controllerunitstr:'AT90USB1286'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA164PA'; controllerunitstr:'ATMEGA164PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY828'; controllerunitstr:'ATTINY828'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA88'; controllerunitstr:'ATMEGA88'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA645A'; controllerunitstr:'ATMEGA645A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3290P'; controllerunitstr:'ATMEGA3290P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA644P'; controllerunitstr:'ATMEGA644P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA164A'; controllerunitstr:'ATMEGA164A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY4313'; controllerunitstr:'ATTINY4313'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA162'; controllerunitstr:'ATMEGA162'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA32C1'; controllerunitstr:'ATMEGA32C1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA128A'; controllerunitstr:'ATMEGA128A'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:4096; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA324A'; controllerunitstr:'ATMEGA324A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY13'; controllerunitstr:'ATTINY13'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:1024; srambase:96; sramsize:64; eeprombase:0; eepromsize:64)
-        ,(controllertypestr:'ATMEGA2561'; controllerunitstr:'ATMEGA2561'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA169A'; controllerunitstr:'ATMEGA169A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY261'; controllerunitstr:'ATTINY261'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA644A'; controllerunitstr:'ATMEGA644A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3290'; controllerunitstr:'ATMEGA3290'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA64A'; controllerunitstr:'ATMEGA64A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA169P'; controllerunitstr:'ATMEGA169P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA2560'; controllerunitstr:'ATMEGA2560'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA32'; controllerunitstr:'ATMEGA32'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:96; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY861A'; controllerunitstr:'ATTINY861A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY28'; controllerunitstr:'ATTINY28'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:0; sramsize:0; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA48P'; controllerunitstr:'ATMEGA48P'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA8535'; controllerunitstr:'ATMEGA8535'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:96; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA168PA'; controllerunitstr:'ATMEGA168PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16M1'; controllerunitstr:'ATMEGA16M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16HVB'; controllerunitstr:'ATMEGA16HVB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA164P'; controllerunitstr:'ATMEGA164P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA325A'; controllerunitstr:'ATMEGA325A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA640'; controllerunitstr:'ATMEGA640'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'ATMEGA6450'; controllerunitstr:'ATMEGA6450'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA329P'; controllerunitstr:'ATMEGA329P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATA6286'; controllerunitstr:'ATA6286'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:320)
-        ,(controllertypestr:'AT90USB647'; controllerunitstr:'AT90USB647'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA168'; controllerunitstr:'ATMEGA168'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA6490A'; controllerunitstr:'ATMEGA6490A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32M1'; controllerunitstr:'ATMEGA32M1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA64C1'; controllerunitstr:'ATMEGA64C1'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA32U2'; controllerunitstr:'ATMEGA32U2'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:1024; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY4'; controllerunitstr:'ATTINY4'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:512; srambase:64; sramsize:32; eeprombase:0; eepromsize:0)
-        ,(controllertypestr:'ATMEGA644PA'; controllerunitstr:'ATMEGA644PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'AT90PWM1'; controllerunitstr:'AT90PWM1'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY44'; controllerunitstr:'ATTINY44'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA325PA'; controllerunitstr:'ATMEGA325PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA6450A'; controllerunitstr:'ATMEGA6450A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATTINY2313A'; controllerunitstr:'ATTINY2313A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATMEGA329PA'; controllerunitstr:'ATMEGA329PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATTINY461A'; controllerunitstr:'ATTINY461A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:96; sramsize:256; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA6450P'; controllerunitstr:'ATMEGA6450P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA64'; controllerunitstr:'ATMEGA64'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA165PA'; controllerunitstr:'ATMEGA165PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA16A'; controllerunitstr:'ATMEGA16A'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:96; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA649'; controllerunitstr:'ATMEGA649'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA1280'; controllerunitstr:'ATMEGA1280'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
-        ,(controllertypestr:'AT90PWM2B'; controllerunitstr:'AT90PWM2B'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:256; sramsize:512; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATMEGA649P'; controllerunitstr:'ATMEGA649P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:65536; srambase:256; sramsize:4096; eeprombase:0; eepromsize:2048)
-        ,(controllertypestr:'ATMEGA3250P'; controllerunitstr:'ATMEGA3250P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA48PA'; controllerunitstr:'ATMEGA48PA'; cputype: cpu_avr4; fputype:fpu_soft; flashbase:0; flashsize:4096; srambase:256; sramsize:512; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATTINY1634'; controllerunitstr:'ATTINY1634'; cputype: cpu_avr35; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:256)
-        ,(controllertypestr:'ATMEGA325'; controllerunitstr:'ATMEGA325'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
-        ,(controllertypestr:'ATMEGA169PA'; controllerunitstr:'ATMEGA169PA'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:16384; srambase:256; sramsize:1024; eeprombase:0; eepromsize:512)
-        ,(controllertypestr:'ATTINY261A'; controllerunitstr:'ATTINY261A'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
-        ,(controllertypestr:'ATTINY25'; controllerunitstr:'ATTINY25'; cputype: cpu_avr25; fputype:fpu_soft; flashbase:0; flashsize:2048; srambase:96; sramsize:128; eeprombase:0; eepromsize:128)
+        ,(controllertypestr:'AT90CAN32';controllerunitstr:'AT90CAN32';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'AT90CAN64';controllerunitstr:'AT90CAN64';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90CAN128';controllerunitstr:'AT90CAN128';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'AT90PWM1';controllerunitstr:'AT90PWM1';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM2B';controllerunitstr:'AT90PWM2B';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM3B';controllerunitstr:'AT90PWM3B';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM81';controllerunitstr:'AT90PWM81';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:256;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM161';controllerunitstr:'AT90PWM161';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM216';controllerunitstr:'AT90PWM216';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90PWM316';controllerunitstr:'AT90PWM316';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB82';controllerunitstr:'AT90USB82';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB162';controllerunitstr:'AT90USB162';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'AT90USB646';controllerunitstr:'AT90USB646';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90USB647';controllerunitstr:'AT90USB647';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'AT90USB1286';controllerunitstr:'AT90USB1286';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'AT90USB1287';controllerunitstr:'AT90USB1287';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATA6285';controllerunitstr:'ATA6285';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:320)
+        ,(controllertypestr:'ATA6286';controllerunitstr:'ATA6286';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:320)
+        ,(controllertypestr:'ATMEGA8';controllerunitstr:'ATMEGA8';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8A';controllerunitstr:'ATMEGA8A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8HVA';controllerunitstr:'ATMEGA8HVA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA8U2';controllerunitstr:'ATMEGA8U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16';controllerunitstr:'ATMEGA16';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16A';controllerunitstr:'ATMEGA16A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:96;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16HVA';controllerunitstr:'ATMEGA16HVA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA16HVB';controllerunitstr:'ATMEGA16HVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16HVBREVB';controllerunitstr:'ATMEGA16HVBREVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16M1';controllerunitstr:'ATMEGA16M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16U2';controllerunitstr:'ATMEGA16U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA16U4';controllerunitstr:'ATMEGA16U4';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1280;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA32';controllerunitstr:'ATMEGA32';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:96;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32A';controllerunitstr:'ATMEGA32A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:96;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32C1';controllerunitstr:'ATMEGA32C1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32HVB';controllerunitstr:'ATMEGA32HVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32HVBREVB';controllerunitstr:'ATMEGA32HVBREVB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32M1';controllerunitstr:'ATMEGA32M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32U2';controllerunitstr:'ATMEGA32U2';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:1024;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA32U4';controllerunitstr:'ATMEGA32U4';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2560;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA48';controllerunitstr:'ATMEGA48';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48A';controllerunitstr:'ATMEGA48A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48P';controllerunitstr:'ATMEGA48P';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48PA';controllerunitstr:'ATMEGA48PA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA48PB';controllerunitstr:'ATMEGA48PB';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATMEGA64';controllerunitstr:'ATMEGA64';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64A';controllerunitstr:'ATMEGA64A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64C1';controllerunitstr:'ATMEGA64C1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64HVE2';controllerunitstr:'ATMEGA64HVE2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA64M1';controllerunitstr:'ATMEGA64M1';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA64RFR2';controllerunitstr:'ATMEGA64RFR2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA88';controllerunitstr:'ATMEGA88';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88A';controllerunitstr:'ATMEGA88A';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88P';controllerunitstr:'ATMEGA88P';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88PA';controllerunitstr:'ATMEGA88PA';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA88PB';controllerunitstr:'ATMEGA88PB';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA128';controllerunitstr:'ATMEGA128';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128A';controllerunitstr:'ATMEGA128A';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:4096;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128RFA1';controllerunitstr:'ATMEGA128RFA1';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA128RFR2';controllerunitstr:'ATMEGA128RFR2';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA162';controllerunitstr:'ATMEGA162';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164A';controllerunitstr:'ATMEGA164A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164P';controllerunitstr:'ATMEGA164P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA164PA';controllerunitstr:'ATMEGA164PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165A';controllerunitstr:'ATMEGA165A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165P';controllerunitstr:'ATMEGA165P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA165PA';controllerunitstr:'ATMEGA165PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168';controllerunitstr:'ATMEGA168';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168A';controllerunitstr:'ATMEGA168A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168P';controllerunitstr:'ATMEGA168P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168PA';controllerunitstr:'ATMEGA168PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA168PB';controllerunitstr:'ATMEGA168PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169A';controllerunitstr:'ATMEGA169A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169P';controllerunitstr:'ATMEGA169P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA169PA';controllerunitstr:'ATMEGA169PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA256RFR2';controllerunitstr:'ATMEGA256RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA324A';controllerunitstr:'ATMEGA324A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324P';controllerunitstr:'ATMEGA324P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324PA';controllerunitstr:'ATMEGA324PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA324PB';controllerunitstr:'ATMEGA324PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325';controllerunitstr:'ATMEGA325';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325A';controllerunitstr:'ATMEGA325A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325P';controllerunitstr:'ATMEGA325P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA325PA';controllerunitstr:'ATMEGA325PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328';controllerunitstr:'ATMEGA328';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328P';controllerunitstr:'ATMEGA328P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA328PB';controllerunitstr:'ATMEGA328PB';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329';controllerunitstr:'ATMEGA329';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329A';controllerunitstr:'ATMEGA329A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329P';controllerunitstr:'ATMEGA329P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA329PA';controllerunitstr:'ATMEGA329PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA406';controllerunitstr:'ATMEGA406';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:40960;srambase:256;sramsize:2048;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA640';controllerunitstr:'ATMEGA640';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA644';controllerunitstr:'ATMEGA644';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644A';controllerunitstr:'ATMEGA644A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644P';controllerunitstr:'ATMEGA644P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644PA';controllerunitstr:'ATMEGA644PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA644RFR2';controllerunitstr:'ATMEGA644RFR2';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:512;sramsize:8192;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645';controllerunitstr:'ATMEGA645';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645A';controllerunitstr:'ATMEGA645A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA645P';controllerunitstr:'ATMEGA645P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649';controllerunitstr:'ATMEGA649';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649A';controllerunitstr:'ATMEGA649A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA649P';controllerunitstr:'ATMEGA649P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA808';controllerunitstr:'ATMEGA808';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA809';controllerunitstr:'ATMEGA809';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA1280';controllerunitstr:'ATMEGA1280';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1281';controllerunitstr:'ATMEGA1281';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284';controllerunitstr:'ATMEGA1284';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284P';controllerunitstr:'ATMEGA1284P';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:256;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1284RFR2';controllerunitstr:'ATMEGA1284RFR2';cputype:cpu_avr51;fputype:fpu_soft;flashbase:0;flashsize:131072;srambase:512;sramsize:16384;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA1608';controllerunitstr:'ATMEGA1608';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA1609';controllerunitstr:'ATMEGA1609';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA2560';controllerunitstr:'ATMEGA2560';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA2561';controllerunitstr:'ATMEGA2561';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:8192;eeprombase:0;eepromsize:4096)
+        ,(controllertypestr:'ATMEGA2564RFR2';controllerunitstr:'ATMEGA2564RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA3208';controllerunitstr:'ATMEGA3208';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:12288;sramsize:4096;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA3209';controllerunitstr:'ATMEGA3209';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:12288;sramsize:4096;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA3250';controllerunitstr:'ATMEGA3250';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250A';controllerunitstr:'ATMEGA3250A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250P';controllerunitstr:'ATMEGA3250P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3250PA';controllerunitstr:'ATMEGA3250PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290';controllerunitstr:'ATMEGA3290';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290A';controllerunitstr:'ATMEGA3290A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290P';controllerunitstr:'ATMEGA3290P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA3290PA';controllerunitstr:'ATMEGA3290PA';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:256;sramsize:2048;eeprombase:0;eepromsize:1024)
+        ,(controllertypestr:'ATMEGA4808';controllerunitstr:'ATMEGA4808';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:49152;srambase:10240;sramsize:6144;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA4809';controllerunitstr:'ATMEGA4809';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:49152;srambase:10240;sramsize:6144;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATMEGA6450';controllerunitstr:'ATMEGA6450';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6450A';controllerunitstr:'ATMEGA6450A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6450P';controllerunitstr:'ATMEGA6450P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490';controllerunitstr:'ATMEGA6490';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490A';controllerunitstr:'ATMEGA6490A';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA6490P';controllerunitstr:'ATMEGA6490P';cputype:cpu_avr5;fputype:fpu_soft;flashbase:0;flashsize:65536;srambase:256;sramsize:4096;eeprombase:0;eepromsize:2048)
+        ,(controllertypestr:'ATMEGA8515';controllerunitstr:'ATMEGA8515';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATMEGA8535';controllerunitstr:'ATMEGA8535';cputype:cpu_avr4;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY4';controllerunitstr:'ATTINY4';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:512;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY5';controllerunitstr:'ATTINY5';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:512;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY9';controllerunitstr:'ATTINY9';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY10';controllerunitstr:'ATTINY10';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY11';controllerunitstr:'ATTINY11';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY12';controllerunitstr:'ATTINY12';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY13';controllerunitstr:'ATTINY13';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:96;sramsize:64;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY13A';controllerunitstr:'ATTINY13A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:96;sramsize:64;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY15';controllerunitstr:'ATTINY15';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:0;sramsize:0;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY20';controllerunitstr:'ATTINY20';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:64;sramsize:128;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY24';controllerunitstr:'ATTINY24';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY24A';controllerunitstr:'ATTINY24A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY25';controllerunitstr:'ATTINY25';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY26';controllerunitstr:'ATTINY26';cputype:cpu_avr2;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY28';controllerunitstr:'ATTINY28';cputype:cpu_avr1;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:0;sramsize:0;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY40';controllerunitstr:'ATTINY40';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:64;sramsize:256;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY43U';controllerunitstr:'ATTINY43U';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY44';controllerunitstr:'ATTINY44';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY44A';controllerunitstr:'ATTINY44A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY45';controllerunitstr:'ATTINY45';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY48';controllerunitstr:'ATTINY48';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:256;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY84';controllerunitstr:'ATTINY84';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY84A';controllerunitstr:'ATTINY84A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY85';controllerunitstr:'ATTINY85';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY87';controllerunitstr:'ATTINY87';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY88';controllerunitstr:'ATTINY88';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:64)
+        ,(controllertypestr:'ATTINY102';controllerunitstr:'ATTINY102';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY104';controllerunitstr:'ATTINY104';cputype:cpu_avrtiny;fputype:fpu_soft;flashbase:0;flashsize:1024;srambase:64;sramsize:32;eeprombase:0;eepromsize:0)
+        ,(controllertypestr:'ATTINY167';controllerunitstr:'ATTINY167';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY202';controllerunitstr:'ATTINY202';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY204';controllerunitstr:'ATTINY204';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY212';controllerunitstr:'ATTINY212';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY214';controllerunitstr:'ATTINY214';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:16256;sramsize:128;eeprombase:5120;eepromsize:64)
+        ,(controllertypestr:'ATTINY261';controllerunitstr:'ATTINY261';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY261A';controllerunitstr:'ATTINY261A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY402';controllerunitstr:'ATTINY402';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY404';controllerunitstr:'ATTINY404';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY406';controllerunitstr:'ATTINY406';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY412';controllerunitstr:'ATTINY412';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY414';controllerunitstr:'ATTINY414';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY416';controllerunitstr:'ATTINY416';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY416AUTO';controllerunitstr:'ATTINY416AUTO';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY417';controllerunitstr:'ATTINY417';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:16128;sramsize:256;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY441';controllerunitstr:'ATTINY441';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:256;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY461';controllerunitstr:'ATTINY461';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY461A';controllerunitstr:'ATTINY461A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY804';controllerunitstr:'ATTINY804';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY806';controllerunitstr:'ATTINY806';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY807';controllerunitstr:'ATTINY807';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY814';controllerunitstr:'ATTINY814';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY816';controllerunitstr:'ATTINY816';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY817';controllerunitstr:'ATTINY817';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:15872;sramsize:512;eeprombase:5120;eepromsize:128)
+        ,(controllertypestr:'ATTINY828';controllerunitstr:'ATTINY828';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY841';controllerunitstr:'ATTINY841';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:256;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY861';controllerunitstr:'ATTINY861';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY861A';controllerunitstr:'ATTINY861A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:8192;srambase:96;sramsize:512;eeprombase:0;eepromsize:512)
+        ,(controllertypestr:'ATTINY1604';controllerunitstr:'ATTINY1604';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1606';controllerunitstr:'ATTINY1606';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1607';controllerunitstr:'ATTINY1607';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:15360;sramsize:1024;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1614';controllerunitstr:'ATTINY1614';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1616';controllerunitstr:'ATTINY1616';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1617';controllerunitstr:'ATTINY1617';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1624';controllerunitstr:'ATTINY1624';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1626';controllerunitstr:'ATTINY1626';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1627';controllerunitstr:'ATTINY1627';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY1634';controllerunitstr:'ATTINY1634';cputype:cpu_avr35;fputype:fpu_soft;flashbase:0;flashsize:16384;srambase:256;sramsize:1024;eeprombase:0;eepromsize:256)
+        ,(controllertypestr:'ATTINY2313';controllerunitstr:'ATTINY2313';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY2313A';controllerunitstr:'ATTINY2313A';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:2048;srambase:96;sramsize:128;eeprombase:0;eepromsize:128)
+        ,(controllertypestr:'ATTINY3214';controllerunitstr:'ATTINY3214';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY3216';controllerunitstr:'ATTINY3216';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY3217';controllerunitstr:'ATTINY3217';cputype:cpu_avrxmega3;fputype:fpu_soft;flashbase:0;flashsize:32768;srambase:14336;sramsize:2048;eeprombase:5120;eepromsize:256)
+        ,(controllertypestr:'ATTINY4313';controllerunitstr:'ATTINY4313';cputype:cpu_avr25;fputype:fpu_soft;flashbase:0;flashsize:4096;srambase:96;sramsize:256;eeprombase:0;eepromsize:256)
+        // Controller board aliases
+        ,(controllertypestr:'ARDUINOLEONARDO'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINOMEGA'; controllerunitstr:'ATMEGA2560'; cputype: cpu_avr6; fputype:fpu_soft; flashbase:0; flashsize:262144; srambase:512; sramsize:8192; eeprombase:0; eepromsize:4096)
+        ,(controllertypestr:'ARDUINOMICRO'; controllerunitstr:'ATMEGA32U4'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2560; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINONANO'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ARDUINONANOEVERY'; controllerunitstr:'ATMEGA4809'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:49152; srambase:10240; sramsize:6144; eeprombase:5120; eepromsize:256)
+        ,(controllertypestr:'ARDUINOUNO'; controllerunitstr:'ATMEGA328P'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ATMEGA256RFR2XPRO';controllerunitstr:'ATMEGA256RFR2';cputype:cpu_avr6;fputype:fpu_soft;flashbase:0;flashsize:262144;srambase:512;sramsize:32768;eeprombase:0;eepromsize:8192)
+        ,(controllertypestr:'ATMEGA324PBXPRO'; controllerunitstr:'ATMEGA324PB'; cputype: cpu_avr5; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:256; sramsize:2048; eeprombase:0; eepromsize:1024)
+        ,(controllertypestr:'ATMEGA1284PXPLAINED'; controllerunitstr:'ATMEGA1284P'; cputype: cpu_avr51; fputype:fpu_soft; flashbase:0; flashsize:131072; srambase:256; sramsize:16384; eeprombase:0; eepromsize:4096)
+        ,(controllertypestr:'ATMEGA4809XPRO'; controllerunitstr:'ATMEGA4809'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:49152; srambase:10240; sramsize:6144; eeprombase:5120; eepromsize:256)
+        ,(controllertypestr:'ATTINY817XPRO'; controllerunitstr:'ATTINY817'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:8192; srambase:15872; sramsize:512; eeprombase:5120; eepromsize:128)
+        ,(controllertypestr:'ATTINY3217XPRO'; controllerunitstr:'ATTINY3217'; cputype: cpu_avrxmega3; fputype:fpu_soft; flashbase:0; flashsize:32768; srambase:14336; sramsize:2048; eeprombase:5120; eepromsize:256)
    );
 
    { Supported optimizations, only used for information }
@@ -434,12 +588,12 @@ Const
                                  genericlevel3optimizerswitches-
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
-                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+                                 [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_loopunroll,cs_opt_tailrecursion,
                                   cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion];
+     [{$ifndef llvm}cs_opt_regvar,{$endif}cs_opt_stackframe,cs_opt_tailrecursion];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
@@ -453,22 +607,26 @@ Const
        CPUAVR_HAS_ELPM,
        CPUAVR_HAS_ELPMX,
        CPUAVR_2_BYTE_PC,
-       CPUAVR_3_BYTE_PC
+       CPUAVR_3_BYTE_PC,
+       CPUAVR_16_REGS,
+       CPUAVR_NOMEMMAPPED_REGS
       );
 
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags =
-     ( { cpu_none  } [],
-       { cpu_avr1  } [CPUAVR_2_BYTE_PC],
-       { cpu_avr2  } [CPUAVR_2_BYTE_PC],
-       { cpu_avr25 } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr3  } [CPUAVR_HAS_JMP_CALL,CPUAVR_2_BYTE_PC],
-       { cpu_avr31 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_2_BYTE_PC],
-       { cpu_avr35 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr4  } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
-       { cpu_avr5  } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
-       { cpu_avr51 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_2_BYTE_PC],
-       { cpu_avr6  } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_3_BYTE_PC]
+     ( { cpu_none      } [],
+       { cpu_avrtiny   } [CPUAVR_16_REGS,CPUAVR_2_BYTE_PC,CPUAVR_NOMEMMAPPED_REGS],
+       { cpu_avr1      } [CPUAVR_2_BYTE_PC],
+       { cpu_avr2      } [CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr25     } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr3      } [CPUAVR_HAS_JMP_CALL,CPUAVR_2_BYTE_PC],
+       { cpu_avr31     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_2_BYTE_PC],
+       { cpu_avr35     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr4      } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr5      } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr51     } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr6      } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_3_BYTE_PC],
+       { cpu_avrxmega3 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC,CPUAVR_NOMEMMAPPED_REGS]
      );
 
 Implementation

+ 23 - 21
compiler/avr/cpupara.pas

@@ -57,7 +57,10 @@ unit cpupara;
 
     function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
-        result:=VOLATILE_INTREGISTERS;
+        if CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype] then
+          result:=VOLATILE_INTREGISTERS-[RS_R0,RS_R1,RS_R18,RS_R19]
+        else
+          result:=VOLATILE_INTREGISTERS;
       end;
 
 
@@ -167,7 +170,7 @@ unit cpupara;
             result:=not(def.size in [1,2,4]);
           }
           else
-            if (def.size > 8) then
+            if (def.size > 8) or ((CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) and (def.size > 4)) then
               result:=true
             else
               result:=inherited ret_in_param(def,pd);
@@ -198,13 +201,13 @@ unit cpupara;
         paracgsize   : tcgsize;
         paralen : longint;
         i : integer;
-        firstparaloc: boolean;
 
       procedure assignintreg;
         begin
           { In case of po_delphi_nested_cc, the parent frame pointer
             is always passed on the stack. }
-           if (nextintreg>RS_R9) and
+           if (((nextintreg>RS_R9) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+               (nextintreg>RS_R21)) and
               (not(vo_is_parentfp in hp.varoptions) or
                not(po_delphi_nested_cc in p.procoptions)) then
              begin
@@ -296,14 +299,14 @@ unit cpupara;
              if paralen=0 then
                internalerror(200410311);
 {$endif EXTDEBUG}
-             firstparaloc:=true;
              if loc=LOC_REGISTER then
                begin
                  { the lsb is located in the register with the lowest number,
                    by adding paralen mod 2, make the size even
                  }
                  nextintreg:=curintreg-(paralen+(paralen mod 2))+1;
-                 if nextintreg>=RS_R8 then
+                 if ((nextintreg>=RS_R8) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+                   (nextintreg>=RS_R20) then
                    curintreg:=nextintreg-1
                  else
                    begin
@@ -338,7 +341,8 @@ unit cpupara;
                  case loc of
                     LOC_REGISTER:
                       begin
-                        if nextintreg>=RS_R8 then
+                        if ((nextintreg>=RS_R8) and not(CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype])) or
+                          (nextintreg>=RS_R20) then
                           begin
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.size:=OS_8;
@@ -355,19 +359,19 @@ unit cpupara;
                       begin
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                           begin
-                            paraloc^.size:=OS_ADDR;
-                            paraloc^.def:=cpointerdef.getreusable_no_free(paradef);
-                            assignintreg
-                          end
+                           paraloc^.size:=OS_ADDR;
+                           paraloc^.def:=cpointerdef.getreusable_no_free(paradef);
+                           assignintreg;
+                         end
                         else
                           begin
-                             paraloc^.def:=hp.vardef;
-                             paraloc^.loc:=LOC_REFERENCE;
-                             paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                             paraloc^.reference.offset:=stack_offset;
-                             inc(stack_offset,hp.vardef.size);
-                          end;
-                        dec(paralen,hp.vardef.size);
+                            paraloc^.def:=hp.vardef;
+                            paraloc^.loc:=LOC_REFERENCE;
+                            paraloc^.reference.index:=NR_STACK_POINTER_REG;
+                            paraloc^.reference.offset:=stack_offset;
+                            inc(stack_offset,paralen);
+                         end;
+                        paralen:=0;
                       end;
                     else
                       internalerror(2002071002);
@@ -380,7 +384,6 @@ unit cpupara;
                          inc(paraloc^.reference.offset,2);
                        end;
                    end;
-                 firstparaloc:=false;
                end;
           end;
         curfloatreg:=nextfloatreg;
@@ -394,7 +397,6 @@ unit cpupara;
       var
         cur_stack_offset: aword;
         curintreg, curfloatreg, curmmreg: tsuperregister;
-        retcgsize  : tcgsize;
       begin
         init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset);
 
@@ -549,7 +551,7 @@ unit cpupara;
               end;
           end
         else
-          internalerror(200410231);
+          internalerror(2004102305);
       end;
 
 begin

+ 3 - 2
compiler/avr/cpupi.pas

@@ -44,7 +44,7 @@ unit cpupi;
   implementation
 
     uses
-       globals,systems,
+       globals,systems,verbose,
        cpubase,
        aasmtai,aasmdata,
        tgobj,
@@ -80,7 +80,8 @@ unit cpupi;
       begin
         { because of the limited branch distance of cond. branches, they must be replaced
           sometimes by normal jmps and an inverse branch }
-        finalizeavrcode(aktproccode);
+        if not(finalizeavrcode(aktproccode)) then
+          message1(cg_w_cannot_compile_subroutine,procdef.fullprocname(false));
       end;
 
 begin

+ 4 - 22
compiler/avr/itcpugas.pas

@@ -53,39 +53,21 @@ interface
 implementation
 
     uses
-      cutils,verbose;
+      cutils,verbose,rgbase;
 
     const
-      gas_regname_table : array[tregisterindex] of string[7] = (
+      gas_regname_table : TRegNameTable = (
         {$i ravrstd.inc}
       );
 
-      gas_regname_index : array[tregisterindex] of tregisterindex = (
+      gas_regname_index : TRegisterIndexTable = (
         {$i ravrsri.inc}
       );
 
-    function findreg_by_gasname(const s:string):tregisterindex;
-      var
-        i,p : tregisterindex;
-      begin
-        {Binary search.}
-        p:=0;
-        i:=regnumber_count_bsstart;
-        repeat
-          if (p+i<=high(tregisterindex)) and (gas_regname_table[gas_regname_index[p+i]]<=s) then
-            p:=p+i;
-          i:=i shr 1;
-        until i=0;
-        if gas_regname_table[gas_regname_index[p]]=s then
-          findreg_by_gasname:=gas_regname_index[p]
-        else
-          findreg_by_gasname:=0;
-      end;
-
 
     function gas_regnum_search(const s:string):Tregister;
       begin
-        result:=regnumber_table[findreg_by_gasname(s)];
+        result:=regnumber_table[findreg_by_name_table(s,gas_regname_table,gas_regname_index)];
       end;
 
 

+ 13 - 7
compiler/avr/navradd.pas

@@ -33,11 +33,12 @@ interface
        private
          function  GetResFlags(unsigned:Boolean):TResFlags;
        protected
-         function pass_1 : tnode;override;
          procedure second_cmpordinal;override;
          procedure second_cmpsmallset;override;
          procedure second_cmp64bit;override;
          procedure second_cmp;
+       public
+         function pass_1 : tnode;override;
        end;
 
   implementation
@@ -217,7 +218,7 @@ interface
                   tmpreg1:=cg.GetNextReg(tmpreg1);
               end;
 
-            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,NR_R1));
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,tmpreg1,GetDefaultZeroReg));
 
             location_reset(location,LOC_FLAGS,OS_NO);
             location.resflags:=getresflags(unsigned);
@@ -229,13 +230,18 @@ interface
           begin
             { decrease register pressure on registers >= r16 }
             if (right.location.value and $ff)=0 then
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,NR_R1))
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,GetDefaultZeroReg))
             else
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CPI,left.location.register,right.location.value and $ff))
+              begin
+                cg.getcpuregister(current_asmdata.CurrAsmList,NR_R26);
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_LDI,NR_R26,right.location.value and $ff));
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,NR_R26));
+                cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_R26);
+              end;
           end
         { on the left side, we allow only a constant if it is 0 }
         else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
-          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,NR_R1,right.location.register))
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,GetDefaultZeroReg,right.location.register))
         else
           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,left.location.register,right.location.register));
 
@@ -262,7 +268,7 @@ interface
               begin
                 { just use R1? }
                 if ((right.location.value64 shr ((i-1)*8)) and $ff)=0 then
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,NR_R1))
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,GetDefaultZeroReg))
                 else
                   begin
                     tmpreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_8);
@@ -272,7 +278,7 @@ interface
               end
             { above it is checked, if left=0, then a constant is allowed }
             else if (left.location.loc=LOC_CONSTANT) and (left.location.value=0) then
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,NR_R1,tmpreg2))
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,GetDefaultZeroReg,tmpreg2))
             else
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,tmpreg2));
           end;

+ 50 - 9
compiler/avr/navrmat.pas

@@ -34,7 +34,9 @@ interface
       end;
 
       tavrshlshrnode = class(tcgshlshrnode)
+        function pass_1: tnode;override;
         procedure second_integer;override;
+        procedure second_64bit;override;
       end;
 
 implementation
@@ -46,7 +48,7 @@ implementation
       aasmbase,aasmcpu,aasmtai,aasmdata,
       defutil,
       cgbase,cgobj,hlcgobj,cgutils,
-      pass_2,procinfo,
+      pass_1,pass_2,procinfo,
       ncon,
       cpubase,
       ncgutil,cgcpu;
@@ -57,13 +59,13 @@ implementation
 
     procedure tavrnotnode.second_boolean;
       var
-        tmpreg,lreg : tregister;
+        tmpreg : tregister;
         i : longint;
         falselabel,truelabel,skiplabel: TAsmLabel;
       begin
+        secondpass(left);
         if not handle_locjump then
           begin
-            secondpass(left);
             { short code? }
             if (left.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG]) and
               (left.location.sreg.bitlen=1) then
@@ -108,7 +110,7 @@ implementation
                  LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE :
                    begin
                      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
-                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CPI,left.location.register,0));
+                     current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CP,GetDefaultZeroReg,left.location.register));
 
                      tmpreg:=left.location.register;
                      for i:=2 to tcgsize2size[left.location.size] do
@@ -117,7 +119,7 @@ implementation
                            tmpreg:=left.location.registerhi
                          else
                            tmpreg:=cg.GetNextReg(tmpreg);
-                         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,NR_R1,tmpreg));
+                         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,GetDefaultZeroReg,tmpreg));
                        end;
                      location_reset(location,LOC_FLAGS,OS_NO);
                      location.resflags:=F_EQ;
@@ -129,6 +131,28 @@ implementation
       end;
 
 
+{*****************************************************************************
+                             TAVRSHLSHRNODE
+*****************************************************************************}
+
+    function tavrshlshrnode.pass_1 : tnode;
+      begin
+        { the avr code generator can handle 64 bit shifts by constants directly }
+        if is_constintnode(right) and is_64bit(resultdef) then
+          begin
+            result:=nil;
+            firstpass(left);
+            firstpass(right);
+            if codegenerror then
+              exit;
+
+            expectloc:=LOC_REGISTER;
+          end
+        else
+          Result:=inherited pass_1;
+      end;
+
+
     procedure tavrshlshrnode.second_integer;
       var
          op : topcg;
@@ -142,7 +166,7 @@ implementation
           shln: op:=OP_SHL;
           shrn: op:=OP_SHR;
           else
-            internalerror(2013120102);
+            internalerror(2013120109);
         end;
         opsize:=left.location.size;
         opdef:=left.resultdef;
@@ -152,7 +176,13 @@ implementation
           (left.location.size<>opsize) then
           hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,true);
         location_reset(location,LOC_REGISTER,opsize);
-        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
+        if is_64bit(resultdef) then
+          begin
+            location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+            location.registerhi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+          end
+        else
+          location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
 
         { shifting by a constant directly coded: }
         if (right.nodetype=ordconstn) then
@@ -164,8 +194,12 @@ implementation
                shiftval:=tordconstnode(right).value.uvalue and 31
              else
                shiftval:=tordconstnode(right).value.uvalue and 63;
-             hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,op,opdef,
-               shiftval,left.location.register,location.register);
+             if is_64bit(resultdef) then
+               cg64.a_op64_const_reg_reg(current_asmdata.CurrAsmList,op,location.size,
+                 shiftval,left.location.register64,location.register64)
+             else
+               hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,op,opdef,
+                 shiftval,left.location.register,location.register);
           end
         else
           begin
@@ -186,6 +220,13 @@ implementation
           end;
       end;
 
+
+    procedure tavrshlshrnode.second_64bit;
+      begin
+        second_integer;
+        // inherited second_64bit;
+      end;
+
 begin
   cnotnode:=tavrnotnode;
   cshlshrnode:=tavrshlshrnode;

+ 5 - 9
compiler/avr/raavrgas.pas

@@ -326,10 +326,8 @@ Unit raavrgas;
 
       var
         tempreg : tregister;
-        ireg : tsuperregister;
         hl : tasmlabel;
         ofs : longint;
-        registerset : tcpuregisterset;
         tempstr : string;
         tempsymtyp : tasmsymtype;
       Begin
@@ -415,7 +413,7 @@ Unit raavrgas;
                     OPR_REFERENCE :
                       inc(oper.opr.ref.offset,l);
                     else
-                      internalerror(200309202);
+                      internalerror(2003092012);
                   end;
                   Consume(AS_RPAREN);
                 end
@@ -499,7 +497,7 @@ Unit raavrgas;
                        OPR_REFERENCE :
                          inc(oper.opr.ref.offset,l);
                        else
-                         internalerror(200309202);
+                         internalerror(2003092013);
                      end;
                    end
                end;
@@ -612,18 +610,16 @@ Unit raavrgas;
 
 
     function tavrattreader.is_asmopcode(const s: string):boolean;
-
+(*
       const
         { sorted by length so longer postfixes will match first }
         postfix2strsorted : array[1..19] of string[2] = (
           'EP','SB','BT','SH',
           'IA','IB','DA','DB','FD','FA','ED','EA',
           'B','D','E','P','T','H','S');
-
+*)
       var
-        len,
-        j,
-        sufidx : longint;
+        j : longint;
         hs : string;
         maxlen : longint;
         icond : tasmcond;

+ 9 - 11
compiler/avr/rgcpu.pas

@@ -49,13 +49,15 @@ unit rgcpu;
 
     uses
       verbose, cutils,
+      globals,
       cgobj,
-      procinfo;
+      procinfo,
+      cpuinfo;
 
 
     procedure trgcpu.add_constraints(reg:tregister);
-      var
-        supreg,i : Tsuperregister;
+      {var
+        supreg,i : Tsuperregister;}
       begin
         case getsubreg(reg) of
           { Let 64bit floats conflict with all odd float regs }
@@ -74,8 +76,8 @@ unit rgcpu;
           { Let 64bit ints conflict with all odd int regs }
           R_SUBQ:
             begin
-              supreg:=getsupreg(reg);
               {
+              supreg:=getsupreg(reg);
               i:=RS_G1;
               while (i<=RS_I7) do
                 begin
@@ -93,9 +95,8 @@ unit rgcpu;
         helpins  : tai;
         tmpref   : treference;
         helplist : TAsmList;
-        hreg     : tregister;
       begin
-        if abs(spilltemp.offset)>63 then
+        if (abs(spilltemp.offset)>63) or (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
           begin
             helplist:=TAsmList.create;
 
@@ -119,9 +120,8 @@ unit rgcpu;
       var
         tmpref   : treference;
         helplist : TAsmList;
-        hreg     : tregister;
       begin
-        if abs(spilltemp.offset)>63 then
+        if (abs(spilltemp.offset)>63) or (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
           begin
             helplist:=TAsmList.create;
 
@@ -183,11 +183,9 @@ unit rgcpu;
 
 
     function trgcpu.do_spill_replace(list:TAsmList;instr:tai_cpu_abstract_sym;orgreg:tsuperregister;const spilltemp:treference):boolean;
-      var
-        b : byte;
       begin
         result:=false;
-        if not(spilltemp.offset in [0..63]) then
+        if not(spilltemp.offset in [0..63]) or (CPUAVR_16_REGS in cpu_capabilities[current_settings.cputype]) then
           exit;
 
         { Replace 'mov  dst,orgreg' with 'ldd  dst,spilltemp'

+ 45 - 0
compiler/avr/tripletcpu.pas

@@ -0,0 +1,45 @@
+{
+    Copyright (c) 2020 by Jonas Maebe
+
+    Construct the cpu part of the triplet
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit tripletcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  globtype;
+
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+
+implementation
+
+uses
+  globals, cpuinfo;
+
+function tripletcpustr(tripletstyle: ttripletstyle): ansistring;
+  begin
+    result:='avr';
+  end;
+
+
+end.
+

+ 3 - 3
compiler/blockutl.pas

@@ -174,7 +174,7 @@ implementation
       { find the type of the descriptor structure }
       descriptordef:=search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_DESCRIPTOR_SIMPLE',true).typedef;
       { create new static variable }
-      descriptor:=cstaticvarsym.create(name,vs_value,descriptordef,[],true);
+      descriptor:=cstaticvarsym.create(name,vs_value,descriptordef,[]);
       symtablestack.top.insert(descriptor);
       include(descriptor.symoptions,sp_internal);
       { create typed constant for the descriptor }
@@ -227,7 +227,7 @@ implementation
         begin
           { alias for the type to invoke the procvar, used in the symcreat
             handling of tsk_block_invoke_procvar }
-          result.localst.insert(ctypesym.create('__FPC_BLOCK_INVOKE_PV_TYPE',orgpv,true));
+          result.localst.insert(ctypesym.create('__FPC_BLOCK_INVOKE_PV_TYPE',orgpv));
           result.synthetickind:=tsk_block_invoke_procvar;
         end;
     end;
@@ -253,7 +253,7 @@ implementation
       result:=cstaticvarsym.create(
         '$'+literalname,
         vs_value,
-        blockliteraldef,[],true);
+        blockliteraldef,[]);
       include(result.symoptions,sp_internal);
       symtablestack.top.insert(result);
       { initialise it }

+ 3 - 8
compiler/browcol.pas

@@ -1560,15 +1560,10 @@ end;
                    Symbol^.Flags:=(Symbol^.Flags or sfPointer);
                    Symbol^.RelatedTypeID:=Ptrint(tpointerdef(vardef).pointeddef);
                  end;
-               if typ=fieldvarsym then
-                 MemInfo.Addr:=tfieldvarsym(sym).fieldoffset
+               if tabstractnormalvarsym(sym).localloc.loc=LOC_REFERENCE then
+                 MemInfo.Addr:=tabstractnormalvarsym(sym).localloc.reference.offset
                else
-                 begin
-                   if tabstractnormalvarsym(sym).localloc.loc=LOC_REFERENCE then
-                     MemInfo.Addr:=tabstractnormalvarsym(sym).localloc.reference.offset
-                   else
-                     MemInfo.Addr:=0;
-                 end;
+                 MemInfo.Addr:=0;
                if assigned(vardef) and (vardef.typ=arraydef) then
                  begin
                    if tarraydef(vardef).highrange<tarraydef(vardef).lowrange then

+ 1 - 2
compiler/ccharset.pas

@@ -25,7 +25,6 @@ unit ccharset;
        tunicodestring = ^tunicodechar;
 
        tcsconvert = class
-         // !!!!!!1constructor create;
        end;
 
        tunicodecharmappingflag = (umf_noinfo,umf_leadbyte,umf_undefined,
@@ -205,7 +204,7 @@ unit ccharset;
               hp:=hp^.next;
            end;
          getmap:=nil;
-      end;////////
+      end;
 
     function getmap(cp : word) : punicodemap;
 

+ 46 - 4
compiler/cclasses.pas

@@ -2190,7 +2190,7 @@ end;
         while assigned(NewNode) do
          begin
            Next:=NewNode.Next;
-           prefetch(next.next);
+           prefetch(pointer(Next)^);
            NewNode.Free;
            NewNode:=Next;
           end;
@@ -2798,9 +2798,51 @@ end;
 
 
     function tdynamicarray.equal(other:tdynamicarray):boolean;
-      begin
-        result:=false;
-        { TODO }
+      var
+        ofsthis,
+        ofsother,
+        remthis,
+        remother,
+        len : sizeint;
+        blockthis,
+        blockother : pdynamicblock;
+      begin
+        if not assigned(other) then
+          exit(false);
+        if size<>other.size then
+          exit(false);
+        blockthis:=Firstblock;
+        blockother:=other.FirstBlock;
+        ofsthis:=0;
+        ofsother:=0;
+
+        while assigned(blockthis) and assigned(blockother) do
+          begin
+            remthis:=blockthis^.used-ofsthis;
+            remother:=blockother^.used-ofsother;
+            len:=min(remthis,remother);
+            if not CompareMem(@blockthis^.data[ofsthis],@blockother^.data[ofsother],len) then
+              exit(false);
+            inc(ofsthis,len);
+            inc(ofsother,len);
+            if ofsthis=blockthis^.used then
+              begin
+                blockthis:=blockthis^.next;
+                ofsthis:=0;
+              end;
+            if ofsother=blockother^.used then
+              begin
+                blockother:=blockother^.next;
+                ofsother:=0;
+              end;
+          end;
+
+        if assigned(blockthis) and not assigned(blockother) then
+          result:=blockthis^.used=0
+        else if assigned(blockother) and not assigned(blockthis) then
+          result:=blockother^.used=0
+        else
+          result:=true;
       end;
 
 

+ 31 - 0
compiler/cepiktimer.pas

@@ -0,0 +1,31 @@
+{
+    Copyright (c) 2018 by Florian Klaempfl
+
+    Basic infrastructure for measuring timings of different compilation steps
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+{$macro on}
+{ "fix" the unit name }
+{$define epiktimer:=cepiktimer}
+{ do not depend on the classes unit }
+{$DEFINE NOCLASSES}
+
+{ include the original file }
+{$i ../../epiktimer/epiktimer.pas}
+

+ 52 - 13
compiler/cfidwarf.pas

@@ -63,7 +63,14 @@ interface
       end;
 
       TDwarfAsmCFI=class(TAsmCFI)
-        use_eh_frame : boolean;
+      public type
+        { selects which kind of call frame information is emitted, if any }
+        TDataType = (
+          dt_none,       { nothing is emitted; the frame/CFA methods exit early }
+          dt_debug,      { data is placed in the sec_debug_frame section }
+          dt_eh_frame    { data is placed in the sec_eh_frame section }
+        );
+      public
+        { derived from target_info.flags (tf_use_psabieh) and the cs_debuginfo
+          module switch; NOTE(review): presumably assigned in create - confirm }
+        datatype : TDataType;
         constructor create;override;
       end;
 
@@ -115,7 +122,7 @@ interface
 implementation
 
     uses
-      systems,
+      systems,globals,
       cutils,
       verbose,
       dwarfbase;
@@ -230,7 +237,11 @@ implementation
       begin
         inherited;
         if tf_use_psabieh in target_info.flags then
-          use_eh_frame:=true;
+          datatype:=dt_eh_frame
+        else if cs_debuginfo in current_settings.moduleswitches then
+          datatype:=dt_debug
+        else
+          datatype:=dt_none;
       end;
 
 
@@ -257,7 +268,7 @@ implementation
       end;
 
 
-{$ifdef i386}
+{$if defined(i386)}
     { if more CPU-dependent stuff is implemented, this needs more refactoring }
     procedure TDwarfAsmCFILowLevel.generate_initial_instructions(list:TAsmList);
       begin
@@ -268,7 +279,17 @@ implementation
         list.concat(tai_const.create_uleb128bit(dwarf_reg(NR_RETURN_ADDRESS_REG)));
         list.concat(tai_const.create_uleb128bit((-sizeof(aint)) div data_alignment_factor));
       end;
-{$else i386}
+{$elseif defined(avr)}
+    { AVR variant: appends the initial call frame instructions to list as raw
+      constants. Two DWARF CFA opcodes are written, each followed by two
+      ULEB128 operands. }
+    procedure TDwarfAsmCFILowLevel.generate_initial_instructions(list:TAsmList);
+      begin
+        { DW_CFA_def_cfa <register> <offset>: CFA = register 32 + 2.
+          NOTE(review): 32 is presumably the stack pointer in the AVR DWARF
+          register numbering - confirm }
+        list.concat(tai_const.create_8bit(DW_CFA_def_cfa));
+        list.concat(tai_const.create_uleb128bit(32));
+        list.concat(tai_const.create_uleb128bit(2));
+        { DW_CFA_offset_extended <register> <factored offset>: register 36 is
+          saved at an offset of -1, scaled by data_alignment_factor.
+          NOTE(review): 36 is presumably the return address column - confirm }
+        list.concat(tai_const.create_8bit(DW_CFA_offset_extended));
+        list.concat(tai_const.create_uleb128bit(36));
+        list.concat(tai_const.create_uleb128bit((-1) div data_alignment_factor));
+      end;
+{$else}
     { if more CPU-dependent stuff is implemented, this needs more refactoring }
     procedure TDwarfAsmCFILowLevel.generate_initial_instructions(list:TAsmList);
       begin
@@ -294,10 +315,14 @@ implementation
         tc             : tai_const;
       begin
         CurrentLSDALabel:=nil;
-        if use_eh_frame then
-          new_section(list,sec_eh_frame,'',0)
-        else
-          new_section(list,sec_debug_frame,'',0);
+        case datatype of
+          dt_none:
+            exit;
+          dt_debug:
+            new_section(list,sec_debug_frame,'',0);
+          dt_eh_frame:
+            new_section(list,sec_eh_frame,'',0);
+        end;
         { debug_frame:
             CIE
              DWORD   length
@@ -328,7 +353,7 @@ implementation
         current_asmdata.getlabel(lenendlabel,alt_dbgframe);
         list.concat(tai_const.create_rel_sym(aitconst_32bit,lenstartlabel,lenendlabel));
         list.concat(tai_label.create(lenstartlabel));
-        if use_eh_frame then
+        if datatype=dt_eh_frame then
           begin
             list.concat(tai_const.create_32bit(0));
             list.concat(tai_const.create_8bit(1));
@@ -348,7 +373,7 @@ implementation
         list.concat(tai_const.create_sleb128bit(data_alignment_factor));
         list.concat(tai_const.create_8bit(dwarf_reg(NR_RETURN_ADDRESS_REG)));
         { augmentation data }
-        if use_eh_frame then
+        if datatype=dt_eh_frame then
           begin
             current_asmdata.getlabel(augstartlabel,alt_dbgframe);
             current_asmdata.getlabel(augendlabel,alt_dbgframe);
@@ -401,7 +426,7 @@ implementation
                   }
                   list.concat(tai_const.create_rel_sym(aitconst_32bit,lenstartlabel,lenendlabel));
                   list.concat(tai_label.create(lenstartlabel));
-                  if use_eh_frame then
+                  if datatype=dt_eh_frame then
                     begin
                       { relative offset to the CIE }
                       current_asmdata.getlabel(fdeofslabel,alt_dbgframe);
@@ -423,7 +448,7 @@ implementation
                   list.concat(tai_const.create_rel_sym(aitconst_ptr,hp.oper[0].beginsym,hp.oper[0].endsym));
 
                   { we wrote a 'z' into the CIE augmentation data }
-                  if use_eh_frame then
+                  if datatype=dt_eh_frame then
                     begin
                       { size of augmentation }
                       list.concat(tai_const.create_8bit(sizeof(pint)));
@@ -460,6 +485,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.start_frame(list:TAsmList);
       begin
+        if datatype=dt_none then
+          exit;
         current_asmdata.getlabel(FFrameEndLabel,alt_dbgframe);
         FLastloclabel:=get_frame_start;
         list.concat(tai_label.create(get_frame_start));
@@ -483,6 +510,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.outmost_frame(list: TAsmList);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
         DwarfList.concat(tdwarfitem.create_reg(DW_CFA_undefined,doe_uleb,NR_RETURN_ADDRESS_REG));
       end;
@@ -490,6 +519,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.end_frame(list:TAsmList);
       begin
+        if datatype=dt_none then
+          exit;
         if not assigned(FFrameStartLabel) then
           internalerror(2004041213);
         DwarfList.concat(tdwarfitem.create(DW_CFA_end_frame));
@@ -515,6 +546,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.cfa_offset(list:TAsmList;reg:tregister;ofs:longint);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
 { TODO: check if ref is a temp}
         { offset must be positive }
@@ -524,6 +557,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.cfa_restore(list:TAsmList;reg:tregister);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
         DwarfList.concat(tdwarfitem.create_reg(DW_CFA_restore_extended,doe_uleb,reg));
       end;
@@ -531,6 +566,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.cfa_def_cfa_register(list:TAsmList;reg:tregister);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
         DwarfList.concat(tdwarfitem.create_reg(DW_CFA_def_cfa_register,doe_uleb,reg));
       end;
@@ -538,6 +575,8 @@ implementation
 
     procedure TDwarfAsmCFILowLevel.cfa_def_cfa_offset(list:TAsmList;ofs:longint);
       begin
+        if datatype=dt_none then
+          exit;
         cfa_advance_loc(list);
         DwarfList.concat(tdwarfitem.create_const(DW_CFA_def_cfa_offset,doe_uleb,ofs));
       end;

+ 10 - 7
compiler/cfileutl.pas

@@ -99,7 +99,7 @@ interface
 
       TSearchPathList = class(TCmdStrList)
         procedure AddPath(s:TCmdStr;addfirst:boolean);overload;
-        procedure AddPath(SrcPath,s:TCmdStr;addfirst:boolean);overload;
+        procedure AddLibraryPath(const sysroot: TCmdStr; s:TCmdStr;addfirst:boolean);overload;
         procedure AddList(list:TSearchPathList;addfirst:boolean);
         function  FindFile(const f : TCmdStr;allowcache:boolean;var foundfile:TCmdStr):boolean;
       end;
@@ -526,7 +526,7 @@ end;
    function CurDirRelPath(systeminfo: tsysteminfo): TCmdStr;
 
    begin
-     if systeminfo.system <> system_powerpc_macos then
+     if systeminfo.system <> system_powerpc_macosclassic then
        CurDirRelPath:= '.'+systeminfo.DirSep
      else
        CurDirRelPath:= ':'
@@ -877,7 +877,7 @@ end;
      var
        i      : longint;
      begin
-       if source_info.system = system_powerpc_MACOS then
+       if source_info.system = system_powerpc_macosclassic then
          FixFileName:= TranslatePathToMac(s, true)
        else
         if (tf_files_case_aware in source_info.flags) or
@@ -940,7 +940,7 @@ end;
      var
        i : longint;
      begin
-       if target_info.system = system_powerpc_MACOS then
+       if target_info.system = system_powerpc_macosclassic then
          TargetFixFileName:= TranslatePathToMac(s, true)
        else
         if (tf_files_case_aware in target_info.flags) or
@@ -995,11 +995,11 @@ end;
 
     procedure TSearchPathList.AddPath(s:TCmdStr;addfirst:boolean);
       begin
-        AddPath('',s,AddFirst);
+        AddLibraryPath('',s,AddFirst);
       end;
 
 
-   procedure TSearchPathList.AddPath(SrcPath,s:TCmdStr;addfirst:boolean);
+   procedure TSearchPathList.AddLibraryPath(const sysroot: TCmdStr; s:TCmdStr;addfirst:boolean);
      var
        staridx,
        i,j      : longint;
@@ -1074,7 +1074,10 @@ end;
 
          { fix pathname }
          DePascalQuote(currPath);
-         currPath:=SrcPath+FixPath(currPath,false);
+         { GNU LD convention: if library search path starts with '=', it's relative to the
+           sysroot; otherwise, interpret it as a regular path }
+         if (length(currPath) >0) and (currPath[1]='=') then
+           currPath:=sysroot+FixPath(copy(currPath,2,length(currPath)-1),false);
          if currPath='' then
            currPath:= CurDirRelPath(source_info)
          else

+ 61 - 6
compiler/cg64f32.pas

@@ -72,6 +72,7 @@ unit cg64f32;
         procedure a_op64_reg_ref(list : TAsmList;op:TOpCG;size : tcgsize;reg : tregister64; const ref: treference);override;
         procedure a_op64_const_loc(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;const l: tlocation);override;
         procedure a_op64_reg_loc(list : TAsmList;op:TOpCG;size : tcgsize;reg : tregister64;const l : tlocation);override;
+        procedure a_op64_ref_loc(list: TAsmList; op: TOpCG; size: tcgsize;const ref: treference; const l: tlocation);override;
         procedure a_op64_loc_reg(list : TAsmList;op:TOpCG;size : tcgsize;const l : tlocation;reg : tregister64);override;
         procedure a_op64_const_ref(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;const ref : treference);override;
 
@@ -324,8 +325,23 @@ unit cg64f32;
             reg.reglo:=reg.reghi;
             reg.reghi:=tmpreg;
           end;
-        cg.a_load_reg_ref(list,OS_32,OS_32,reg.reglo,ref);
         tmpref := ref;
+{$if defined(cpu8bitalu) or defined(cpu16bitalu)}
+        { Preload base and index into a separate temp register on 8- and 16-bit CPUs
+          to reduce spilling and produce better code. }
+        if (tmpref.base<>NR_NO) and (getsupreg(tmpref.base)>=first_int_imreg) then
+          begin
+            tmpreg:=cg.getaddressregister(list);
+            cg.a_load_reg_reg(list,OS_ADDR,OS_ADDR,tmpref.base,tmpreg);
+            tmpref.base:=tmpreg;
+            if tmpref.index<>NR_NO then
+              begin
+                cg.a_op_reg_reg(list,OP_ADD,OS_ADDR,tmpref.index,tmpref.base);
+                tmpref.index:=NR_NO;
+              end;
+          end;
+{$endif}
+        cg.a_load_reg_ref(list,OS_32,OS_32,reg.reglo,tmpref);
         inc(tmpref.offset,4);
         cg.a_load_reg_ref(list,OS_32,OS_32,reg.reghi,tmpref);
       end;
@@ -356,6 +372,21 @@ unit cg64f32;
             reg.reghi := tmpreg;
           end;
         tmpref := ref;
+{$if defined(cpu8bitalu) or defined(cpu16bitalu)}
+        { Preload base and index into a separate temp register on 8- and 16-bit CPUs
+          to reduce spilling and produce better code. }
+        if (tmpref.base<>NR_NO) and (getsupreg(tmpref.base)>=first_int_imreg) then
+          begin
+            tmpreg:=cg.getaddressregister(list);
+            cg.a_load_reg_reg(list,OS_ADDR,OS_ADDR,tmpref.base,tmpreg);
+            tmpref.base:=tmpreg;
+            if tmpref.index<>NR_NO then
+              begin
+                cg.a_op_reg_reg(list,OP_ADD,OS_ADDR,tmpref.index,tmpref.base);
+                tmpref.index:=NR_NO;
+              end;
+          end;
+{$endif}
         if (tmpref.base=reg.reglo) then
          begin
            tmpreg:=cg.getaddressregister(list);
@@ -674,7 +705,7 @@ unit cg64f32;
           LOC_CONSTANT :
             cg.a_load_const_reg(list,OS_32,longint(hi(l.value64)),reg);
           else
-            internalerror(200203244);
+            internalerror(2002032411);
         end;
       end;
 
@@ -705,6 +736,25 @@ unit cg64f32;
       end;
 
 
+    { Applies the 64 bit operation op with the memory operand ref as source
+      and the location l as destination, dispatching on l.loc. Any location
+      kind other than a reference or a register raises an internal error. }
+    procedure tcg64f32.a_op64_ref_loc(list : TAsmList;op:TOpCG;size : tcgsize;const ref : treference;const l : tlocation);
+      var
+        tempreg: tregister64;
+      begin
+        case l.loc of
+          LOC_REFERENCE, LOC_CREFERENCE:
+            begin
+              { destination is in memory: load ref into a freshly allocated
+                pair of 32 bit registers, then use the reg->ref operation }
+              tempreg.reghi:=cg.getintregister(list,OS_32);
+              tempreg.reglo:=cg.getintregister(list,OS_32);
+              a_load64_ref_reg(list,ref,tempreg);
+              a_op64_reg_ref(list,op,size,tempreg,l.reference);
+            end;
+          LOC_REGISTER,LOC_CREGISTER:
+            { destination is a register pair: the ref->reg operation suffices }
+            a_op64_ref_reg(list,op,size,ref,l.register64);
+          else
+            internalerror(2020042803);
+        end;
+      end;
+
 
     procedure tcg64f32.a_op64_loc_reg(list : TAsmList;op:TOpCG;size : tcgsize;const l : tlocation;reg : tregister64);
       begin
@@ -738,12 +788,17 @@ unit cg64f32;
       begin
         tempreg.reghi:=cg.getintregister(list,OS_32);
         tempreg.reglo:=cg.getintregister(list,OS_32);
-        a_load64_ref_reg(list,ref,tempreg);
         if op in [OP_NEG,OP_NOT] then
-          a_op64_reg_reg(list,op,size,tempreg,tempreg)
+          begin
+            a_op64_reg_reg(list,op,size,reg,tempreg);
+            a_load64_reg_ref(list,tempreg,ref);
+          end
         else
-          a_op64_reg_reg(list,op,size,reg,tempreg);
-        a_load64_reg_ref(list,tempreg,ref);
+          begin
+            a_load64_ref_reg(list,ref,tempreg);
+            a_op64_reg_reg(list,op,size,reg,tempreg);
+            a_load64_reg_ref(list,tempreg,ref);
+          end;
       end;
 
 

Some files were not shown because too many files changed in this diff