Browse Source

* synchronized with privatetrunk till r30095

git-svn-id: branches/hlcgllvm@30101 -
Jonas Maebe 10 years ago
parent
commit
67b8aceaee
100 changed files with 9963 additions and 3074 deletions
  1. 250 58
      .gitattributes
  2. 18 27
      .gitignore
  3. 52 10
      Makefile
  4. 8 5
      Makefile.fpc
  5. 100 15
      compiler/Makefile
  6. 17 13
      compiler/Makefile.fpc
  7. 93 35
      compiler/aarch64/a64att.inc
  8. 58 0
      compiler/aarch64/a64atts.inc
  9. 159 43
      compiler/aarch64/a64ins.dat
  10. 93 35
      compiler/aarch64/a64op.inc
  11. 168 161
      compiler/aarch64/a64reg.dat
  12. 462 1125
      compiler/aarch64/aasmcpu.pas
  13. 288 0
      compiler/aarch64/agcpugas.pas
  14. 25 7
      compiler/aarch64/aoptcpub.pas
  15. 2217 0
      compiler/aarch64/cgcpu.pas
  16. 200 38
      compiler/aarch64/cpubase.pas
  17. 17 2
      compiler/aarch64/cpuinfo.pas
  18. 40 0
      compiler/aarch64/cpunode.pas
  19. 458 521
      compiler/aarch64/cpupara.pas
  20. 68 0
      compiler/aarch64/cpupi.pas
  21. 70 0
      compiler/aarch64/cputarg.pas
  22. 228 0
      compiler/aarch64/hlcgcpu.pas
  23. 402 0
      compiler/aarch64/ncpuadd.pas
  24. 201 0
      compiler/aarch64/ncpucnv.pas
  25. 184 0
      compiler/aarch64/ncpuinl.pas
  26. 196 0
      compiler/aarch64/ncpumat.pas
  27. 142 0
      compiler/aarch64/ncpumem.pas
  28. 175 0
      compiler/aarch64/ncpuset.pas
  29. 5 0
      compiler/aarch64/ra64con.inc
  30. 162 157
      compiler/aarch64/ra64dwa.inc
  31. 1 1
      compiler/aarch64/ra64nor.inc
  32. 6 1
      compiler/aarch64/ra64num.inc
  33. 101 96
      compiler/aarch64/ra64rni.inc
  34. 48 43
      compiler/aarch64/ra64sri.inc
  35. 162 157
      compiler/aarch64/ra64sta.inc
  36. 6 1
      compiler/aarch64/ra64std.inc
  37. 5 0
      compiler/aarch64/ra64sup.inc
  38. 88 0
      compiler/aarch64/racpu.pas
  39. 1053 0
      compiler/aarch64/racpugas.pas
  40. 171 0
      compiler/aarch64/rgcpu.pas
  41. 4 2
      compiler/aasmbase.pas
  42. 2 2
      compiler/aasmcnst.pas
  43. 15 16
      compiler/aasmdata.pas
  44. 46 11
      compiler/aasmtai.pas
  45. 16 3
      compiler/aggas.pas
  46. 4 0
      compiler/agjasmin.pas
  47. 23 0
      compiler/alpha/cpuinfo.pas
  48. 22 5
      compiler/aoptobj.pas
  49. 336 30
      compiler/arm/aasmcpu.pas
  50. 1 2
      compiler/arm/agarmgas.pas
  51. 35 7
      compiler/arm/aoptcpu.pas
  52. 8 5
      compiler/arm/aoptcpub.pas
  53. 73 67
      compiler/arm/armins.dat
  54. 1 1
      compiler/arm/armnop.inc
  55. 97 146
      compiler/arm/armtab.inc
  56. 47 15
      compiler/arm/cgcpu.pas
  57. 43 1
      compiler/arm/cpuelf.pas
  58. 6 1
      compiler/arm/cpuinfo.pas
  59. 58 10
      compiler/arm/hlcgcpu.pas
  60. 12 2
      compiler/arm/narmadd.pas
  61. 11 4
      compiler/arm/narmset.pas
  62. 2 1
      compiler/arm/raarmgas.pas
  63. 10 0
      compiler/arm/rgcpu.pas
  64. 4 2
      compiler/assemble.pas
  65. 5 5
      compiler/avr/aasmcpu.pas
  66. 4 6
      compiler/avr/agavrgas.pas
  67. 6 0
      compiler/avr/cpuinfo.pas
  68. 4 4
      compiler/avr/cpupara.pas
  69. 8 0
      compiler/avr/navradd.pas
  70. 385 0
      compiler/blockutl.pas
  71. 3 0
      compiler/cfileutl.pas
  72. 13 0
      compiler/cgbase.pas
  73. 2 2
      compiler/cghlcpu.pas
  74. 29 10
      compiler/cgobj.pas
  75. 6 0
      compiler/cgutils.pas
  76. 1 0
      compiler/compinnr.inc
  77. 1 1
      compiler/comprsrc.pas
  78. 77 43
      compiler/dbgdwarf.pas
  79. 6 10
      compiler/dbgstabs.pas
  80. 27 9
      compiler/defcmp.pas
  81. 9 0
      compiler/defutil.pas
  82. 1 1
      compiler/expunix.pas
  83. 1 1
      compiler/finput.pas
  84. 1 0
      compiler/fpcdefs.inc
  85. 10 2
      compiler/fppu.pas
  86. 24 0
      compiler/generic/cpuinfo.pas
  87. 51 37
      compiler/globals.pas
  88. 20 8
      compiler/globtype.pas
  89. 4 3
      compiler/hlcg2ll.pas
  90. 30 14
      compiler/hlcgobj.pas
  91. 63 22
      compiler/htypechk.pas
  92. 8 1
      compiler/i386/cgcpu.pas
  93. 29 8
      compiler/i386/cpuinfo.pas
  94. 20 1
      compiler/i386/cpupara.pas
  95. 8 0
      compiler/i386/i386att.inc
  96. 8 0
      compiler/i386/i386atts.inc
  97. 8 0
      compiler/i386/i386int.inc
  98. 1 1
      compiler/i386/i386nop.inc
  99. 8 0
      compiler/i386/i386op.inc
  100. 9 1
      compiler/i386/i386prop.inc

File diff suppressed because it is too large
+ 250 - 58
.gitattributes


+ 18 - 27
.gitignore

@@ -1344,6 +1344,24 @@ packages/fcl-net/src/*.o
 packages/fcl-net/src/*.ppu
 packages/fcl-net/src/*.ppu
 packages/fcl-net/src/*.s
 packages/fcl-net/src/*.s
 packages/fcl-net/src/Package.fpc
 packages/fcl-net/src/Package.fpc
+packages/fcl-net/src/amiga/*.bak
+packages/fcl-net/src/amiga/*.exe
+packages/fcl-net/src/amiga/*.o
+packages/fcl-net/src/amiga/*.ppu
+packages/fcl-net/src/amiga/*.s
+packages/fcl-net/src/amiga/Package.fpc
+packages/fcl-net/src/amiga/build-stamp.*
+packages/fcl-net/src/amiga/fpcmade.*
+packages/fcl-net/src/amiga/units
+packages/fcl-net/src/aros/*.bak
+packages/fcl-net/src/aros/*.exe
+packages/fcl-net/src/aros/*.o
+packages/fcl-net/src/aros/*.ppu
+packages/fcl-net/src/aros/*.s
+packages/fcl-net/src/aros/Package.fpc
+packages/fcl-net/src/aros/build-stamp.*
+packages/fcl-net/src/aros/fpcmade.*
+packages/fcl-net/src/aros/units
 packages/fcl-net/src/fpcmade.*
 packages/fcl-net/src/fpcmade.*
 packages/fcl-net/src/netware/*.bak
 packages/fcl-net/src/netware/*.bak
 packages/fcl-net/src/netware/*.exe
 packages/fcl-net/src/netware/*.exe
@@ -1801,15 +1819,6 @@ packages/gdbint/src/Package.fpc
 packages/gdbint/src/build-stamp.*
 packages/gdbint/src/build-stamp.*
 packages/gdbint/src/fpcmade.*
 packages/gdbint/src/fpcmade.*
 packages/gdbint/src/units
 packages/gdbint/src/units
-packages/gdbint/tests/*.bak
-packages/gdbint/tests/*.exe
-packages/gdbint/tests/*.o
-packages/gdbint/tests/*.ppu
-packages/gdbint/tests/*.s
-packages/gdbint/tests/Package.fpc
-packages/gdbint/tests/build-stamp.*
-packages/gdbint/tests/fpcmade.*
-packages/gdbint/tests/units
 packages/gdbint/units
 packages/gdbint/units
 packages/gdbm/*.bak
 packages/gdbm/*.bak
 packages/gdbm/*.exe
 packages/gdbm/*.exe
@@ -2723,15 +2732,6 @@ packages/imlib/*.ppu
 packages/imlib/*.s
 packages/imlib/*.s
 packages/imlib/Package.fpc
 packages/imlib/Package.fpc
 packages/imlib/build-stamp.*
 packages/imlib/build-stamp.*
-packages/imlib/examples/*.bak
-packages/imlib/examples/*.exe
-packages/imlib/examples/*.o
-packages/imlib/examples/*.ppu
-packages/imlib/examples/*.s
-packages/imlib/examples/Package.fpc
-packages/imlib/examples/build-stamp.*
-packages/imlib/examples/fpcmade.*
-packages/imlib/examples/units
 packages/imlib/fpcmade.*
 packages/imlib/fpcmade.*
 packages/imlib/src/*.bak
 packages/imlib/src/*.bak
 packages/imlib/src/*.exe
 packages/imlib/src/*.exe
@@ -2742,15 +2742,6 @@ packages/imlib/src/Package.fpc
 packages/imlib/src/build-stamp.*
 packages/imlib/src/build-stamp.*
 packages/imlib/src/fpcmade.*
 packages/imlib/src/fpcmade.*
 packages/imlib/src/units
 packages/imlib/src/units
-packages/imlib/tests/*.bak
-packages/imlib/tests/*.exe
-packages/imlib/tests/*.o
-packages/imlib/tests/*.ppu
-packages/imlib/tests/*.s
-packages/imlib/tests/Package.fpc
-packages/imlib/tests/build-stamp.*
-packages/imlib/tests/fpcmade.*
-packages/imlib/tests/units
 packages/imlib/units
 packages/imlib/units
 packages/ldap/*.bak
 packages/ldap/*.bak
 packages/ldap/*.exe
 packages/ldap/*.exe

+ 52 - 10
Makefile

@@ -1,9 +1,9 @@
 #
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2014-08-17 rev 28432]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-02-23 rev 29972]
 #
 #
 default: help
 default: help
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos
-BSDs = freebsd netbsd openbsd darwin
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos aarch64-darwin
+BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
 LIMIT83fs = go32v2 os2 emx watcom msdos
 OSNeedsComspecToRunBatch = go32v2 watcom
 OSNeedsComspecToRunBatch = go32v2 watcom
@@ -326,7 +326,7 @@ FPCFPMAKE=$(FPC)
 endif
 endif
 endif
 endif
 override PACKAGE_NAME=fpc
 override PACKAGE_NAME=fpc
-override PACKAGE_VERSION=2.7.1
+override PACKAGE_VERSION=3.1.1
 REQUIREDVERSION=2.6.4
 REQUIREDVERSION=2.6.4
 REQUIREDVERSION2=2.6.2
 REQUIREDVERSION2=2.6.2
 ifndef inOS2
 ifndef inOS2
@@ -379,6 +379,9 @@ endif
 ifeq ($(CPU_TARGET),avr)
 ifeq ($(CPU_TARGET),avr)
 PPSUF=avr
 PPSUF=avr
 endif
 endif
+ifeq ($(CPU_TARGET),aarch64)
+PPSUF=a64
+endif
 ifdef CROSSCOMPILE
 ifdef CROSSCOMPILE
 ifneq ($(CPU_TARGET),jvm)
 ifneq ($(CPU_TARGET),jvm)
 PPPRE=ppcross
 PPPRE=ppcross
@@ -467,7 +470,7 @@ endif
 endif
 endif
 BuildOnlyBaseCPUs=jvm
 BuildOnlyBaseCPUs=jvm
 ifneq ($(wildcard utils),)
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 ifdef BUILDFULLNATIVE
 UTILS=1
 UTILS=1
@@ -633,6 +636,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
 endif
@@ -702,6 +711,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 override INSTALL_FPCPACKAGE=y
 override INSTALL_FPCPACKAGE=y
 ifdef REQUIRE_UNITSDIR
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
@@ -945,6 +957,12 @@ EXEEXT=
 HASSHAREDLIB=1
 HASSHAREDLIB=1
 SHORTSUFFIX=lnx
 SHORTSUFFIX=lnx
 endif
 endif
+ifeq ($(OS_TARGET),dragonfly)
+BATCHEXT=.sh
+EXEEXT=
+HASSHAREDLIB=1
+SHORTSUFFIX=df
+endif
 ifeq ($(OS_TARGET),freebsd)
 ifeq ($(OS_TARGET),freebsd)
 BATCHEXT=.sh
 BATCHEXT=.sh
 EXEEXT=
 EXEEXT=
@@ -1459,8 +1477,8 @@ endif
 ifdef CREATESHARED
 ifdef CREATESHARED
 override FPCOPT+=-Cg
 override FPCOPT+=-Cg
 endif
 endif
-ifneq ($(findstring $(OS_TARGET),freebsd openbsd netbsd linux solaris),)
-ifeq ($(CPU_TARGET),x86_64)
+ifneq ($(findstring $(OS_TARGET),dragonfly freebsd openbsd netbsd linux solaris),)
+ifneq ($(findstring $(CPU_TARGET),x86_64 mips mipsel),)
 override FPCOPT+=-Cg
 override FPCOPT+=-Cg
 endif
 endif
 endif
 endif
@@ -2271,6 +2289,22 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 TARGET_DIRS_INSTALLER=1
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
 TARGET_DIRS_RTL=1
@@ -2455,6 +2489,14 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 TARGET_DIRS_INSTALLER=1
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifdef TARGET_DIRS_COMPILER
 ifdef TARGET_DIRS_COMPILER
 compiler_all:
 compiler_all:
 	$(MAKE) -C compiler all
 	$(MAKE) -C compiler all
@@ -2765,14 +2807,14 @@ BUILDSTAMP=build-stamp.$(FULL_TARGET)
 .PHONY: all clean distclean build buildbase install installbase
 .PHONY: all clean distclean build buildbase install installbase
 .PHONY: installother zipinstallbase zipinstallotherzipinstall
 .PHONY: installother zipinstallbase zipinstallotherzipinstall
 .PHONY: singlezipinstall versioncheckstartingcompiler
 .PHONY: singlezipinstall versioncheckstartingcompiler
-versioncheckstartingcompiler: 
+versioncheckstartingcompiler:
 ifndef CROSSCOMPILE
 ifndef CROSSCOMPILE
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION))
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION))
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION2))
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION2))
 ifndef OVERRIDEVERSIONCHECK
 ifndef OVERRIDEVERSIONCHECK
-	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION). If you are absolutely sure that the current compiler is built from the exact same version/revision, you can try to use OVERRIDEVERSIONCHECK=1 to override )
+	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION).)
 else
 else
-	@$(ECHO) You have overriden the starting compiler versioncheck while using starting compiler version $(FPC_VERSION). This situation is not supported and strange things and errors may happen. Remove OVERRIDEVERSIONCHECK=1 to fix this. 
+	@$(ECHO) You have overriden the starting compiler versioncheck while using starting compiler version $(FPC_VERSION). This situation is not supported and strange things and errors may happen. Remove OVERRIDEVERSIONCHECK=1 to fix this.
 endif
 endif
 endif
 endif
 endif
 endif

+ 8 - 5
Makefile.fpc

@@ -4,7 +4,7 @@
 
 
 [package]
 [package]
 name=fpc
 name=fpc
-version=2.7.1
+version=3.1.1
 
 
 [target]
 [target]
 dirs=compiler rtl utils packages ide installer
 dirs=compiler rtl utils packages ide installer
@@ -79,6 +79,9 @@ endif
 ifeq ($(CPU_TARGET),avr)
 ifeq ($(CPU_TARGET),avr)
 PPSUF=avr
 PPSUF=avr
 endif
 endif
+ifeq ($(CPU_TARGET),aarch64)
+PPSUF=a64
+endif
 
 
 # cross compilers uses full cpu_target, not just ppc-suffix
 # cross compilers uses full cpu_target, not just ppc-suffix
 # (except if the target cannot run a native compiler)
 # (except if the target cannot run a native compiler)
@@ -204,7 +207,7 @@ endif
 BuildOnlyBaseCPUs=jvm
 BuildOnlyBaseCPUs=jvm
 
 
 ifneq ($(wildcard utils),)
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 ifdef BUILDFULLNATIVE
 UTILS=1
 UTILS=1
@@ -263,14 +266,14 @@ BUILDSTAMP=build-stamp.$(FULL_TARGET)
 .PHONY: installother zipinstallbase zipinstallotherzipinstall
 .PHONY: installother zipinstallbase zipinstallotherzipinstall
 .PHONY: singlezipinstall versioncheckstartingcompiler
 .PHONY: singlezipinstall versioncheckstartingcompiler
 
 
-versioncheckstartingcompiler: 
+versioncheckstartingcompiler:
 ifndef CROSSCOMPILE
 ifndef CROSSCOMPILE
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION))
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION))
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION2))
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION2))
 ifndef OVERRIDEVERSIONCHECK
 ifndef OVERRIDEVERSIONCHECK
-	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION). If you are absolutely sure that the current compiler is built from the exact same version/revision, you can try to use OVERRIDEVERSIONCHECK=1 to override )
+	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION).)
 else
 else
-	@$(ECHO) You have overriden the starting compiler versioncheck while using starting compiler version $(FPC_VERSION). This situation is not supported and strange things and errors may happen. Remove OVERRIDEVERSIONCHECK=1 to fix this. 
+	@$(ECHO) You have overriden the starting compiler versioncheck while using starting compiler version $(FPC_VERSION). This situation is not supported and strange things and errors may happen. Remove OVERRIDEVERSIONCHECK=1 to fix this.
 endif
 endif
 endif
 endif
 endif
 endif

+ 100 - 15
compiler/Makefile

@@ -1,9 +1,9 @@
 #
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2014-08-17 rev 28432]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-02-23 rev 29972]
 #
 #
 default: all
 default: all
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos
-BSDs = freebsd netbsd openbsd darwin
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos aarch64-darwin
+BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
 LIMIT83fs = go32v2 os2 emx watcom msdos
 OSNeedsComspecToRunBatch = go32v2 watcom
 OSNeedsComspecToRunBatch = go32v2 watcom
@@ -326,9 +326,9 @@ FPCFPMAKE=$(FPC)
 endif
 endif
 endif
 endif
 override PACKAGE_NAME=compiler
 override PACKAGE_NAME=compiler
-override PACKAGE_VERSION=2.7.1
+override PACKAGE_VERSION=3.1.1
 unexport FPC_VERSION FPC_COMPILERINFO
 unexport FPC_VERSION FPC_COMPILERINFO
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
 ALLTARGETS=$(CYCLETARGETS)
 ALLTARGETS=$(CYCLETARGETS)
 ifdef ALPHA
 ifdef ALPHA
 PPC_TARGET=alpha
 PPC_TARGET=alpha
@@ -372,6 +372,9 @@ endif
 ifdef I8086
 ifdef I8086
 PPC_TARGET=i8086
 PPC_TARGET=i8086
 endif
 endif
+ifdef AARCH64
+PPC_TARGET=aarch64
+endif
 ifndef PPC_TARGET
 ifndef PPC_TARGET
 PPC_TARGET=$(CPU_TARGET)
 PPC_TARGET=$(CPU_TARGET)
 endif
 endif
@@ -469,6 +472,9 @@ endif
 ifeq ($(CPC_TARGET),i8086)
 ifeq ($(CPC_TARGET),i8086)
 CPUSUF=8086
 CPUSUF=8086
 endif
 endif
+ifeq ($(CPC_TARGET),aarch64)
+CPUSUF=a64
+endif
 NOCPUDEF=1
 NOCPUDEF=1
 MSGFILE=msg/error$(FPCLANG).msg
 MSGFILE=msg/error$(FPCLANG).msg
 SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
 SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
@@ -477,7 +483,7 @@ ifneq ($(REVINC),)
 override LOCALOPT+=-dREVINC
 override LOCALOPT+=-dREVINC
 ifeq ($(REVSTR),)
 ifeq ($(REVSTR),)
 ifneq ($(SVNVERSION),)
 ifneq ($(SVNVERSION),)
-REVSTR:=$(shell $(SVNVERSION) -c .)
+REVSTR:=$(subst r,,$(subst r1:,,r$(shell $(SVNVERSION) -c .)))
 export REVSTR
 export REVSTR
 else
 else
 ifeq ($(REVINC),force)
 ifeq ($(REVINC),force)
@@ -525,7 +531,7 @@ override LOCALOPT+=-Fux86
 endif
 endif
 OPTWPOCOLLECT=-OWdevirtcalls,optvmts -FW$(BASEDIR)/pp1.wpo
 OPTWPOCOLLECT=-OWdevirtcalls,optvmts -FW$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
-ifneq ($(findstring $(OS_TARGET),darwin linux freebsd solaris),)
+ifneq ($(findstring $(OS_TARGET),darwin linux dragonfly freebsd solaris),)
 ifdef LINKSMART
 ifdef LINKSMART
 ifdef CREATESMART
 ifdef CREATESMART
 OPTWPOCOLLECT+=-OWsymbolliveness -Xs-
 OPTWPOCOLLECT+=-OWsymbolliveness -Xs-
@@ -545,6 +551,9 @@ endif
 ifeq ($(OS_TARGET),msdos)
 ifeq ($(OS_TARGET),msdos)
 NoNativeBinaries=1
 NoNativeBinaries=1
 endif
 endif
+ifeq ($(OS_TARGET),nds)
+NoNativeBinaries=1
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
@@ -704,6 +713,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_DIRS+=utils
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
@@ -773,6 +788,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
@@ -932,6 +950,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_PROGRAMS+=pp
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override TARGET_PROGRAMS+=pp
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
@@ -1001,6 +1025,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_PROGRAMS+=pp
+endif
 override INSTALL_FPCPACKAGE=y
 override INSTALL_FPCPACKAGE=y
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
@@ -1161,6 +1188,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
 endif
@@ -1230,6 +1263,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
@@ -1389,6 +1425,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
@@ -1458,6 +1500,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
@@ -1617,6 +1662,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_TARGETDIR+=.
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
@@ -1686,6 +1737,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
@@ -1845,6 +1899,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
@@ -1914,6 +1974,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifdef REQUIRE_UNITSDIR
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 endif
 endif
@@ -2156,6 +2219,12 @@ EXEEXT=
 HASSHAREDLIB=1
 HASSHAREDLIB=1
 SHORTSUFFIX=lnx
 SHORTSUFFIX=lnx
 endif
 endif
+ifeq ($(OS_TARGET),dragonfly)
+BATCHEXT=.sh
+EXEEXT=
+HASSHAREDLIB=1
+SHORTSUFFIX=df
+endif
 ifeq ($(OS_TARGET),freebsd)
 ifeq ($(OS_TARGET),freebsd)
 BATCHEXT=.sh
 BATCHEXT=.sh
 EXEEXT=
 EXEEXT=
@@ -2717,6 +2786,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+REQUIRE_PACKAGES_RTL=1
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
@@ -2786,6 +2861,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifdef REQUIRE_PACKAGES_RTL
 ifdef REQUIRE_PACKAGES_RTL
 PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
 PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
 ifneq ($(PACKAGEDIR_RTL),)
 ifneq ($(PACKAGEDIR_RTL),)
@@ -2937,8 +3015,8 @@ endif
 ifdef CREATESHARED
 ifdef CREATESHARED
 override FPCOPT+=-Cg
 override FPCOPT+=-Cg
 endif
 endif
-ifneq ($(findstring $(OS_TARGET),freebsd openbsd netbsd linux solaris),)
-ifeq ($(CPU_TARGET),x86_64)
+ifneq ($(findstring $(OS_TARGET),dragonfly freebsd openbsd netbsd linux solaris),)
+ifneq ($(findstring $(CPU_TARGET),x86_64 mips mipsel),)
 override FPCOPT+=-Cg
 override FPCOPT+=-Cg
 endif
 endif
 endif
 endif
@@ -3577,6 +3655,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 TARGET_DIRS_UTILS=1
 TARGET_DIRS_UTILS=1
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+TARGET_DIRS_UTILS=1
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+TARGET_DIRS_UTILS=1
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 ifeq ($(FULL_TARGET),arm-linux)
 TARGET_DIRS_UTILS=1
 TARGET_DIRS_UTILS=1
 endif
 endif
@@ -3646,6 +3730,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 TARGET_DIRS_UTILS=1
 TARGET_DIRS_UTILS=1
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+TARGET_DIRS_UTILS=1
+endif
 ifdef TARGET_DIRS_UTILS
 ifdef TARGET_DIRS_UTILS
 utils_all:
 utils_all:
 	$(MAKE) -C utils all
 	$(MAKE) -C utils all
@@ -3774,7 +3861,7 @@ INSTALLEXEFILE=$(PPCROSSNAME)
 else
 else
 INSTALLEXEFILE=$(EXENAME)
 INSTALLEXEFILE=$(EXENAME)
 endif
 endif
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086 aarch64
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
 $(PPC_TARGETS):
 $(PPC_TARGETS):
@@ -3809,11 +3896,11 @@ ppuclean:
 tempclean:
 tempclean:
 	-$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 	-$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 execlean :
 execlean :
-	-$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+	-$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
 $(addsuffix _clean,$(ALLTARGETS)):
 $(addsuffix _clean,$(ALLTARGETS)):
 	-$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
 	-$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
 	-$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
 	-$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-	-$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME))
+	-$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
 	-$(DEL) $(EXENAME)
 	-$(DEL) $(EXENAME)
 clean: tempclean execlean cleanall $(addsuffix _clean,$(CPC_TARGET)) $(addsuffix _clean,$(TARGET_DIRS))
 clean: tempclean execlean cleanall $(addsuffix _clean,$(CPC_TARGET)) $(addsuffix _clean,$(TARGET_DIRS))
@@ -3965,13 +4052,11 @@ cycle:
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
-ifneq ($(OS_TARGET),embedded)
-ifneq ($(OS_TARGET),gba)
+ifndef NoNativeBinaries
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
 endif
 endif
 endif
 endif
 endif
 endif
-endif
 else
 else
 cycle: override FPC=
 cycle: override FPC=
 cycle:
 cycle:

+ 17 - 13
compiler/Makefile.fpc

@@ -4,7 +4,7 @@
 
 
 [package]
 [package]
 name=compiler
 name=compiler
-version=2.7.1
+version=3.1.1
 
 
 [target]
 [target]
 programs=pp
 programs=pp
@@ -32,7 +32,7 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 unexport FPC_VERSION FPC_COMPILERINFO
 
 
 # Which platforms are ready for inclusion in the cycle
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
 
 
 # All supported targets used for clean
 # All supported targets used for clean
 ALLTARGETS=$(CYCLETARGETS)
 ALLTARGETS=$(CYCLETARGETS)
@@ -80,6 +80,9 @@ endif
 ifdef I8086
 ifdef I8086
 PPC_TARGET=i8086
 PPC_TARGET=i8086
 endif
 endif
+ifdef AARCH64
+PPC_TARGET=aarch64
+endif
 
 
 # Default is to generate a compiler for the same
 # Default is to generate a compiler for the same
 # platform as CPU_TARGET (a native compiler)
 # platform as CPU_TARGET (a native compiler)
@@ -205,6 +208,9 @@ endif
 ifeq ($(CPC_TARGET),i8086)
 ifeq ($(CPC_TARGET),i8086)
 CPUSUF=8086
 CPUSUF=8086
 endif
 endif
+ifeq ($(CPC_TARGET),aarch64)
+CPUSUF=a64
+endif
 
 
 # Do not define the default -d$(CPU_TARGET) because that
 # Do not define the default -d$(CPU_TARGET) because that
 # will conflict with our -d$(CPC_TARGET)
 # will conflict with our -d$(CPC_TARGET)
@@ -225,7 +231,7 @@ override LOCALOPT+=-dREVINC
 # svnversion executable is available
 # svnversion executable is available
 ifeq ($(REVSTR),)
 ifeq ($(REVSTR),)
 ifneq ($(SVNVERSION),)
 ifneq ($(SVNVERSION),)
-REVSTR:=$(shell $(SVNVERSION) -c .)
+REVSTR:=$(subst r,,$(subst r1:,,r$(shell $(SVNVERSION) -c .)))
 export REVSTR
 export REVSTR
 else
 else
 ifeq ($(REVINC),force)
 ifeq ($(REVINC),force)
@@ -300,7 +306,7 @@ OPTWPOCOLLECT=-OWdevirtcalls,optvmts -FW$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
 # symbol liveness WPO requires nm, smart linking and no stripping (the latter
 # symbol liveness WPO requires nm, smart linking and no stripping (the latter
 # is forced by the Makefile when necessary)
 # is forced by the Makefile when necessary)
-ifneq ($(findstring $(OS_TARGET),darwin linux freebsd solaris),)
+ifneq ($(findstring $(OS_TARGET),darwin linux dragonfly freebsd solaris),)
 ifdef LINKSMART
 ifdef LINKSMART
 ifdef CREATESMART
 ifdef CREATESMART
 OPTWPOCOLLECT+=-OWsymbolliveness -Xs-
 OPTWPOCOLLECT+=-OWsymbolliveness -Xs-
@@ -323,6 +329,9 @@ endif
 ifeq ($(OS_TARGET),msdos)
 ifeq ($(OS_TARGET),msdos)
 NoNativeBinaries=1
 NoNativeBinaries=1
 endif
 endif
+ifeq ($(OS_TARGET),nds)
+NoNativeBinaries=1
+endif
 
 
 [rules]
 [rules]
 #####################################################################
 #####################################################################
@@ -397,7 +406,7 @@ endif
 # CPU targets
 # CPU targets
 #####################################################################
 #####################################################################
 
 
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086 aarch64
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 
 
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
@@ -452,12 +461,12 @@ tempclean:
         -$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
         -$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 
 
 execlean :
 execlean :
-        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
 
 
 $(addsuffix _clean,$(ALLTARGETS)):
 $(addsuffix _clean,$(ALLTARGETS)):
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME))
+        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
 
 
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
         -$(DEL) $(EXENAME)
         -$(DEL) $(EXENAME)
@@ -682,14 +691,10 @@ cycle:
 # ppc<ARCH> (target native)
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
-# building a native compiler for embedded targets is not possible
-ifneq ($(OS_TARGET),embedded)
-# building a native compiler for the arm-gba target is not possible
-ifneq ($(OS_TARGET),gba)
+ifndef NoNativeBinaries
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
 endif
 endif
 endif
 endif
-endif
 
 
 endif
 endif
 
 
@@ -715,7 +720,6 @@ cycle:
 # ppc<ARCH> (target native)
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
-# building a native compiler for JVM and embedded targets is not possible
 ifndef NoNativeBinaries
 ifndef NoNativeBinaries
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
 endif
 endif

+ 93 - 35
compiler/aarch64/a64att.inc

@@ -1,14 +1,37 @@
 { don't edit, this file is generated from armins.dat }
 { don't edit, this file is generated from armins.dat }
 (
 (
+'none',
 'b',
 'b',
-'cb',
-'tb',
+'cbz',
+'cbnz',
+'tbz',
+'tbnz',
 'bl',
 'bl',
 'blr',
 'blr',
 'br',
 'br',
 'ret',
 'ret',
+'brk',
+'hlt',
+'hvc',
+'smc',
+'svc',
+'eret',
+'dcps1',
+'dcps2',
+'dcps3',
+'drps',
+'dc',
+'at',
+'tlbi',
+'hint',
+'clrex',
+'dsb',
+'dmb',
+'isb',
 'ldr',
 'ldr',
 'str',
 'str',
+'ldur',
+'stur',
 'ldp',
 'ldp',
 'stp',
 'stp',
 'ldnp',
 'ldnp',
@@ -16,40 +39,66 @@
 'ldtr',
 'ldtr',
 'sttr',
 'sttr',
 'ldxr',
 'ldxr',
+'ldxp',
 'stxr',
 'stxr',
+'stxp',
 'ldar',
 'ldar',
 'stlr',
 'stlr',
 'ldaxr',
 'ldaxr',
 'stlxr',
 'stlxr',
+'stlxp',
+'ld1',
+'ld2',
+'ld3',
+'ld4',
+'st1',
+'st2',
+'st3',
+'st4',
+'ld1r',
+'ld2r',
+'ld3r',
+'ld4r',
 'prfm',
 'prfm',
+'prfum',
 'add',
 'add',
-'adc',
 'sub',
 'sub',
-'sbc',
 'cmp',
 'cmp',
 'cmn',
 'cmn',
-'mov',
 'and',
 'and',
-'bic',
 'eor',
 'eor',
-'eon',
 'orr',
 'orr',
 'orn',
 'orn',
 'tst',
 'tst',
-'mvn',
+'movz',
+'movn',
 'movk',
 'movk',
+'mrs',
+'msr',
 'adrp',
 'adrp',
 'adr',
 'adr',
 'bfm',
 'bfm',
 'sbfm',
 'sbfm',
 'ubfm',
 'ubfm',
 'extr',
 'extr',
-'sxt',
-'uxt',
+'adc',
+'sbc',
+'bic',
+'eon',
 'asrv',
 'asrv',
-'llslv',
+'lslv',
 'lsrv',
 'lsrv',
 'rorv',
 'rorv',
+'madd',
+'msub',
+'smaddl',
+'smsubl',
+'smulh',
+'umaddl',
+'umsubl',
+'umulh',
+'sdiv',
+'udiv',
 'cls',
 'cls',
 'clz',
 'clz',
 'rbit',
 'rbit',
@@ -62,33 +111,39 @@
 'csneg',
 'csneg',
 'ccmn',
 'ccmn',
 'ccmp',
 'ccmp',
-'madd',
-'msub',
-'smaddl',
-'smsubl',
-'smulh',
-'umaddl',
-'umsubl',
-'umulh',
-'sdiv',
-'udiv',
-'neg',
+'nop',
+'yield',
+'wfe',
+'wfi',
+'sev',
+'sevl',
+'mov',
+'bfi',
+'bfxil',
+'sbfiz',
+'sbfx',
+'ubfiz',
+'ubfx',
 'asr',
 'asr',
 'lsl',
 'lsl',
 'lsr',
 'lsr',
 'ror',
 'ror',
-'cset',
-'csetm',
-'cinc',
-'cinv',
-'cneg',
+'sxt',
+'uxt',
+'neg',
 'ngc',
 'ngc',
+'mvn',
 'mneg',
 'mneg',
 'mul',
 'mul',
 'smnegl',
 'smnegl',
 'smull',
 'smull',
 'umnegl',
 'umnegl',
 'umull',
 'umull',
+'cset',
+'csetm',
+'cinc',
+'cinv',
+'cneg',
 'fmov',
 'fmov',
 'fcvt',
 'fcvt',
 'fcvtas',
 'fcvtas',
@@ -103,13 +158,13 @@
 'fcvtzu',
 'fcvtzu',
 'scvtf',
 'scvtf',
 'ucvtf',
 'ucvtf',
-'fprinta',
-'fprinti',
-'fprintm',
-'fprintn',
-'fprintp',
-'fprintx',
-'fprintz',
+'frinta',
+'frinti',
+'frintm',
+'frintn',
+'frintp',
+'frintx',
+'frintz',
 'fabs',
 'fabs',
 'fneg',
 'fneg',
 'fsqrt',
 'fsqrt',
@@ -129,5 +184,8 @@
 'fcmpe',
 'fcmpe',
 'fccmp',
 'fccmp',
 'fcmmpe',
 'fcmmpe',
-'fcsel'
+'fcsel',
+'umov',
+'ins',
+'movi'
 );
 );

+ 58 - 0
compiler/aarch64/a64atts.inc

@@ -129,5 +129,63 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 159 - 43
compiler/aarch64/a64ins.dat

@@ -1,8 +1,15 @@
+; invalid
+[NONE]
+
 [B]
 [B]
 
 
-[CB]
+[CBZ]
+
+[CBNZ]
 
 
-[TB]
+[TBZ]
+
+[TBNZ]
 
 
 [BL]
 [BL]
 
 
@@ -12,10 +19,50 @@
 
 
 [RET]
 [RET]
 
 
+[BRK]
+
+[HLT]
+
+[HVC]
+
+[SMC]
+
+[SVC]
+
+[ERET]
+
+[DCPS1]
+
+[DCPS2]
+
+[DCPS3]
+
+[DRPS]
+
+[DC]
+
+[AT]
+
+[TLBI]
+
+[HINT]
+
+[CLREX]
+
+[DSB]
+
+[DMB]
+
+[ISB]
+
 [LDR]
 [LDR]
 
 
 [STR]
 [STR]
 
 
+[LDUR]
+
+[STUR]
+
 [LDP]
 [LDP]
 
 
 [STP]
 [STP]
@@ -30,8 +77,12 @@
 
 
 [LDXR]
 [LDXR]
 
 
+[LDXP]
+
 [STXR]
 [STXR]
 
 
+[STXP]
+
 [LDAR]
 [LDAR]
 
 
 [STLR]
 [STLR]
@@ -40,40 +91,64 @@
 
 
 [STLXR]
 [STLXR]
 
 
+[STLXP]
+
+[LD1]
+
+[LD2]
+
+[LD3]
+
+[LD4]
+
+[ST1]
+
+[ST2]
+
+[ST3]
+
+[ST4]
+
+[LD1R]
+
+[LD2R]
+
+[LD3R]
+
+[LD4R]
+
 [PRFM]
 [PRFM]
 
 
-[ADD]
+[PRFUM]
 
 
-[ADC]
+[ADD]
 
 
 [SUB]
 [SUB]
 
 
-[SBC]
-
 [CMP]
 [CMP]
 
 
 [CMN]
 [CMN]
 
 
-[MOV]
-
 [AND]
 [AND]
 
 
-[BIC]
-
 [EOR]
 [EOR]
 
 
-[EON]
-
 [ORR]
 [ORR]
 
 
 [ORN]
 [ORN]
 
 
 [TST]
 [TST]
 
 
-[MVN]
+[MOVZ]
+
+[MOVN]
 
 
 [MOVK]
 [MOVK]
 
 
+[MRS]
+
+[MSR]
+
 [ADRP]
 [ADRP]
 
 
 [ADR]
 [ADR]
@@ -86,18 +161,42 @@
 
 
 [EXTR]
 [EXTR]
 
 
-[SXT]
+[ADC]
 
 
-[UXT]
+[SBC]
+
+[BIC]
+
+[EON]
 
 
 [ASRV]
 [ASRV]
 
 
-[LLSLV]
+[LSLV]
 
 
 [LSRV]
 [LSRV]
 
 
 [RORV]
 [RORV]
 
 
+[MADD]
+
+[MSUB]
+
+[SMADDL]
+
+[SMSUBL]
+
+[SMULH]
+
+[UMADDL]
+
+[UMSUBL]
+
+[UMULH]
+
+[SDIV]
+
+[UDIV]
+
 [CLS]
 [CLS]
 
 
 [CLZ]
 [CLZ]
@@ -122,29 +221,33 @@
 
 
 [CCMP]
 [CCMP]
 
 
-[MADD]
+; Aliases
+; they are not generated by the compiler, they are only used for inline assembler
+[NOP]
 
 
-[MSUB]
+[YIELD]
 
 
-[SMADDL]
+[WFE]
 
 
-[SMSUBL]
+[WFI]
 
 
-[SMULH]
+[SEV]
 
 
-[UMADDL]
+[SEVL]
 
 
-[UMSUBL]
+[MOV]
 
 
-[UMULH]
+[BFI]
 
 
-[SDIV]
+[BFXIL]
 
 
-[UDIV]
+[SBFIZ]
 
 
-; Aliases
-; they are not generated by the compiler, they are only used for inline assembler
-[NEG]
+[SBFX]
+
+[UBFIZ]
+
+[UBFX]
 
 
 [ASR]
 [ASR]
 
 
@@ -154,18 +257,16 @@
 
 
 [ROR]
 [ROR]
 
 
-[CSET]
-
-[CSETM]
-
-[CINC]
+[SXT]
 
 
-[CINV]
+[UXT]
 
 
-[CNEG]
+[NEG]
 
 
 [NGC]
 [NGC]
 
 
+[MVN]
+
 [MNEG]
 [MNEG]
 
 
 [MUL]
 [MUL]
@@ -178,6 +279,16 @@
 
 
 [UMULL]
 [UMULL]
 
 
+[CSET]
+
+[CSETM]
+
+[CINC]
+
+[CINV]
+
+[CNEG]
+
 [FMOV]
 [FMOV]
 
 
 [FCVT]
 [FCVT]
@@ -206,19 +317,19 @@
 
 
 [UCVTF]
 [UCVTF]
 
 
-[FPRINTA]
+[FRINTA]
 
 
-[FPRINTI]
+[FRINTI]
 
 
-[FPRINTM]
+[FRINTM]
 
 
-[FPRINTN]
+[FRINTN]
 
 
-[FPRINTP]
+[FRINTP]
 
 
-[FPRINTX]
+[FRINTX]
 
 
-[FPRINTZ]
+[FRINTZ]
 
 
 [FABS]
 [FABS]
 
 
@@ -260,3 +371,8 @@
 
 
 [FCSEL]
 [FCSEL]
 
 
+[UMOV]
+
+[INS]
+
+[MOVI]

+ 93 - 35
compiler/aarch64/a64op.inc

@@ -1,14 +1,37 @@
 { don't edit, this file is generated from armins.dat }
 { don't edit, this file is generated from armins.dat }
 (
 (
+A_NONE,
 A_B,
 A_B,
-A_CB,
-A_TB,
+A_CBZ,
+A_CBNZ,
+A_TBZ,
+A_TBNZ,
 A_BL,
 A_BL,
 A_BLR,
 A_BLR,
 A_BR,
 A_BR,
 A_RET,
 A_RET,
+A_BRK,
+A_HLT,
+A_HVC,
+A_SMC,
+A_SVC,
+A_ERET,
+A_DCPS1,
+A_DCPS2,
+A_DCPS3,
+A_DRPS,
+A_DC,
+A_AT,
+A_TLBI,
+A_HINT,
+A_CLREX,
+A_DSB,
+A_DMB,
+A_ISB,
 A_LDR,
 A_LDR,
 A_STR,
 A_STR,
+A_LDUR,
+A_STUR,
 A_LDP,
 A_LDP,
 A_STP,
 A_STP,
 A_LDNP,
 A_LDNP,
@@ -16,40 +39,66 @@ A_STNP,
 A_LDTR,
 A_LDTR,
 A_STTR,
 A_STTR,
 A_LDXR,
 A_LDXR,
+A_LDXP,
 A_STXR,
 A_STXR,
+A_STXP,
 A_LDAR,
 A_LDAR,
 A_STLR,
 A_STLR,
 A_LDAXR,
 A_LDAXR,
 A_STLXR,
 A_STLXR,
+A_STLXP,
+A_LD1,
+A_LD2,
+A_LD3,
+A_LD4,
+A_ST1,
+A_ST2,
+A_ST3,
+A_ST4,
+A_LD1R,
+A_LD2R,
+A_LD3R,
+A_LD4R,
 A_PRFM,
 A_PRFM,
+A_PRFUM,
 A_ADD,
 A_ADD,
-A_ADC,
 A_SUB,
 A_SUB,
-A_SBC,
 A_CMP,
 A_CMP,
 A_CMN,
 A_CMN,
-A_MOV,
 A_AND,
 A_AND,
-A_BIC,
 A_EOR,
 A_EOR,
-A_EON,
 A_ORR,
 A_ORR,
 A_ORN,
 A_ORN,
 A_TST,
 A_TST,
-A_MVN,
+A_MOVZ,
+A_MOVN,
 A_MOVK,
 A_MOVK,
+A_MRS,
+A_MSR,
 A_ADRP,
 A_ADRP,
 A_ADR,
 A_ADR,
 A_BFM,
 A_BFM,
 A_SBFM,
 A_SBFM,
 A_UBFM,
 A_UBFM,
 A_EXTR,
 A_EXTR,
-A_SXT,
-A_UXT,
+A_ADC,
+A_SBC,
+A_BIC,
+A_EON,
 A_ASRV,
 A_ASRV,
-A_LLSLV,
+A_LSLV,
 A_LSRV,
 A_LSRV,
 A_RORV,
 A_RORV,
+A_MADD,
+A_MSUB,
+A_SMADDL,
+A_SMSUBL,
+A_SMULH,
+A_UMADDL,
+A_UMSUBL,
+A_UMULH,
+A_SDIV,
+A_UDIV,
 A_CLS,
 A_CLS,
 A_CLZ,
 A_CLZ,
 A_RBIT,
 A_RBIT,
@@ -62,33 +111,39 @@ A_CSINV,
 A_CSNEG,
 A_CSNEG,
 A_CCMN,
 A_CCMN,
 A_CCMP,
 A_CCMP,
-A_MADD,
-A_MSUB,
-A_SMADDL,
-A_SMSUBL,
-A_SMULH,
-A_UMADDL,
-A_UMSUBL,
-A_UMULH,
-A_SDIV,
-A_UDIV,
-A_NEG,
+A_NOP,
+A_YIELD,
+A_WFE,
+A_WFI,
+A_SEV,
+A_SEVL,
+A_MOV,
+A_BFI,
+A_BFXIL,
+A_SBFIZ,
+A_SBFX,
+A_UBFIZ,
+A_UBFX,
 A_ASR,
 A_ASR,
 A_LSL,
 A_LSL,
 A_LSR,
 A_LSR,
 A_ROR,
 A_ROR,
-A_CSET,
-A_CSETM,
-A_CINC,
-A_CINV,
-A_CNEG,
+A_SXT,
+A_UXT,
+A_NEG,
 A_NGC,
 A_NGC,
+A_MVN,
 A_MNEG,
 A_MNEG,
 A_MUL,
 A_MUL,
 A_SMNEGL,
 A_SMNEGL,
 A_SMULL,
 A_SMULL,
 A_UMNEGL,
 A_UMNEGL,
 A_UMULL,
 A_UMULL,
+A_CSET,
+A_CSETM,
+A_CINC,
+A_CINV,
+A_CNEG,
 A_FMOV,
 A_FMOV,
 A_FCVT,
 A_FCVT,
 A_FCVTAS,
 A_FCVTAS,
@@ -103,13 +158,13 @@ A_FCVTZS,
 A_FCVTZU,
 A_FCVTZU,
 A_SCVTF,
 A_SCVTF,
 A_UCVTF,
 A_UCVTF,
-A_FPRINTA,
-A_FPRINTI,
-A_FPRINTM,
-A_FPRINTN,
-A_FPRINTP,
-A_FPRINTX,
-A_FPRINTZ,
+A_FRINTA,
+A_FRINTI,
+A_FRINTM,
+A_FRINTN,
+A_FRINTP,
+A_FRINTX,
+A_FRINTZ,
 A_FABS,
 A_FABS,
 A_FNEG,
 A_FNEG,
 A_FSQRT,
 A_FSQRT,
@@ -129,5 +184,8 @@ A_FCMP,
 A_FCMPE,
 A_FCMPE,
 A_FCCMP,
 A_FCCMP,
 A_FCMMPE,
 A_FCMMPE,
-A_FCSEL
+A_FCSEL,
+A_UMOV,
+A_INS,
+A_MOVI
 );
 );

+ 168 - 161
compiler/aarch64/a64reg.dat

@@ -70,169 +70,176 @@ W30,$01,$04,$1E,w30,30,30
 X30,$01,$05,$1E,x30,30,30
 X30,$01,$05,$1E,x30,30,30
 WZR,$01,$04,$1F,wzr,31,31
 WZR,$01,$04,$1F,wzr,31,31
 XZR,$01,$05,$1F,xzr,31,31
 XZR,$01,$05,$1F,xzr,31,31
+; sp and zr share the same register number, but we still have to be able to
+; differentiate them because some instructions can be encoded with both ->
+; use a different superregister after all
+WSP,$01,$04,$20,wsp,31,31
+SP,$01,$05,$20,sp,31,31
 
 
 
 
 ; vfp registers
 ; vfp registers
-B0,$04,$01,$00,b0,0,0
-H0,$04,$03,$00,h0,0,0
-S0,$04,$09,$00,s0,0,0
-D0,$04,$0a,$00,d0,0,0
-Q0,$04,$05,$00,q0,0,0
-B1,$04,$01,$01,b1,1,1
-H1,$04,$03,$01,h1,1,1
-S1,$04,$09,$01,s1,1,1
-D1,$04,$0a,$01,d1,1,1
-Q1,$04,$05,$01,q1,1,1
-B2,$04,$01,$02,b2,2,2
-H2,$04,$03,$02,h2,2,2
-S2,$04,$09,$02,s2,2,2
-D2,$04,$0a,$02,d2,2,2
-Q2,$04,$05,$02,q2,2,2
-B3,$04,$01,$03,b3,3,3
-H3,$04,$03,$03,h3,3,3
-S3,$04,$09,$03,s3,3,3
-D3,$04,$0a,$03,d3,3,3
-Q3,$04,$05,$03,q3,3,3
-B4,$04,$01,$04,b4,4,4
-H4,$04,$03,$04,h4,4,4
-S4,$04,$09,$04,s4,4,4
-D4,$04,$0a,$04,d4,4,4
-Q4,$04,$05,$04,q4,4,4
-B5,$04,$01,$05,b5,5,5
-H5,$04,$03,$05,h5,5,5
-S5,$04,$09,$05,s5,5,5
-D5,$04,$0a,$05,d5,5,5
-Q5,$04,$05,$05,q5,5,5
-B6,$04,$01,$06,b6,6,6
-H6,$04,$03,$06,h6,6,6
-S6,$04,$09,$06,s6,6,6
-D6,$04,$0a,$06,d6,6,6
-Q6,$04,$05,$06,q6,6,6
-B7,$04,$01,$07,b7,7,7
-H7,$04,$03,$07,h7,7,7
-S7,$04,$09,$07,s7,7,7
-D7,$04,$0a,$07,d7,7,7
-Q7,$04,$05,$07,q7,7,7
-B8,$04,$01,$08,b8,8,8
-H8,$04,$03,$08,h8,8,8
-S8,$04,$09,$08,s8,8,8
-D8,$04,$0a,$08,d8,8,8
-Q8,$04,$05,$08,q8,8,8
-B9,$04,$01,$09,b9,9,9
-H9,$04,$03,$09,h9,9,9
-S9,$04,$09,$09,s9,9,9
-D9,$04,$0a,$09,d9,9,9
-Q9,$04,$05,$09,q9,9,9
-B10,$04,$01,$0A,b10,10,10
-H10,$04,$03,$0A,h10,10,10
-S10,$04,$09,$0A,s10,10,10
-D10,$04,$0a,$0A,d10,10,10
-Q10,$04,$05,$0A,q10,10,10
-B11,$04,$01,$0B,b11,11,11
-H11,$04,$03,$0B,h11,11,11
-S11,$04,$09,$0B,s11,11,11
-D11,$04,$0a,$0B,d11,11,11
-Q11,$04,$05,$0B,q11,11,11
-B12,$04,$01,$0C,b12,12,12
-H12,$04,$03,$0C,h12,12,12
-S12,$04,$09,$0C,s12,12,12
-D12,$04,$0a,$0C,d12,12,12
-Q12,$04,$05,$0C,q12,12,12
-B13,$04,$01,$0D,b13,13,13
-H13,$04,$03,$0D,h13,13,13
-S13,$04,$09,$0D,s13,13,13
-D13,$04,$0a,$0D,d13,13,13
-Q13,$04,$05,$0D,q13,13,13
-B14,$04,$01,$0E,b14,14,14
-H14,$04,$03,$0E,h14,14,14
-S14,$04,$09,$0E,s14,14,14
-D14,$04,$0a,$0E,d14,14,14
-Q14,$04,$05,$0E,q14,14,14
-B15,$04,$01,$0F,b15,15,15
-H15,$04,$03,$0F,h15,15,15
-S15,$04,$09,$0F,s15,15,15
-D15,$04,$0a,$0F,d15,15,15
-Q15,$04,$05,$0F,q15,15,15
-B16,$04,$01,$10,b16,16,16
-H16,$04,$03,$10,h16,16,16
-S16,$04,$09,$10,s16,16,16
-D16,$04,$0a,$10,d16,16,16
-Q16,$04,$05,$10,q16,16,16
-B17,$04,$01,$11,b17,17,17
-H17,$04,$03,$11,h17,17,17
-S17,$04,$09,$11,s17,17,17
-D17,$04,$0a,$11,d17,17,17
-Q17,$04,$05,$11,q17,17,17
-B18,$04,$01,$12,b18,18,18
-H18,$04,$03,$12,h18,18,18
-S18,$04,$09,$12,s18,18,18
-D18,$04,$0a,$12,d18,18,18
-Q18,$04,$05,$12,q18,18,18
-B19,$04,$01,$13,b19,19,19
-H19,$04,$03,$13,h19,19,19
-S19,$04,$09,$13,s19,19,19
-D19,$04,$0a,$13,d19,19,19
-Q19,$04,$05,$13,q19,19,19
-B20,$04,$01,$14,b20,20,20
-H20,$04,$03,$14,h20,20,20
-S20,$04,$09,$14,s20,20,20
-D20,$04,$0a,$14,d20,20,20
-Q20,$04,$05,$14,q20,20,20
-B21,$04,$01,$15,b21,21,21
-H21,$04,$03,$15,h21,21,21
-S21,$04,$09,$15,s21,21,21
-D21,$04,$0a,$15,d21,21,21
-Q21,$04,$05,$15,q21,21,21
-B22,$04,$01,$16,b22,22,22
-H22,$04,$03,$16,h22,22,22
-S22,$04,$09,$16,s22,22,22
-D22,$04,$0a,$16,d22,22,22
-Q22,$04,$05,$16,q22,22,22
-B23,$04,$01,$17,b23,23,23
-H23,$04,$03,$17,h23,23,23
-S23,$04,$09,$17,s23,23,23
-D23,$04,$0a,$17,d23,23,23
-Q23,$04,$05,$17,q23,23,23
-B24,$04,$01,$18,b24,24,24
-H24,$04,$03,$18,h24,24,24
-S24,$04,$09,$18,s24,24,24
-D24,$04,$0a,$18,d24,24,24
-Q24,$04,$05,$18,q24,24,24
-B25,$04,$01,$19,b25,25,25
-H25,$04,$03,$19,h25,25,25
-S25,$04,$09,$19,s25,25,25
-D25,$04,$0a,$19,d25,25,25
-Q25,$04,$05,$19,q25,25,25
-B26,$04,$01,$1A,b26,26,26
-H26,$04,$03,$1A,h26,26,26
-S26,$04,$09,$1A,s26,26,26
-D26,$04,$0a,$1A,d26,26,26
-Q26,$04,$05,$1A,q26,26,26
-B27,$04,$01,$1B,b27,27,27
-H27,$04,$03,$1B,h27,27,27
-S27,$04,$09,$1B,s27,27,27
-D27,$04,$0a,$1B,d27,27,27
-Q27,$04,$05,$1B,q27,27,27
-B28,$04,$01,$1C,b28,28,28
-H28,$04,$03,$1C,h28,28,28
-S28,$04,$09,$1C,s28,28,28
-D28,$04,$0a,$1C,d28,28,28
-Q28,$04,$05,$1C,q28,28,28
-B29,$04,$01,$1D,b29,29,29
-H29,$04,$03,$1D,h29,29,29
-S29,$04,$09,$1D,s29,29,29
-D29,$04,$0a,$1D,d29,29,29
-Q29,$04,$05,$1D,q29,29,29
-B30,$04,$01,$1E,b30,30,30
-H30,$04,$03,$1E,h30,30,30
-S30,$04,$09,$1E,s30,30,30
-D30,$04,$0a,$1E,d30,30,30
-Q30,$04,$05,$1E,q30,30,30
-B31,$04,$01,$1F,b31,31,31
-H31,$04,$03,$1F,h31,31,31
-S31,$04,$09,$1F,s31,31,31
-D31,$04,$0a,$1F,d31,31,31
-Q31,$04,$05,$1F,q31,31,31
+B0,$04,$01,$00,b0,64,64
+H0,$04,$03,$00,h0,64,64
+S0,$04,$09,$00,s0,64,64
+D0,$04,$0a,$00,d0,64,64
+Q0,$04,$05,$00,q0,64,64
+B1,$04,$01,$01,b1,65,65
+H1,$04,$03,$01,h1,65,65
+S1,$04,$09,$01,s1,65,65
+D1,$04,$0a,$01,d1,65,65
+Q1,$04,$05,$01,q1,65,65
+B2,$04,$01,$02,b2,66,66
+H2,$04,$03,$02,h2,66,66
+S2,$04,$09,$02,s2,66,66
+D2,$04,$0a,$02,d2,66,66
+Q2,$04,$05,$02,q2,66,66
+B3,$04,$01,$03,b3,67,67
+H3,$04,$03,$03,h3,67,67
+S3,$04,$09,$03,s3,67,67
+D3,$04,$0a,$03,d3,67,67
+Q3,$04,$05,$03,q3,67,67
+B4,$04,$01,$04,b4,68,68
+H4,$04,$03,$04,h4,68,68
+S4,$04,$09,$04,s4,68,68
+D4,$04,$0a,$04,d4,68,68
+Q4,$04,$05,$04,q4,68,68
+B5,$04,$01,$05,b5,69,69
+H5,$04,$03,$05,h5,69,69
+S5,$04,$09,$05,s5,69,69
+D5,$04,$0a,$05,d5,69,69
+Q5,$04,$05,$05,q5,69,69
+B6,$04,$01,$06,b6,70,70
+H6,$04,$03,$06,h6,70,70
+S6,$04,$09,$06,s6,70,70
+D6,$04,$0a,$06,d6,70,70
+Q6,$04,$05,$06,q6,70,70
+B7,$04,$01,$07,b7,71,71
+H7,$04,$03,$07,h7,71,71
+S7,$04,$09,$07,s7,71,71
+D7,$04,$0a,$07,d7,71,71
+Q7,$04,$05,$07,q7,71,71
+B8,$04,$01,$08,b8,72,72
+H8,$04,$03,$08,h8,72,72
+S8,$04,$09,$08,s8,72,72
+D8,$04,$0a,$08,d8,72,72
+Q8,$04,$05,$08,q8,72,72
+B9,$04,$01,$09,b9,73,73
+H9,$04,$03,$09,h9,73,73
+S9,$04,$09,$09,s9,73,73
+D9,$04,$0a,$09,d9,73,73
+Q9,$04,$05,$09,q9,73,73
+B10,$04,$01,$0A,b10,74,74
+H10,$04,$03,$0A,h10,74,74
+S10,$04,$09,$0A,s10,74,74
+D10,$04,$0a,$0A,d10,74,74
+Q10,$04,$05,$0A,q10,74,74
+B11,$04,$01,$0B,b11,75,75
+H11,$04,$03,$0B,h11,75,75
+S11,$04,$09,$0B,s11,75,75
+D11,$04,$0a,$0B,d11,75,75
+Q11,$04,$05,$0B,q11,75,75
+B12,$04,$01,$0C,b12,76,76
+H12,$04,$03,$0C,h12,76,76
+S12,$04,$09,$0C,s12,76,76
+D12,$04,$0a,$0C,d12,76,76
+Q12,$04,$05,$0C,q12,76,76
+B13,$04,$01,$0D,b13,77,77
+H13,$04,$03,$0D,h13,77,77
+S13,$04,$09,$0D,s13,77,77
+D13,$04,$0a,$0D,d13,77,77
+Q13,$04,$05,$0D,q13,77,77
+B14,$04,$01,$0E,b14,78,78
+H14,$04,$03,$0E,h14,78,78
+S14,$04,$09,$0E,s14,78,78
+D14,$04,$0a,$0E,d14,78,78
+Q14,$04,$05,$0E,q14,78,78
+B15,$04,$01,$0F,b15,79,79
+H15,$04,$03,$0F,h15,79,79
+S15,$04,$09,$0F,s15,79,79
+D15,$04,$0a,$0F,d15,79,79
+Q15,$04,$05,$0F,q15,79,79
+B16,$04,$01,$10,b16,80,80
+H16,$04,$03,$10,h16,80,80
+S16,$04,$09,$10,s16,80,80
+D16,$04,$0a,$10,d16,80,80
+Q16,$04,$05,$10,q16,80,80
+B17,$04,$01,$11,b17,81,81
+H17,$04,$03,$11,h17,81,81
+S17,$04,$09,$11,s17,81,81
+D17,$04,$0a,$11,d17,81,81
+Q17,$04,$05,$11,q17,81,81
+B18,$04,$01,$12,b18,82,82
+H18,$04,$03,$12,h18,82,82
+S18,$04,$09,$12,s18,82,82
+D18,$04,$0a,$12,d18,82,82
+Q18,$04,$05,$12,q18,82,82
+B19,$04,$01,$13,b19,83,83
+H19,$04,$03,$13,h19,83,83
+S19,$04,$09,$13,s19,83,83
+D19,$04,$0a,$13,d19,83,83
+Q19,$04,$05,$13,q19,83,83
+B20,$04,$01,$14,b20,84,84
+H20,$04,$03,$14,h20,84,84
+S20,$04,$09,$14,s20,84,84
+D20,$04,$0a,$14,d20,84,84
+Q20,$04,$05,$14,q20,84,84
+B21,$04,$01,$15,b21,85,85
+H21,$04,$03,$15,h21,85,85
+S21,$04,$09,$15,s21,85,85
+D21,$04,$0a,$15,d21,85,85
+Q21,$04,$05,$15,q21,85,85
+B22,$04,$01,$16,b22,86,86
+H22,$04,$03,$16,h22,86,86
+S22,$04,$09,$16,s22,86,86
+D22,$04,$0a,$16,d22,86,86
+Q22,$04,$05,$16,q22,86,86
+B23,$04,$01,$17,b23,87,87
+H23,$04,$03,$17,h23,87,87
+S23,$04,$09,$17,s23,87,87
+D23,$04,$0a,$17,d23,87,87
+Q23,$04,$05,$17,q23,87,87
+B24,$04,$01,$18,b24,88,88
+H24,$04,$03,$18,h24,88,88
+S24,$04,$09,$18,s24,88,88
+D24,$04,$0a,$18,d24,88,88
+Q24,$04,$05,$18,q24,88,88
+B25,$04,$01,$19,b25,89,89
+H25,$04,$03,$19,h25,89,89
+S25,$04,$09,$19,s25,89,89
+D25,$04,$0a,$19,d25,89,89
+Q25,$04,$05,$19,q25,89,89
+B26,$04,$01,$1A,b26,90,90
+H26,$04,$03,$1A,h26,90,90
+S26,$04,$09,$1A,s26,90,90
+D26,$04,$0a,$1A,d26,90,90
+Q26,$04,$05,$1A,q26,90,90
+B27,$04,$01,$1B,b27,91,91
+H27,$04,$03,$1B,h27,91,91
+S27,$04,$09,$1B,s27,91,91
+D27,$04,$0a,$1B,d27,91,91
+Q27,$04,$05,$1B,q27,91,91
+B28,$04,$01,$1C,b28,92,92
+H28,$04,$03,$1C,h28,92,92
+S28,$04,$09,$1C,s28,92,92
+D28,$04,$0a,$1C,d28,92,92
+Q28,$04,$05,$1C,q28,92,92
+B29,$04,$01,$1D,b29,93,93
+H29,$04,$03,$1D,h29,93,93
+S29,$04,$09,$1D,s29,93,93
+D29,$04,$0a,$1D,d29,93,93
+Q29,$04,$05,$1D,q29,93,93
+B30,$04,$01,$1E,b30,94,94
+H30,$04,$03,$1E,h30,94,94
+S30,$04,$09,$1E,s30,94,94
+D30,$04,$0a,$1E,d30,94,94
+Q30,$04,$05,$1E,q30,94,94
+B31,$04,$01,$1F,b31,95,95
+H31,$04,$03,$1F,h31,95,95
+S31,$04,$09,$1F,s31,95,95
+D31,$04,$0a,$1F,d31,95,95
+Q31,$04,$05,$1F,q31,95,95
 
 
 NZCV,$05,$00,$00,nzcv,0,0
 NZCV,$05,$00,$00,nzcv,0,0
-
+FPCR,$05,$00,$01,fpcr,0,0
+FPSR,$05,$00,$02,fpsr,0,0
+TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0

+ 462 - 1125
compiler/aarch64/aasmcpu.pas

@@ -1,7 +1,7 @@
 {
 {
     Copyright (c) 2003-2012 by Florian Klaempfl and others
     Copyright (c) 2003-2012 by Florian Klaempfl and others
 
 
-    Contains the assembler object for ARM64
+    Contains the assembler object for Aarch64
 
 
     This program is free software; you can redistribute it and/or modify
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     it under the terms of the GNU General Public License as published by
@@ -149,9 +149,6 @@ uses
 
 
       pinsentry=^tinsentry;
       pinsentry=^tinsentry;
 
 
-{    const
-      InsTab : array[0..instabentries-1] of TInsEntry={$i a64tab.inc} }
-
     var
     var
       InsTabCache : PInsTabCache;
       InsTabCache : PInsTabCache;
 
 
@@ -159,6 +156,7 @@ uses
       taicpu = class(tai_cpu_abstract_sym)
       taicpu = class(tai_cpu_abstract_sym)
          oppostfix : TOpPostfix;
          oppostfix : TOpPostfix;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadshifterop(opidx:longint;const so:tshifterop);
+         procedure loadconditioncode(opidx: longint; const c: tasmcond);
          constructor op_none(op : tasmop);
          constructor op_none(op : tasmop);
 
 
          constructor op_reg(op : tasmop;_op1 : tregister);
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -167,15 +165,21 @@ uses
 
 
          constructor op_reg_reg(op : tasmop;_op1,_op2 : tregister);
          constructor op_reg_reg(op : tasmop;_op1,_op2 : tregister);
          constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
          constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
+         constructor op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
+         constructor op_reg_const_shifterop(op : tasmop;_op1: tregister; _op2: aint;_op3 : tshifterop);
 
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
          constructor op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
          constructor op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
+         constructor op_reg_reg_const_const(op : tasmop;_op1,_op2 : tregister; _op3, _op4: aint);
+         constructor op_reg_reg_const_shifterop(op : tasmop;_op1,_op2 : tregister; _op3: aint; const _op4 : tshifterop);
          constructor op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
          constructor op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
          constructor op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
          constructor op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
          constructor op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
          constructor op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
-         constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+         constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
+         constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
+
 
 
          { this is for Jmp instructions }
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
@@ -188,6 +192,7 @@ uses
          function is_same_reg_move(regtype: Tregistertype):boolean; override;
          function is_same_reg_move(regtype: Tregistertype):boolean; override;
 
 
          function spilling_get_operation_type(opnr: longint): topertype;override;
          function spilling_get_operation_type(opnr: longint): topertype;override;
+         function spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;override;
 
 
          { assembler }
          { assembler }
       public
       public
@@ -203,28 +208,29 @@ uses
          procedure ppuwriteoper(ppufile:tcompilerppufile;const o:toper);override;
          procedure ppuwriteoper(ppufile:tcompilerppufile;const o:toper);override;
          procedure ppubuildderefimploper(var o:toper);override;
          procedure ppubuildderefimploper(var o:toper);override;
          procedure ppuderefoper(var o:toper);override;
          procedure ppuderefoper(var o:toper);override;
-      private
-         { next fields are filled in pass1, so pass2 is faster }
-         inssize   : shortint;
-         insoffset : longint;
-         LastInsOffset : longint; { need to be public to be reset }
-         insentry  : PInsEntry;
-         function  InsEnd:longint;
-         procedure create_ot(objdata:TObjData);
-         function  Matches(p:PInsEntry):longint;
-         function  calcsize(p:PInsEntry):shortint;
-         procedure gencode(objdata:TObjData);
-         function  NeedAddrPrefix(opidx:byte):boolean;
-         procedure Swapoperands;
-         function  FindInsentry(objdata:TObjData):boolean;
       end;
       end;
 
 
       tai_align = class(tai_align_abstract)
       tai_align = class(tai_align_abstract)
         { nothing to add }
         { nothing to add }
       end;
       end;
 
 
-    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
-    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+    type
+      tsimplereftype =
+         { valid reference }
+        (sr_simple,
+         { invalid reference, should not be generated by the code generator (but
+           can be encountered via inline assembly, where it must be rejected) }
+         sr_internal_illegal,
+         { invalid reference, may be generated by the code generator and then
+           must be simplified (also rejected in inline assembly) }
+         sr_complex);
+
+    function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
+    function can_be_shifter_operand(opc: tasmop; opnr: longint): boolean;
+    function valid_shifter_operand(opc: tasmop; useszr, usessp, is64bit: boolean; sm: tshiftmode; shiftimm: longint): boolean;
+
+    function spilling_create_load(const ref: treference; r: tregister): taicpu;
+    function spilling_create_store(r: tregister; const ref: treference): taicpu;
 
 
     function setoppostfix(i : taicpu;pf : toppostfix) : taicpu;
     function setoppostfix(i : taicpu;pf : toppostfix) : taicpu;
     function setcondition(i : taicpu;c : tasmcond) : taicpu;
     function setcondition(i : taicpu;c : tasmcond) : taicpu;
@@ -261,6 +267,21 @@ implementation
       end;
       end;
 
 
 
 
+    procedure taicpu.loadconditioncode(opidx: longint; const c: tasmcond);
+      begin
+        allocate_oper(opidx+1);
+        with oper[opidx]^ do
+          begin
+            if typ<>top_conditioncode then
+              begin
+                clearop(opidx);
+              end;
+            cc:=c;
+            typ:=top_conditioncode;
+          end;
+      end;
+
+
 {*****************************************************************************
 {*****************************************************************************
                                  taicpu Constructors
                                  taicpu Constructors
 *****************************************************************************}
 *****************************************************************************}
@@ -314,6 +335,16 @@ implementation
       end;
       end;
 
 
 
 
+    constructor taicpu.op_reg_const_shifterop(op: tasmop; _op1: tregister; _op2: aint; _op3: tshifterop);
+      begin
+        inherited create(op);
+        ops:=3;
+        loadreg(0,_op1);
+        loadconst(1,_op2);
+        loadshifterop(2,_op3);
+      end;
+
+
     constructor taicpu.op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
     constructor taicpu.op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
       begin
       begin
          inherited create(op);
          inherited create(op);
@@ -323,6 +354,15 @@ implementation
       end;
       end;
 
 
 
 
+    constructor taicpu.op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
+      begin
+        inherited create(op);
+        ops:=2;
+        loadreg(0,_op1);
+        loadconditioncode(1,_op2);
+      end;
+
+
     constructor taicpu.op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
     constructor taicpu.op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
       begin
       begin
          inherited create(op);
          inherited create(op);
@@ -354,6 +394,28 @@ implementation
       end;
       end;
 
 
 
 
+     constructor taicpu.op_reg_reg_const_const(op: tasmop; _op1, _op2: tregister; _op3, _op4: aint);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,aint(_op3));
+         loadconst(3,aint(_op4));
+       end;
+
+
+     constructor taicpu.op_reg_reg_const_shifterop(op: tasmop; _op1, _op2: tregister; _op3: aint; const _op4: tshifterop);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,aint(_op3));
+         loadshifterop(3,_op4);
+       end;
+
+
      constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
      constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
        begin
        begin
          inherited create(op);
          inherited create(op);
@@ -384,7 +446,7 @@ implementation
       end;
       end;
 
 
 
 
-     constructor taicpu.op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+     constructor taicpu.op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
       begin
       begin
          inherited create(op);
          inherited create(op);
          ops:=4;
          ops:=4;
@@ -394,6 +456,16 @@ implementation
          loadshifterop(3,_op4);
          loadshifterop(3,_op4);
       end;
       end;
 
 
+     constructor taicpu.op_reg_reg_reg_cond(op: tasmop; _op1, _op2, _op3: tregister; const _op4: tasmcond);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+         loadconditioncode(3,_op4);
+       end;
+
 
 
     constructor taicpu.op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
     constructor taicpu.op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
       begin
       begin
@@ -454,85 +526,403 @@ implementation
       end;
       end;
 
 
 
 
-    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
+    function spilling_create_op(op: tasmop; const ref: treference; r: tregister): taicpu;
+      const
+        { invalid sizes for aarch64 are 0 }
+        subreg2bytesize: array[TSubRegister] of byte =
+          (0,0,0,0,4,8,0,0,0,4,8,0,0,0);
       var
       var
-        op: tasmop;
+        scalefactor: byte;
       begin
       begin
+        scalefactor:=subreg2bytesize[getsubreg(r)];
+        if scalefactor=0 then
+          internalerror(2014120301);
+        if (ref.offset>4095*scalefactor) or
+           ((ref.offset>255) and
+            ((ref.offset mod scalefactor)<>0)) or
+           (ref.offset<-256) then
+          internalerror(2014120302);
         case getregtype(r) of
         case getregtype(r) of
-          R_INTREGISTER :
-            result:=taicpu.op_reg_ref(A_LDR,r,ref);
-          R_MMREGISTER :
-            begin
-              case getsubreg(r) of
-                R_SUBFD:
-                  op:=A_LDR;
-                R_SUBFS:
-                  op:=A_LDR;
-                else
-                  internalerror(2009112905);
-              end;
-              result:=taicpu.op_reg_ref(op,r,ref);
-            end;
+          R_INTREGISTER,
+          R_MMREGISTER:
+            result:=taicpu.op_reg_ref(op,r,ref);
           else
           else
             internalerror(200401041);
             internalerror(200401041);
         end;
         end;
       end;
       end;
 
 
 
 
-    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+    function is_valid_load_symbol(op: tasmop; oppostfix: toppostfix; const ref: treference): tsimplereftype;
+      begin
+        result:=sr_complex;
+        if not assigned(ref.symboldata) and
+           not(ref.refaddr in [addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
+          exit;
+        { can't use pre-/post-indexed mode here (makes no sense either) }
+        if ref.addressmode<>AM_OFFSET then
+          exit;
+        { "ldr literal" must be a 32/64 bit LDR and have a symbol }
+        if assigned(ref.symboldata) and
+           ((op<>A_LDR) or
+            not(oppostfix in [PF_NONE,PF_W,PF_SW]) or
+            not assigned(ref.symbol)) then
+          exit;
+        { if this is a (got) page offset load, we must have a base register and a
+          symbol }
+        if (ref.refaddr in [addr_gotpageoffset,addr_pageoffset]) and
+           (not assigned(ref.symbol) or
+            (ref.base=NR_NO) or
+            (ref.index<>NR_NO) or
+            (ref.offset<>0)) then
+          begin
+            result:=sr_internal_illegal;
+            exit;
+          end;
+        { cannot have base or index register (we generate these kind of
+          references internally, they should never end up here with an
+          extra base or offset) }
+        if (ref.refaddr in [addr_gotpage,addr_page]) and
+           (ref.base<>NR_NO) or
+           (ref.index<>NR_NO) then
+          begin
+            result:=sr_internal_illegal;
+            exit;
+          end;
+        result:=sr_simple;
+      end;
+
+
+    function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
       var
       var
-        op: tasmop;
+        maxoffs: asizeint;
+        accesssize: longint;
       begin
       begin
-        case getregtype(r) of
-          R_INTREGISTER :
-            result:=taicpu.op_reg_ref(A_STR,r,ref);
-          R_MMREGISTER :
-            begin
-              case getsubreg(r) of
-                R_SUBFD:
-                  op:=A_STR;
-                R_SUBFS:
-                  op:=A_STR;
+        result:=sr_internal_illegal;
+        { post-indexed is only allowed for vector and immediate loads/stores }
+        if (ref.addressmode=AM_POSTINDEXED) and
+           not(op in [A_LD1,A_LD2,A_LD3,A_LD4,A_ST1,A_ST2,A_ST3,A_ST4]) and
+           (not(op in [A_LDR,A_STR,A_LDP,A_STP]) or
+            (ref.base=NR_NO) or
+            (ref.index<>NR_NO)) then
+          exit;
+
+        { can only have a shift mode if we have an index }
+        if (ref.index=NR_NO) and
+           (ref.shiftmode<>SM_None) then
+          exit;
+
+        { the index can never be the stack pointer }
+        if ref.index=NR_SP then
+          exit;
+
+        { no instruction supports an index without a base }
+        if (ref.base=NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            result:=sr_complex;
+            exit;
+          end;
+
+        { LDR literal or GOT entry: 32 or 64 bit, label }
+        if assigned(ref.symboldata) or
+           assigned(ref.symbol) then
+          begin
+            { we generate these kind of references internally; at least for now,
+              they should never end up here with an extra base or offset or so }
+            result:=is_valid_load_symbol(op,oppostfix,ref);
+            exit;
+          end;
+
+        { any other reference cannot be gotpage/gotpageoffset/pic }
+        if ref.refaddr in [addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset,addr_pic] then
+          exit;
+
+        { base & index:
+            * index cannot be the stack pointer
+            * offset must be 0
+            * can scale with the size of the access
+            * can zero/sign extend 32 bit index register, and/or multiple by
+              access size
+            * no pre/post-indexing
+        }
+        if (ref.base<>NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+              exit;
+            case op of
+              { this holds for both integer and fpu/vector loads }
+              A_LDR,A_STR:
+                if (ref.offset=0) and
+                   (((ref.shiftmode=SM_None) and
+                     (ref.shiftimm=0)) or
+                    ((ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                     (ref.shiftimm=tcgsizep2size[size]))) then
+                  result:=sr_simple
                 else
                 else
-                  internalerror(2009112904);
-              end;
-              result:=taicpu.op_reg_ref(op,r,ref);
+                  result:=sr_complex;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110704);
+              { these don't support base+index }
+              A_LDUR,A_STUR,
+              A_LDP,A_STP:
+                result:=sr_complex;
+              else
+                { nothing: result is already sr_internal_illegal };
             end;
             end;
+            exit;
+          end;
+
+        { base + immediate offset. Variants:
+            * LDR*/STR*:
+              - pre- or post-indexed with signed 9 bit immediate
+              - regular with unsiged scaled immediate (multiple of access
+                size), in the range 0 to (12 bit * access_size)-1
+            * LDP/STP
+              - pre- or post-indexed with signed 9 bit immediate
+              - regular with signed 9 bit immediate
+            * LDUR*/STUR*:
+              - regular with signed 9 bit immediate
+        }
+        if ref.base<>NR_NO then
+          begin
+            accesssize:=1 shl tcgsizep2size[size];
+            case op of
+              A_LDR,A_STR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset>=0) and
+                     (ref.offset<(((1 shl 12)-1)*accesssize)) and
+                     ((ref.offset mod accesssize)=0) then
+                    result:=sr_simple
+                  else if (ref.offset>=-256) and
+                     (ref.offset<=255) then
+                    begin
+                      { non pre-/post-indexed regular loads/stores can only be
+                        performed using LDUR/STUR }
+                      if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+                        result:=sr_simple
+                      else
+                        result:=sr_complex
+                    end
+                  else
+                    result:=sr_complex;
+                end;
+              A_LDP,A_LDNP,
+              A_STP,A_STNP:
+                begin
+                  { only supported for 32/64 bit }
+                  if not(oppostfix in [PF_W,PF_SW,PF_None]) then
+                    exit;
+                  { offset must be a multple of the access size }
+                  if (ref.offset mod accesssize)<>0 then
+                    exit;
+                  { offset must fit in a signed 7 bit offset }
+                  if (ref.offset>=-(1 shl (6+tcgsizep2size[size]))) and
+                     (ref.offset<=(1 shl (6+tcgsizep2size[size]))-1) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
+              A_LDUR,A_STUR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset>=-256) and
+                     (ref.offset<=255) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110907);
+              A_LDAR,
+              A_LDAXR,
+              A_LDXR,
+              A_LDXP,
+              A_STLR,
+              A_STLXR,
+              A_STLXP,
+              A_STXP,
+              A_STXR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset=0) then
+                    result:=sr_simple;
+                end
+              else
+                { nothing: result is already sr_internal_illegal };
+            end;
+            exit;
+          end;
+        { absolute addresses are not supported, have to load them first into
+          a register }
+        result:=sr_complex;
+      end;
+
+
+    function can_be_shifter_operand(opc: tasmop; opnr: longint): boolean;
+      begin
+        case opc of
+          A_ADD,
+          A_AND,
+          A_EON,
+          A_EOR,
+          A_ORN,
+          A_ORR,
+          A_SUB:
+            result:=opnr=3;
+          A_BIC,
+          A_CMN,
+          A_CMP,
+          A_MOVK,
+          A_MOVZ,
+          A_MOVN,
+          A_MVN,
+          A_NEG,
+          A_TST:
+            result:=opnr=2;
           else
           else
-            internalerror(200401041);
+            result:=false;
         end;
         end;
       end;
       end;
 
 
 
 
+    function valid_shifter_operand(opc: tasmop; useszr, usessp, is64bit: boolean; sm: tshiftmode; shiftimm: longint): boolean;
+      begin
+        case opc of
+          A_ADD,
+          A_SUB,
+          A_NEG,
+          A_AND,
+          A_TST,
+          A_CMN,
+          A_CMP:
+            begin
+              result:=false;
+              if not useszr then
+                result:=
+                  (sm in shiftedregmodes) and
+                  ((shiftimm in [0..31]) or
+                   (is64bit and
+                    (shiftimm in [32..63])));
+              if not usessp then
+                result:=
+                  result or
+                  ((sm in extendedregmodes) and
+                   (shiftimm in [0..4]));
+            end;
+          A_BIC,
+          A_EON,
+          A_EOR,
+          A_MVN,
+          A_ORN,
+          A_ORR:
+            result:=
+              (sm in shiftedregmodes) and
+              (shiftimm in [0..31*(ord(is64bit)+1)+ord(is64bit)]);
+          A_MOVK,
+          A_MOVZ,
+          A_MOVN:
+            result:=
+              (sm=SM_LSL) and
+              ((shiftimm in [0,16]) or
+               (is64bit and
+                (shiftimm in [32,48])));
+          else
+            result:=false;
+        end;
+      end;
+
+
+    function spilling_create_load(const ref: treference; r: tregister): taicpu;
+      var
+        op: tasmop;
+      begin
+        if (ref.index<>NR_NO) or
+           (ref.offset<-256) or
+           (ref.offset>255) then
+          op:=A_LDR
+        else
+          op:=A_LDUR;
+        result:=spilling_create_op(op,ref,r);
+      end;
+
+
+    function spilling_create_store(r: tregister; const ref: treference): taicpu;
+      var
+        op: tasmop;
+      begin
+        if (ref.index<>NR_NO) or
+           (ref.offset<-256) or
+           (ref.offset>255) then
+          op:=A_STR
+        else
+          op:=A_STUR;
+        result:=spilling_create_op(op,ref,r);
+      end;
+
+
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
       begin
       begin
         case opcode of
         case opcode of
-          A_ADC,A_ADD,A_AND,A_BIC,
-          A_EOR,A_CLZ,A_RBIT,
-          A_LDR,
-          A_MOV,A_MVN,A_MUL,
-          A_ORR,A_SBC,A_SUB,
-          A_UXT,A_SXT:
+          A_B,A_BL,
+          A_CMN,A_CMP,
+          A_CCMN,A_CCMP,
+          A_TST:
+            result:=operand_read;
+          A_STR,A_STUR:
             if opnr=0 then
             if opnr=0 then
-              result:=operand_write
+              result:=operand_read
             else
             else
+              { check for pre/post indexed in spilling_get_operation_type_ref }
               result:=operand_read;
               result:=operand_read;
-          A_B,A_BL,
-          A_CMN,A_CMP,A_TST:
-            result:=operand_read;
-          A_STR:
-            { important is what happens with the involved registers }
+          A_STLXP,
+          A_STLXR,
+          A_STXP,
+          A_STXR:
             if opnr=0 then
             if opnr=0 then
-              result := operand_read
+              result:=operand_write
             else
             else
-              { check for pre/post indexed }
-              result := operand_read;
-          else
-            internalerror(200403151);
+              result:=operand_read;
+          A_STP:
+            begin
+              if opnr in [0,1] then
+                result:=operand_read
+              else
+                { check for pre/post indexed in spilling_get_operation_type_ref }
+                result:=operand_read;
+            end;
+           A_LDP,
+           A_LDXP:
+             begin
+               if opnr in [0,1] then
+                 result:=operand_write
+               else
+                 { check for pre/post indexed in spilling_get_operation_type_ref }
+                 result:=operand_read;
+             end;
+           else
+             if opnr=0 then
+               result:=operand_write
+             else
+               result:=operand_read;
         end;
         end;
       end;
       end;
 
 
 
 
+    function taicpu.spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;
+      begin
+        result:=operand_read;
+        if (oper[opnr]^.ref^.base = reg) and
+          (oper[opnr]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) then
+           result:=operand_readwrite;
+      end;
+
+
     procedure BuildInsTabCache;
     procedure BuildInsTabCache;
       var
       var
         i : longint;
         i : longint;
@@ -1055,22 +1445,12 @@ implementation
         { we need to reset everything here, because the choosen insentry
         { we need to reset everything here, because the choosen insentry
           can be invalid for a new situation where the previously optimized
           can be invalid for a new situation where the previously optimized
           insentry is not correct }
           insentry is not correct }
-        InsEntry:=nil;
-        InsSize:=0;
-        LastInsOffset:=-1;
       end;
       end;
 
 
 
 
     procedure taicpu.ResetPass2;
     procedure taicpu.ResetPass2;
       begin
       begin
         { we are here in a second pass, check if the instruction can be optimized }
         { we are here in a second pass, check if the instruction can be optimized }
-        if assigned(InsEntry) and
-           ((InsEntry^.flags and IF_PASS2)<>0) then
-         begin
-           InsEntry:=nil;
-           InsSize:=0;
-         end;
-        LastInsOffset:=-1;
       end;
       end;
 
 
 
 
@@ -1083,18 +1463,15 @@ implementation
     function taicpu.Pass1(objdata:TObjData):longint;
     function taicpu.Pass1(objdata:TObjData):longint;
       begin
       begin
         Pass1:=0;
         Pass1:=0;
-        LastInsOffset:=-1;
       end;
       end;
 
 
 
 
     procedure taicpu.Pass2(objdata:TObjData);
     procedure taicpu.Pass2(objdata:TObjData);
       begin
       begin
         { error in pass1 ? }
         { error in pass1 ? }
-        if insentry=nil then
-         exit;
         current_filepos:=fileinfo;
         current_filepos:=fileinfo;
         { Generate the instruction }
         { Generate the instruction }
-        GenCode(objdata);
+        { GenCode(objdata); }
       end;
       end;
 
 
 
 
@@ -1118,1046 +1495,6 @@ implementation
       end;
       end;
 
 
 
 
-    function  taicpu.InsEnd:longint;
-      begin
-        Result:=0; { unimplemented }
-      end;
-
-
-    procedure taicpu.create_ot(objdata:TObjData);
-      begin
-      end;
-
-
-    function taicpu.Matches(p:PInsEntry):longint;
-      begin
-      end;
-
-
-    function  taicpu.calcsize(p:PInsEntry):shortint;
-      begin
-        result:=4;
-      end;
-
-
-    function  taicpu.NeedAddrPrefix(opidx:byte):boolean;
-      begin
-        Result:=False; { unimplemented }
-      end;
-
-
-    procedure taicpu.Swapoperands;
-      begin
-      end;
-
-
-    function taicpu.FindInsentry(objdata:TObjData):boolean;
-      begin
-      end;
-
-
-    procedure taicpu.gencode(objdata:TObjData);
-      var
-        bytes : dword;
-        i_field : byte;
-
-      procedure setshifterop(op : byte);
-        begin
-          case oper[op]^.typ of
-            top_const:
-              begin
-                i_field:=1;
-                bytes:=bytes or dword(oper[op]^.val and $fff);
-              end;
-            top_reg:
-              begin
-                i_field:=0;
-                bytes:=bytes or (getsupreg(oper[op]^.reg) shl 16);
-
-                { does a real shifter op follow? }
-                if (op+1<=op) and (oper[op+1]^.typ=top_shifterop) then
-                  begin
-                  end;
-              end;
-          else
-            internalerror(2005091103);
-          end;
-        end;
-
-      begin
-        bytes:=$0;
-        { evaluate and set condition code }
-
-        { condition code allowed? }
-
-        { setup rest of the instruction }
-        case insentry^.code[0] of
-          #$08:
-            begin
-              { set instruction code }
-              bytes:=bytes or (ord(insentry^.code[1]) shl 26);
-              bytes:=bytes or (ord(insentry^.code[2]) shl 21);
-
-              { set destination }
-              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
-
-              { create shifter op }
-              setshifterop(1);
-
-              { set i field }
-              bytes:=bytes or (i_field shl 25);
-
-              { set s if necessary }
-              if oppostfix=PF_S then
-                bytes:=bytes or (1 shl 20);
-            end;
-          #$ff:
-            internalerror(2005091101);
-          else
-            internalerror(2005091102);
-        end;
-        { we're finished, write code }
-        objdata.writebytes(bytes,sizeof(bytes));
-      end;
-
-
-{$ifdef dummy}
-(*
-static void gencode (long segment, long offset, int bits,
-                     insn *ins, char *codes, long insn_end)
-{
-    int has_S_code;             /* S - setflag */
-    int has_B_code;             /* B - setflag */
-    int has_T_code;             /* T - setflag */
-    int has_W_code;             /* ! => W flag */
-    int has_F_code;             /* ^ => S flag */
-    int keep;
-    unsigned char c;
-    unsigned char bytes[4];
-    long          data, size;
-    static int cc_code[] =      /* bit pattern of cc */
-  {                             /* order as enum in  */
-    0x0E, 0x03, 0x02, 0x00,     /* nasm.h            */
-    0x0A, 0x0C, 0x08, 0x0D,
-    0x09, 0x0B, 0x04, 0x01,
-    0x05, 0x07, 0x06,
-  };
-
-
-#ifdef DEBUG
-static char *CC[] =
-  {                                    /* condition code names */
-    "AL", "CC", "CS", "EQ",
-    "GE", "GT", "HI", "LE",
-    "LS", "LT", "MI", "NE",
-    "PL", "VC", "VS", "",
-    "S"
-};
-
-
-    has_S_code = (ins->condition & C_SSETFLAG);
-    has_B_code = (ins->condition & C_BSETFLAG);
-    has_T_code = (ins->condition & C_TSETFLAG);
-    has_W_code = (ins->condition & C_EXSETFLAG);
-    has_F_code = (ins->condition & C_FSETFLAG);
-    ins->condition = (ins->condition & 0x0F);
-
-
-    if (rt_debug)
-      {
-    printf ("gencode: instruction: %s%s", insn_names[ins->opcode],
-            CC[ins->condition & 0x0F]);
-    if (has_S_code)
-      printf ("S");
-    if (has_B_code)
-      printf ("B");
-    if (has_T_code)
-      printf ("T");
-    if (has_W_code)
-      printf ("!");
-    if (has_F_code)
-      printf ("^");
-
-    printf ("\n");
-
-    c = *codes;
-
-    printf ("   (%d)  decode - '0x%02X'\n", ins->operands, c);
-
-
-    bytes[0] = 0xB;
-    bytes[1] = 0xE;
-    bytes[2] = 0xE;
-    bytes[3] = 0xF;
-      }
-
-    // First condition code in upper nibble
-    if (ins->condition < C_NONE)
-      {
-        c = cc_code[ins->condition] << 4;
-      }
-    else
-      {
-        c = cc_code[C_AL] << 4; // is often ALWAYS but not always
-      }
-
-
-    switch (keep = *codes)
-      {
-        case 1:
-          // B, BL
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-
-          if (ins->oprs[0].segment != segment)
-            {
-              // fais une relocation
-              c = 1;
-              data = 0; // Let the linker locate ??
-            }
-          else
-            {
-              c = 0;
-              data = ins->oprs[0].offset - (offset + 8);
-
-              if (data % 4)
-                {
-                  errfunc (ERR_NONFATAL, "offset not aligned on 4 bytes");
-                }
-            }
-
-          if (data >= 0x1000)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-
-          data = data >> 2;
-          bytes[1] = (data >> 16) & 0xFF;
-          bytes[2] = (data >> 8)  & 0xFF;
-          bytes[3] = (data )      & 0xFF;
-
-          if (c == 1)
-            {
-//            out (offset, segment, &bytes[0], OUT_RAWDATA+1, NO_SEG, NO_SEG);
-              out (offset, segment, &bytes[0], OUT_REL3ADR+4, ins->oprs[0].segment, NO_SEG);
-            }
-          else
-            {
-              out (offset, segment, &bytes[0], OUT_RAWDATA+4, NO_SEG, NO_SEG);
-            }
-          return;
-
-        case 2:
-          // SWI
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-          data = ins->oprs[0].offset;
-          bytes[1] = (data >> 16) & 0xFF;
-          bytes[2] = (data >> 8) & 0xFF;
-          bytes[3] = (data) & 0xFF;
-          out (offset, segment, &bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-        case 3:
-          // BX
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-          bytes[1] = *codes++;
-          bytes[2] = *codes++;
-          bytes[3] = *codes++;
-          c = regval (&ins->oprs[0],1);
-          if (c == 15)  // PC
-            {
-              errfunc (ERR_WARNING, "'BX' with R15 has undefined behaviour");
-            }
-          else if (c > 15)
-            {
-              errfunc (ERR_NONFATAL, "Illegal register specified for 'BX'");
-            }
-
-          bytes[3] |= (c & 0x0F);
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 4:         // AND Rd,Rn,Rm
-        case 5:         // AND Rd,Rn,Rm,<shift>Rs
-        case 6:         // AND Rd,Rn,Rm,<shift>imm
-        case 7:         // AND Rd,Rn,<shift>imm
-          ++codes;
-#ifdef DEBUG
-          if (rt_debug)
-            {
-              printf ("         decode - '0x%02X'\n", keep);
-              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
-            }
-#endif
-          bytes[0] = c | *codes;
-          ++codes;
-
-          bytes[1] = *codes;
-          if (has_S_code)
-            bytes[1] |= 0x10;
-          c = regval (&ins->oprs[1],1);
-          // Rn in low nibble
-          bytes[1] |= c;
-
-          // Rd in high nibble
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          if (keep != 7)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[2],1);
-            }
-
-          // Shifts if any
-          if (keep == 5 || keep == 6)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 5)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[3],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 6)
-                {
-                  c = (ins->oprs[3].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[3]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,reg,imm
-          if (keep == 7)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[2].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 8:         // MOV Rd,Rm
-        case 9:         // MOV Rd,Rm,<shift>Rs
-        case 0xA:       // MOV Rd,Rm,<shift>imm
-        case 0xB:       // MOV Rd,<shift>imm
-          ++codes;
-#ifdef DEBUG
-          if (rt_debug)
-            {
-              printf ("         decode - '0x%02X'\n", keep);
-              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
-            }
-#endif
-          bytes[0] = c | *codes;
-          ++codes;
-
-          bytes[1] = *codes;
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // Rd in high nibble
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          if (keep != 0x0B)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[1],1);
-            }
-
-          // Shifts if any
-          if (keep == 0x09 || keep == 0x0A)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 0x09)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[2],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 0x0A)
-                {
-                  c = (ins->oprs[2].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[2]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,imm
-          if (keep == 0x0B)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-
-        case 0xC:       // CMP Rn,Rm
-        case 0xD:       // CMP Rn,Rm,<shift>Rs
-        case 0xE:       // CMP Rn,Rm,<shift>imm
-        case 0xF:       // CMP Rn,<shift>imm
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes;
-
-          // Implicit S code
-          bytes[1] |= 0x10;
-
-          c = regval (&ins->oprs[0],1);
-          // Rn in low nibble
-          bytes[1] |= c;
-
-          // No destination
-          bytes[2] = 0;
-
-          if (keep != 0x0B)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[1],1);
-            }
-
-          // Shifts if any
-          if (keep == 0x0D || keep == 0x0E)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 0x0D)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[2],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 0x0E)
-                {
-                  c = (ins->oprs[2].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[2]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,imm
-          if (keep == 0x0F)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 0x10:      // MRS Rd,<psr>
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          c = regval (&ins->oprs[0],1);
-
-          bytes[2] = c << 4;
-
-          bytes[3] = 0;
-
-          c = ins->oprs[1].basereg;
-
-          if (c == R_CPSR || c == R_SPSR)
-            {
-              if (c == R_SPSR)
-                {
-                  bytes[1] |= 0x40;
-                }
-            }
-          else
-            {
-              errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-
-          return;
-
-        case 0x11:      // MSR <psr>,Rm
-        case 0x12:      // MSR <psrf>,Rm
-        case 0x13:      // MSR <psrf>,#expression
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[2] = *codes;
-
-
-          if (keep == 0x11 || keep == 0x12)
-            {
-              // Rm
-              c = regval (&ins->oprs[1],1);
-
-              bytes[3] = c;
-            }
-          else
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          c = ins->oprs[0].basereg;
-
-          if ( keep == 0x11)
-            {
-              if ( c == R_CPSR || c == R_SPSR)
-                {
-                if ( c== R_SPSR)
-                  {
-                    bytes[1] |= 0x40;
-                  }
-                }
-            else
-              {
-                errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
-              }
-            }
-          else
-            {
-              if ( c == R_CPSR_FLG || c == R_SPSR_FLG)
-                {
-                  if ( c== R_SPSR_FLG)
-                    {
-                      bytes[1] |= 0x40;
-                    }
-                }
-              else
-                {
-                  errfunc (ERR_NONFATAL, "CPSR_flg or SPSR_flg expected");
-                }
-            }
-          break;
-
-        case 0x14:      // MUL  Rd,Rm,Rs
-        case 0x15:      // MULA Rd,Rm,Rs,Rn
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[3] = *codes;
-
-          // Rd
-          bytes[1] |= regval (&ins->oprs[0],1);
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // Rm
-          bytes[3] |= regval (&ins->oprs[1],1);
-
-          // Rs
-          bytes[2] = regval (&ins->oprs[2],1);
-
-          if (keep == 0x15)
-            {
-              bytes[2] |= regval (&ins->oprs[3],1) << 4;
-            }
-          break;
-
-        case 0x16:      // SMLAL RdHi,RdLo,Rm,Rs
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[3] = *codes;
-
-          // RdHi
-          bytes[1] |= regval (&ins->oprs[1],1);
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // RdLo
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          // Rm
-          bytes[3] |= regval (&ins->oprs[2],1);
-
-          // Rs
-          bytes[2] |= regval (&ins->oprs[3],1);
-
-          break;
-
-        case 0x17:      // LDR Rd, expression
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-          if (has_T_code)
-            {
-              errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
-            }
-          if (has_W_code)
-            {
-              errfunc (ERR_NONFATAL, "'!' not allowed");
-            }
-
-          // Rn - implicit R15
-          bytes[1] |= 0xF;
-
-          if (ins->oprs[1].segment != segment)
-            {
-              errfunc (ERR_NONFATAL, "label not in same segment");
-            }
-
-          data = ins->oprs[1].offset - (offset + 8);
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x1000)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-
-          bytes[2] |= ((data & 0xF00) >> 8);
-          bytes[3] = data & 0xFF;
-          break;
-
-        case 0x18:      // LDR Rd, [Rn]
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-          if (has_T_code)
-            {
-              bytes[1] |= 0x20;         // write-back
-            }
-          else
-            {
-              bytes[0] |= 0x01;         // implicit pre-index mode
-            }
-
-          if (has_W_code)
-            {
-              bytes[1] |= 0x20;         // write-back
-            }
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (c == 0x15)                // R15
-            data = -8;
-          else
-            data = 0;
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          bytes[2] |= ((data & 0xF00) >> 8);
-          bytes[3] = data & 0xFF;
-          break;
-
-        case 0x19:      // LDR Rd, [Rn,#expression]
-        case 0x20:      // LDR Rd, [Rn,Rm]
-        case 0x21:      // LDR Rd, [Rn,Rm,shift]
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
-            {
-              bytes[0] |= 0x01;         // pre-index mode
-              if (has_W_code)
-                {
-                  bytes[1] |= 0x20;
-                }
-              if (has_T_code)
-                {
-                  errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
-                }
-            }
-          else
-            {
-              if (has_T_code)           // Forced write-back in post-index mode
-                {
-                  bytes[1] |= 0x20;
-                }
-              if (has_W_code)
-                {
-                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
-                }
-            }
-
-          if (keep == 0x19)
-            {
-              data = ins->oprs[2].offset;
-
-              if (data < 0)
-                {
-                  data = -data;
-                }
-              else
-                {
-                  bytes[1] |= 0x80;
-                }
-
-              if (data >= 0x1000)
-                {
-                  errfunc (ERR_NONFATAL, "too long offset");
-                }
-
-              bytes[2] |= ((data & 0xF00) >> 8);
-              bytes[3] = data & 0xFF;
-            }
-          else
-            {
-              if (ins->oprs[2].minus == 0)
-                {
-                  bytes[1] |= 0x80;
-                }
-              c = regval (&ins->oprs[2],1);
-              bytes[3] = c;
-
-              if (keep == 0x21)
-                {
-                  c = ins->oprs[3].offset;
-                  if (c > 0x1F)
-                    {
-                      errfunc (ERR_NONFATAL, "too large shiftvalue");
-                      c = c & 0x1F;
-                    }
-
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  bytes[3] |= shiftval (&ins->oprs[3]) << 5;
-                }
-            }
-
-          break;
-
-        case 0x22:      // LDRH Rd, expression
-          ++codes;
-
-          bytes[0] = c | 0x01;          // Implicit pre-index
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn - implicit R15
-          bytes[1] |= 0xF;
-
-          if (ins->oprs[1].segment != segment)
-            {
-              errfunc (ERR_NONFATAL, "label not in same segment");
-            }
-
-          data = ins->oprs[1].offset - (offset + 8);
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x100)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-          bytes[3] = *codes++;
-
-          bytes[2] |= ((data & 0xF0) >> 4);
-          bytes[3] |= data & 0xF;
-          break;
-
-        case 0x23:      // LDRH Rd, Rn
-          ++codes;
-
-          bytes[0] = c | 0x01;          // Implicit pre-index
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (c == 0x15)                // R15
-            data = -8;
-          else
-            data = 0;
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x100)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-          bytes[3] = *codes++;
-
-          bytes[2] |= ((data & 0xF0) >> 4);
-          bytes[3] |= data & 0xF;
-          break;
-
-        case 0x24:      // LDRH Rd, Rn, expression
-        case 0x25:      // LDRH Rd, Rn, Rm
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
-            {
-              bytes[0] |= 0x01;         // pre-index mode
-              if (has_W_code)
-                {
-                  bytes[1] |= 0x20;
-                }
-            }
-          else
-            {
-              if (has_W_code)
-                {
-                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
-                }
-            }
-
-          bytes[3] = *codes++;
-
-          if (keep == 0x24)
-            {
-              data = ins->oprs[2].offset;
-
-              if (data < 0)
-                {
-                  data = -data;
-                }
-              else
-                {
-                  bytes[1] |= 0x80;
-                }
-
-              if (data >= 0x100)
-                {
-                  errfunc (ERR_NONFATAL, "too long offset");
-                }
-
-              bytes[2] |= ((data & 0xF0) >> 4);
-              bytes[3] |= data & 0xF;
-            }
-          else
-            {
-              if (ins->oprs[2].minus == 0)
-                {
-                  bytes[1] |= 0x80;
-                }
-              c = regval (&ins->oprs[2],1);
-              bytes[3] |= c;
-
-            }
-          break;
-
-        case 0x26:      // LDM/STM Rn, {reg-list}
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[0] |= ( *codes >> 4) & 0xF;
-          bytes[1] = ( *codes << 4) & 0xF0;
-          ++codes;
-
-          if (has_W_code)
-            {
-              bytes[1] |= 0x20;
-            }
-          if (has_F_code)
-            {
-              bytes[1] |= 0x40;
-            }
-
-          // Rn
-          bytes[1] |= regval (&ins->oprs[0],1);
-
-          data = ins->oprs[1].basereg;
-
-          bytes[2] = ((data >> 8) & 0xFF);
-          bytes[3] = (data & 0xFF);
-
-          break;
-
-        case 0x27:      // SWP Rd, Rm, [Rn]
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[0] |= *codes++;
-
-          bytes[1] = regval (&ins->oprs[2],1);
-          if (has_B_code)
-            {
-              bytes[1] |= 0x40;
-            }
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          bytes[3] = *codes++;
-          bytes[3] |= regval (&ins->oprs[1],1);
-          break;
-
-        default:
-          errfunc (ERR_FATAL, "unknown decoding of instruction");
-
-          bytes[0] = c;
-          // And a fix nibble
-          ++codes;
-          bytes[0] |= *codes++;
-
-         if ( *codes == 0x01)           // An I bit
-           {
-
-           }
-         if ( *codes == 0x02)           // An I bit
-           {
-
-           }
-         ++codes;
-      }
-    out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-}
-
-*)
-{$endif dummy}
-
 begin
 begin
   cai_align:=tai_align;
   cai_align:=tai_align;
 end.
 end.

+ 288 - 0
compiler/aarch64/agcpugas.pas

@@ -0,0 +1,288 @@
+{
+    Copyright (c) 2003,2014 by Florian Klaempfl and Jonas Maebe
+
+    This unit implements an asm for AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+{ This unit implements the GNU Assembler writer for AArch64
+}
+
+unit agcpugas;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+       globtype,
+       aasmtai,
+       aggas,
+       cpubase,cpuinfo;
+
+    type
+      TAArch64InstrWriter=class(TCPUInstrWriter)
+        procedure WriteInstruction(hp : tai);override;
+      end;
+
+      TAArch64AppleAssembler=class(TAppleGNUassembler)
+        constructor create(smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
+      end;
+
+
+    const
+      gas_shiftmode2str : array[tshiftmode] of string[4] = (
+        '','lsl','lsr','asr',
+        'uxtb','uxth','uxtw','uxtx',
+        'sxtb','sxth','sxtw','sxtx');
+
+    const 
+      cputype_to_gas_march : array[tcputype] of string = (
+        '', // cpu_none
+        'armv8'
+      );
+
+  implementation
+
+    uses
+       cutils,globals,verbose,
+       systems,
+       assemble,
+       aasmcpu,
+       itcpugas,
+       cgbase,cgutils;
+
+
+{****************************************************************************}
+{                      Apple AArch64 Assembler writer                        }
+{****************************************************************************}
+
+    constructor TAArch64AppleAssembler.create(smart: boolean);
+      begin
+        inherited create(smart);
+        InstrWriter := TAArch64InstrWriter.create(self);
+      end;
+
+    function TAArch64AppleAssembler.MakeCmdLine: TCmdStr;
+      begin
+        { 'as' calls through to clang for aarch64, and that one only supports
+          reading from standard input in case "-" is specified as input file
+          (in which case you also have to specify the language via -x) }
+        result:=inherited;
+{$ifdef hasunix}
+        if DoPipe then
+          result:=result+' -x assembler -'
+{$endif}
+      end;
+
+
+{****************************************************************************}
+{                  Helper routines for Instruction Writer                    }
+{****************************************************************************}
+
+    function getreferencestring(var ref : treference) : string;
+      const
+        darwin_addrpage2str: array[addr_page..addr_gotpageoffset] of string[11] =
+           ('@PAGE','@PAGEOFF','@GOTPAGE','@GOTPAGEOFF');
+      begin
+        if ref.base=NR_NO then
+          begin
+            case ref.refaddr of
+              addr_gotpage,
+              addr_page,
+              addr_gotpageoffset,
+              addr_pageoffset:
+                begin
+                  if not assigned(ref.symbol) or
+                     (ref.base<>NR_NO) or
+                     (ref.index<>NR_NO) or
+                     (ref.shiftmode<>SM_None) or
+                     (ref.offset<>0) then
+                    internalerror(2014121501);
+                  if target_asm.id=as_darwin then
+                    result:=ref.symbol.name+darwin_addrpage2str[ref.refaddr]
+                  else
+                    { todo }
+                    internalerror(2014121502);
+                end
+              else
+                internalerror(2015022301);
+            end
+          end
+        else
+          begin
+            result:='['+gas_regname(ref.base);
+            if ref.addressmode=AM_POSTINDEXED then
+              result:=result+']';
+            if ref.index<>NR_NO then
+              begin
+                if (ref.offset<>0) or
+                   assigned(ref.symbol) then
+                  internalerror(2014121504);
+                result:=result+', '+gas_regname(ref.index);
+                case ref.shiftmode of
+                  SM_None: ;
+                  SM_LSL,
+                  SM_UXTW, SM_UXTX, SM_SXTW, SM_SXTX:
+                    begin
+                      result:=result+', '+gas_shiftmode2str[ref.shiftmode];
+                      if (ref.shiftmode=SM_LSL) or
+                         (ref.shiftimm<>0) then
+                        result:=result+' #'+tostr(ref.shiftimm);
+                    end
+                  else
+                    internalerror(2014121505);
+                end;
+              end
+            else
+              begin
+                if assigned(ref.symbol) then
+                  begin
+                    case ref.refaddr of
+                      addr_gotpageoffset,
+                      addr_pageoffset:
+                        begin
+                          if target_asm.id=as_darwin then
+                            result:=result+', '+ref.symbol.name+darwin_addrpage2str[ref.refaddr]
+                          else
+                            { todo }
+                            internalerror(2014122510);
+                        end
+                      else
+                        { todo: not yet generated/don't know syntax }
+                        internalerror(2014121506);
+                    end;
+                  end
+                else
+                  begin
+                    if ref.refaddr<>addr_no then
+                      internalerror(2014121506);
+                    if (ref.offset<>0) then
+                      result:=result+', #'+tostr(ref.offset);
+                  end;
+              end;
+            case ref.addressmode of
+              AM_OFFSET:
+                result:=result+']';
+              AM_PREINDEXED:
+                result:=result+']!';
+            end;
+          end;
+      end;
+
+
+    function getopstr(hp: taicpu; opnr: longint; const o: toper): string;
+      begin
+        case o.typ of
+          top_reg:
+            { we cannot yet represent "umov w0, v4.s[0]" or "ins v4.d[0], x1",
+              so for now we use "s4" or "d4" instead -> translate here }
+            if ((hp.opcode=A_INS) or
+                (hp.opcode=A_UMOV)) and
+               (getregtype(hp.oper[opnr]^.reg)=R_MMREGISTER) then
+              begin
+                case getsubreg(hp.oper[opnr]^.reg) of
+                  R_SUBMMS:
+                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.S[0]';
+                  R_SUBMMD:
+                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.D[0]';
+                  else
+                    internalerror(2014122907);
+                end;
+              end
+            else
+              getopstr:=gas_regname(o.reg);
+          top_shifterop:
+            begin
+              getopstr:=gas_shiftmode2str[o.shifterop^.shiftmode];
+              if o.shifterop^.shiftimm<>0 then
+                getopstr:=getopstr+' #'+tostr(o.shifterop^.shiftimm)
+            end;
+          top_const:
+            if o.val>=0 then
+              getopstr:='#'+tostr(o.val)
+            else
+              getopstr:='#0x'+hexStr(o.val,16);
+          top_conditioncode:
+            getopstr:=cond2str[o.cc];
+          top_ref:
+            if is_calljmp(hp.opcode) then
+              begin
+                if o.ref^.refaddr<>addr_full then
+                  internalerror(2014122220);
+                if not assigned(o.ref^.symbol) or
+                   assigned(o.ref^.relsymbol) or
+                   (o.ref^.base<>NR_NO) or
+                   (o.ref^.index<>NR_NO) or
+                   (o.ref^.offset<>0) then
+                  internalerror(2014122221);
+                getopstr:=o.ref^.symbol.name;
+              end
+            else
+              getopstr:=getreferencestring(o.ref^);
+          else
+            internalerror(2014121507);
+        end;
+      end;
+
+
+    procedure TAArch64InstrWriter.WriteInstruction(hp : tai);
+      var
+        op: TAsmOp;
+        s: string;
+        i: byte;
+        sep: string[3];
+      begin
+        op:=taicpu(hp).opcode;
+        s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix];
+        if taicpu(hp).condition<>C_NONE then
+          s:=s+'.'+cond2str[taicpu(hp).condition];
+        if taicpu(hp).ops<>0 then
+          begin
+            sep:=#9;
+            for i:=0 to taicpu(hp).ops-1 do
+              begin
+                 // debug code
+                 // writeln(s);
+                 // writeln(taicpu(hp).fileinfo.line);
+                 s:=s+sep+getopstr(taicpu(hp),i,taicpu(hp).oper[i]^);
+                 sep:=',';
+              end;
+          end;
+        owner.AsmWriteLn(s);
+      end;
+
+
+    const
+       as_aarch64_gas_darwin_info : tasminfo =
+          (
+            id     : as_darwin;
+            idtxt  : 'AS-Darwin';
+            asmbin : 'as';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM -arch arm64';
+            supported_targets : [system_aarch64_darwin];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
+            labelprefix : 'L';
+            comment : '# ';
+            dollarsign: '$';
+          );
+
+
+begin
+  RegisterAssembler(as_aarch64_gas_darwin_info,TAArch64AppleAssembler);
+end.

+ 25 - 7
compiler/aarch64/aoptcpub.pas

@@ -118,17 +118,35 @@ Implementation
     End;
     End;
 
 
 
 
-  function TAoptBaseCpu.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
+  function TAoptBaseCpu.RegModifiedByInstruction(reg: tregister; p1: tai): boolean;
     var
     var
-      i : Longint;
+      i: longint;
+      preg: tregister;
     begin
     begin
       result:=false;
       result:=false;
       for i:=0 to taicpu(p1).ops-1 do
       for i:=0 to taicpu(p1).ops-1 do
-        if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
-          begin
-            result:=true;
-            exit;
-          end;
+        case taicpu(p1).oper[i]^.typ of
+          top_reg:
+            begin
+              preg:=taicpu(p1).oper[i]^.reg;
+              if (getregtype(preg)=getregtype(reg)) and
+                 (getsupreg(preg)=getsupreg(reg)) and
+                 (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
+                begin
+                  result:=true;
+                  exit;
+                end;
+            end;
+          top_ref:
+            begin
+              if (taicpu(p1).oper[i]^.ref^.addressmode<>am_offset) and
+                 (reg=taicpu(p1).oper[i]^.ref^.base) then
+                begin
+                  result:=true;
+                  exit
+                end;
+            end;
+        end;
     end;
     end;
 
 
 End.
 End.

+ 2217 - 0
compiler/aarch64/cgcpu.pas

@@ -0,0 +1,2217 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    This unit implements the code generator for AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       globtype,parabase,
+       cgbase,cgutils,cgobj,
+       aasmbase,aasmtai,aasmdata,aasmcpu,
+       cpubase,cpuinfo,
+       node,symconst,SymType,symdef,
+       rgcpu;
+
+    type
+      tcgaarch64=class(tcg)
+       protected
+        { changes register size without adding register allocation info }
+        function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
+       public
+        { simplifies "ref" so it can be used with "op". If "ref" can be used
+          with a different load/Store operation that has the same meaning as the
+          original one, "op" will be replaced with the alternative }
+        procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+        function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
+        procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+        procedure init_register_allocators;override;
+        procedure done_register_allocators;override;
+        function  getmmregister(list:TAsmList;size:tcgsize):tregister;override;
+        function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+        procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
+        procedure a_call_reg(list:TAsmList;Reg:tregister);override;
+        { General purpose instructions }
+        procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+        procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
+        procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
+        procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
+        procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        { move instructions }
+        procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
+        procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
+        procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
+        procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
+        procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
+        procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
+        procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
+        { fpu move instructions (not used, all floating point is vector unit-based) }
+        procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
+        procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
+        procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
+        procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
+        procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
+        procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
+
+        procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+        procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
+        { comparison operations }
+        procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
+        procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
+        procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
+        procedure a_jmp_name(list: TAsmList; const s: string);override;
+        procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
+        procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
+        procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
+        procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
+        procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
+        procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
+        procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
+        procedure g_maybe_got_init(list: TAsmList); override;
+        procedure g_restore_registers(list: TAsmList);override;
+        procedure g_save_registers(list: TAsmList);override;
+        procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
+        procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
+        procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
+       private
+        function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+        procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+      end;
+
+    procedure create_codegen;
+
+    const
+      TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
+      );
+      TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
+      );
+      TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
+        C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
+      );
+
+
+implementation
+
+  uses
+    globals,verbose,systems,cutils,
+    paramgr,fmodule,
+    symtable,symsym,
+    tgobj,
+    procinfo,cpupi;
+
+
+    procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+      var
+        href: treference;
+        so: tshifterop;
+        accesssize: longint;
+      begin
+        if (ref.base=NR_NO) then
+          begin
+            if ref.shiftmode<>SM_None then
+              internalerror(2014110701);
+            ref.base:=ref.index;
+            ref.index:=NR_NO;
+          end;
+        { no abitrary scale factor support (the generic code doesn't set it,
+          AArch-specific code shouldn't either) }
+        if not(ref.scalefactor in [0,1]) then
+          internalerror(2014111002);
+
+        case simple_ref_type(op,size,oppostfix,ref) of
+          sr_simple:
+            exit;
+          sr_internal_illegal:
+            internalerror(2014121702);
+          sr_complex:
+            { continue } ;
+        end;
+
+        if assigned(ref.symbol) then
+          begin
+            { internal "load symbol" instructions should already be valid }
+            if assigned(ref.symboldata) or
+               (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
+              internalerror(2014110802);
+            { no relative symbol support (needed) yet }
+            if assigned(ref.relsymbol) then
+              internalerror(2014111001);
+            { on Darwin: load the address from the GOT. There does not appear to
+              be a non-GOT variant. This consists of first loading the address
+              of the page containing the GOT entry for this variable, and then
+              the address of the entry itself from that page (can be relaxed by
+              the linker in case the variable itself can be stored directly in
+              the GOT) }
+            if target_info.system in systems_darwin then
+              begin
+                if (preferred_newbasereg=NR_NO) or
+                   (ref.base=preferred_newbasereg) or
+                   (ref.index=preferred_newbasereg) then
+                  preferred_newbasereg:=getaddressregister(list);
+                { load the (GOT) page }
+                reference_reset_symbol(href,ref.symbol,0,8);
+                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+                   ((ref.symbol.typ=AT_DATA) and
+                    (ref.symbol.bind=AB_LOCAL)) then
+                  href.refaddr:=addr_page
+                else
+                  href.refaddr:=addr_gotpage;
+                list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
+                { load the GOT entry (= address of the variable) }
+                reference_reset_base(href,preferred_newbasereg,0,sizeof(pint));
+                href.symbol:=ref.symbol;
+                { code symbols defined in the current compilation unit do not
+                  have to be accessed via the GOT }
+                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+                   ((ref.symbol.typ=AT_DATA) and
+                    (ref.symbol.bind=AB_LOCAL)) then
+                  begin
+                    href.base:=NR_NO;
+                    href.refaddr:=addr_pageoffset;
+                    list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
+                  end
+                else
+                  begin
+                    href.refaddr:=addr_gotpageoffset;
+                    { use a_load_ref_reg() rather than directly encoding the LDR,
+                      so that we'll check the validity of the reference }
+                    a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
+                  end;
+                { set as new base register }
+                if ref.base=NR_NO then
+                  ref.base:=preferred_newbasereg
+                else if ref.index=NR_NO then
+                  ref.index:=preferred_newbasereg
+                else
+                  begin
+                    { make sure it's valid in case ref.base is SP -> make it
+                      the second operand}
+                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
+                    ref.base:=preferred_newbasereg
+                  end;
+                ref.symbol:=nil;
+              end
+            else
+              { todo }
+              internalerror(2014111003);
+          end;
+
+        { base & index }
+        if (ref.base<>NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            case op of
+              A_LDR, A_STR:
+                begin
+                  if (ref.shiftmode=SM_None) and
+                     (ref.shiftimm<>0) then
+                    internalerror(2014110805);
+                  { wrong shift? (possible in case of something like
+                     array_of_2byte_rec[x].bytefield -> shift will be set 1, but
+                     the final load is a 1 byte -> can't use shift after all }
+                  if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                     ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
+                      (ref.offset<>0)) then
+                    begin
+                      if preferred_newbasereg=NR_NO then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                      { possibly still an invalid offset -> fall through }
+                    end
+                  else if ref.offset<>0 then
+                    begin
+                      if (preferred_newbasereg=NR_NO) or
+                         { we keep ref.index, so it must not be overwritten }
+                         (ref.index=preferred_newbasereg) then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { add to the base and not to the index, because the index
+                        may be scaled; this works even if the base is SP }
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      ref.offset:=0;
+                      ref.base:=preferred_newbasereg;
+                      { finished }
+                      exit;
+                    end
+                  else
+                    { valid -> exit }
+                    exit;
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110704);
+              { these don't support base+index }
+              A_LDUR,A_STUR,
+              A_LDP,A_STP:
+                begin
+                  { these either don't support pre-/post-indexing, or don't
+                    support it with base+index }
+                  if ref.addressmode<>AM_OFFSET then
+                    internalerror(2014110911);
+                  if preferred_newbasereg=NR_NO then
+                    preferred_newbasereg:=getaddressregister(list);
+                  if ref.shiftmode<>SM_None then
+                    begin
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                    end
+                  else
+                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
+                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                  { fall through to the handling of base + offset, since the
+                    offset may still be too big }
+                end;
+              else
+                internalerror(2014110901);
+            end;
+          end;
+
+        { base + offset }
+        if ref.base<>NR_NO then
+          begin
+            { valid offset for LDUR/STUR -> use that }
+            if (ref.addressmode=AM_OFFSET) and
+               (op in [A_LDR,A_STR]) and
+               (ref.offset>=-256) and
+               (ref.offset<=255) then
+              begin
+                if op=A_LDR then
+                  op:=A_LDUR
+                else
+                  op:=A_STUR
+              end
+            { if it's not a valid LDUR/STUR, use LDR/STR }
+            else if (op in [A_LDUR,A_STUR]) and
+               ((ref.offset<-256) or
+                (ref.offset>255) or
+                (ref.addressmode<>AM_OFFSET)) then
+              begin
+                if op=A_LDUR then
+                  op:=A_LDR
+                else
+                  op:=A_STR
+              end;
+            case op of
+              A_LDR,A_STR:
+                begin
+                  case ref.addressmode of
+                    AM_PREINDEXED:
+                      begin
+                        { since the loaded/stored register cannot be the same
+                          as the base register, we can safely add the
+                          offset to the base if it doesn't fit}
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          begin
+                            a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
+                            ref.offset:=0;
+                          end;
+                      end;
+                    AM_POSTINDEXED:
+                      begin
+                        { cannot emulate post-indexing if we have to fold the
+                          offset into the base register }
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          internalerror(2014110909);
+                        { ok }
+                      end;
+                    AM_OFFSET:
+                      begin
+                        { unsupported offset -> fold into base register }
+                        accesssize:=1 shl tcgsizep2size[size];
+                        if (ref.offset<0) or
+                           (ref.offset>(((1 shl 12)-1)*accesssize)) or
+                           ((ref.offset mod accesssize)<>0) then
+                          begin
+                            if preferred_newbasereg=NR_NO then
+                              preferred_newbasereg:=getaddressregister(list);
+                            { can we split the offset beween an
+                              "add/sub (imm12 shl 12)" and the load (also an
+                              imm12)?
+                              -- the offset from the load will always be added,
+                                that's why the lower bound has a smaller range
+                                than the upper bound; it must also be a multiple
+                                of the access size }
+                            if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
+                               (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
+                               ((ref.offset mod accesssize)=0) then
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
+                                ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
+                              end
+                            else
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                                ref.offset:=0;
+                              end;
+                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                          end;
+                      end
+                    else
+                      internalerror(2014110904);
+                  end;
+                end;
+              A_LDP,A_STP:
+                begin
+                  { unsupported offset -> fold into base register (these
+                    instructions support all addressmodes) }
+                  if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
+                     (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
+                    begin
+                      case ref.addressmode of
+                        AM_POSTINDEXED:
+                          { don't emulate post-indexing if we have to fold the
+                            offset into the base register }
+                          internalerror(2014110910);
+                        AM_PREINDEXED:
+                          { this means the offset must be added to the current
+                            base register }
+                          preferred_newbasereg:=ref.base;
+                        AM_OFFSET:
+                          if preferred_newbasereg=NR_NO then
+                            preferred_newbasereg:=getaddressregister(list);
+                      end;
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      reference_reset_base(ref,preferred_newbasereg,0,ref.alignment);
+                    end
+                end;
+              A_LDUR,A_STUR:
+                begin
+                  { valid, checked above }
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110908);
+              else
+                internalerror(2014110708);
+            end;
+            { done }
+            exit;
+          end;
+
+        { only an offset -> change to base (+ offset 0) }
+        if preferred_newbasereg=NR_NO then
+          preferred_newbasereg:=getaddressregister(list);
+        a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
+        reference_reset_base(ref,preferred_newbasereg,0,newalignment(8,ref.offset));
+      end;
+
+
+    function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
+      var
+        subreg:Tsubregister;
+      begin
+        subreg:=cgsize2subreg(getregtype(reg),size);
+        result:=reg;
+        setsubreg(result,subreg);
+      end;
+
+
+    function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
+      begin
+        internalerror(2014122110);
+        { squash warning }
+        result:=NR_NO;
+      end;
+
+
+    function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+      begin
+        make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
+        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
+        result:=ref;
+      end;
+
+
+    procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+      var
+        instr: taicpu;
+        so: tshifterop;
+        hadtmpreg: boolean;
+      begin
+        { imm12 }
+        if (a>=0) and
+           (a<=((1 shl 12)-1)) then
+          if usedest then
+            instr:=taicpu.op_reg_reg_const(op,dst,src,a)
+          else
+            instr:=taicpu.op_reg_const(op,src,a)
+        { imm12 lsl 12 }
+        else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
+          begin
+            so.shiftmode:=SM_LSL;
+            so.shiftimm:=12;
+            if usedest then
+              instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
+            else
+              instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
+          end
+        else
+          begin
+            { todo: other possible optimizations (e.g. load 16 bit constant in
+                register and then add/sub/cmp/cmn shifted the rest) }
+            if tmpreg=NR_NO then
+              begin
+                hadtmpreg:=false;
+                tmpreg:=getintregister(list,size);
+              end
+            else
+              begin
+                hadtmpreg:=true;
+                getcpuregister(list,tmpreg);
+              end;
+            a_load_const_reg(list,size,a,tmpreg);
+            if usedest then
+              instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
+            else
+              instr:=taicpu.op_reg_reg(op,src,tmpreg);
+            if hadtmpreg then
+              ungetcpuregister(list,tmpreg);
+          end;
+        if setflags then
+          setoppostfix(instr,PF_S);
+        list.concat(instr);
+      end;
+
+
+{****************************************************************************
+                              Assembler code
+****************************************************************************}
+
+    procedure tcgaarch64.init_register_allocators;
+      begin
+        inherited init_register_allocators;
+
+        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
+            [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
+             RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
+             RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
+             { maybe we can enable this in the future for leaf functions (it's
+               the frame pointer)
+             ,RS_X29 }],
+            first_int_imreg,[]);
+
+        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
+            [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
+             RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
+             RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
+             RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
+            first_mm_imreg,[]);
+      end;
+
+
+    procedure tcgaarch64.done_register_allocators;
+      begin
+        rg[R_INTREGISTER].free;
+        rg[R_FPUREGISTER].free;
+        rg[R_MMREGISTER].free;
+        inherited done_register_allocators;
+      end;
+
+
+    function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
+      begin
+        case size of
+          OS_F32:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
+          OS_F64:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
+          else
+            internalerror(2014102701);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
+      begin
+        if not weak then
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s)))
+        else
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s)));
+      end;
+
+
+    procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
+      begin
+        list.concat(taicpu.op_reg(A_BLR,reg));
+      end;
+
+
+    {********************** load instructions ********************}
+
+    procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
+      var
+        preva: tcgint;
+        opc: tasmop;
+        shift,maxshift: byte;
+        so: tshifterop;
+        reginited: boolean;
+        mask: tcgint;
+      begin
+        { if we load a value into a 32 bit register, it is automatically
+          zero-extended to 64 bit }
+        if (high(a)=0) and
+           (size in [OS_64,OS_S64]) then
+          begin
+            size:=OS_32;
+            reg:=makeregsize(reg,size);
+          end;
+        { values <= 32 bit are stored in a 32 bit register }
+        if not(size in [OS_64,OS_S64]) then
+          a:=cardinal(a);
+
+        if size in [OS_64,OS_S64] then
+          begin
+            mask:=-1;
+            maxshift:=64;
+          end
+        else
+          begin
+            mask:=$ffffffff;
+            maxshift:=32;
+          end;
+        { single movn enough? (to be extended) }
+        shift:=16;
+        preva:=a;
+        repeat
+          if (a shr shift)=(mask shr shift) then
+            begin
+              if shift=16 then
+                list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
+              else
+                begin
+                  shifterop_reset(so);
+                  so.shiftmode:=SM_LSL;
+                  so.shiftimm:=shift-16;
+                  list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
+                end;
+              exit;
+            end;
+          { only try the next 16 bits if the current one is all 1 bits, since
+            the movn will set all lower bits to 1 }
+          if word(a shr (shift-16))<>$ffff then
+            break;
+          inc(shift,16);
+        until shift=maxshift;
+        reginited:=false;
+        shift:=0;
+        { can be optimized later to use more movn }
+        repeat
+          { leftover is shifterconst? (don't check if we can represent it just
+            as effectively with movz/movk, as this check is expensive) }
+          if ((shift<tcgsize2size[size]*(8 div 2)) and
+              (word(a)<>0) and
+              ((a shr 16)<>0)) and
+             is_shifter_const(a shl shift,size) then
+            begin
+              if reginited then
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
+              else
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
+              exit;
+            end;
+          { set all 16 bit parts <> 0 }
+          if (word(a)<>0) or
+             ((shift=0) and
+              (a=0)) then
+            if shift=0 then
+              begin
+                list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
+                reginited:=true;
+              end
+            else
+              begin
+                shifterop_reset(so);
+                so.shiftmode:=SM_LSL;
+                so.shiftimm:=shift;
+                if not reginited then
+                  begin
+                    opc:=A_MOVZ;
+                    reginited:=true;
+                  end
+                else
+                  opc:=A_MOVK;
+                list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
+              end;
+            preva:=a;
+            a:=a shr 16;
+           inc(shift,16);
+        until word(preva)=preva;
+        if not reginited then
+          internalerror(2014102702);
+      end;
+
+
+    procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
+      var
+        reg: tregister;
+      begin
+        { use the zero register if possible }
+        if a=0 then
+          begin
+            if size in [OS_64,OS_S64] then
+              reg:=NR_XZR
+            else
+              reg:=NR_WZR;
+            a_load_reg_ref(list,size,size,reg,ref);
+          end
+        else
+          inherited;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      var
+        oppostfix:toppostfix;
+        hreg: tregister;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          fromsize:=tosize
+        { have a 32 bit register but need a 64 bit one? }
+        else if tosize in [OS_64,OS_S64] then
+          begin
+            { sign extend if necessary }
+            if fromsize in [OS_S8,OS_S16,OS_S32] then
+              begin
+                { can't overwrite reg, may be a constant reg }
+                hreg:=getintregister(list,tosize);
+                a_load_reg_reg(list,fromsize,tosize,reg,hreg);
+                reg:=hreg;
+              end
+            else
+              { top 32 bit are zero by default }
+              reg:=makeregsize(reg,OS_64);
+            fromsize:=tosize;
+          end;
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[tosize]) then
+          begin
+            a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
+          end
+        else
+          begin
+            case tosize of
+              { signed integer registers }
+              OS_8,
+              OS_S8:
+                oppostfix:=PF_B;
+              OS_16,
+              OS_S16:
+                oppostfix:=PF_H;
+              OS_32,
+              OS_S32,
+              OS_64,
+              OS_S64:
+                oppostfix:=PF_None;
+              else
+                InternalError(200308299);
+            end;
+            handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      var
+        oppostfix:toppostfix;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          fromsize:=tosize;
+        { ensure that all bits of the 32/64 register are always correctly set:
+           * default behaviour is always to zero-extend to the entire (64 bit)
+             register -> unsigned 8/16/32 bit loads only exist with a 32 bit
+             target register, as the upper 32 bit will be zeroed implicitly
+             -> always make target register 32 bit
+           * signed loads exist both with 32 and 64 bit target registers,
+             depending on whether the value should be sign extended to 32 or
+             to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
+             corresponding 64 bit register are again zeroed) -> no need to
+             change anything (we only have 32 and 64 bit registers), except that
+             when loading an OS_S32 to a 32 bit register, we don't need/can't
+             use sign extension
+        }
+        if fromsize in [OS_8,OS_16,OS_32] then
+          reg:=makeregsize(reg,OS_32);
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[fromsize]) then
+          begin
+            a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
+            exit;
+          end;
+        case fromsize of
+          { signed integer registers }
+          OS_8:
+            oppostfix:=PF_B;
+          OS_S8:
+            oppostfix:=PF_SB;
+          OS_16:
+            oppostfix:=PF_H;
+          OS_S16:
+            oppostfix:=PF_SH;
+          OS_S32:
+            if getsubreg(reg)=R_SUBD then
+              oppostfix:=PF_NONE
+            else
+              oppostfix:=PF_SW;
+          OS_32,
+          OS_64,
+          OS_S64:
+            oppostfix:=PF_None;
+          else
+            InternalError(200308297);
+        end;
+        handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
+
+        { clear upper 16 bits if the value was negative }
+        if (fromsize=OS_S8) and (tosize=OS_16) then
+          a_load_reg_reg(list,fromsize,tosize,reg,reg);
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg: tregister;
+      begin
+        if fromsize in [OS_64,OS_S64] then
+          begin
+            { split into two 32 bit loads }
+            hreg1:=makeregsize(register,OS_32);
+            hreg2:=getintregister(list,OS_32);
+            if target_info.endian=endian_big then
+              begin
+                tmpreg:=hreg1;
+                hreg1:=hreg2;
+                hreg2:=tmpreg;
+              end;
+            { can we use LDP? }
+            if (ref.alignment=4) and
+               (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
+              list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
+            else
+              begin
+                a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
+                href:=ref;
+                inc(href.offset,4);
+                a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
+              end;
+            list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
+          end
+       else
+         inherited;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
+      var
+        instr: taicpu;
+      begin
+        { we use both 32 and 64 bit registers -> insert conversion when when
+          we have to truncate/sign extend inside the (32 or 64 bit) register
+          holding the value, and when we sign extend from a 32 to a 64 bit
+          register }
+        if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
+           ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
+            (fromsize<>tosize) and
+            not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
+           ((fromsize in [OS_S8,OS_S16,OS_S32]) and
+            (tosize in [OS_64,OS_S64])) or
+           { needs to mask out the sign in the top 16 bits }
+           ((fromsize=OS_S8) and
+            (tosize=OS_16)) then
+          begin
+            case tosize of
+              OS_8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              OS_S8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_S16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              { while "mov wN, wM" automatically inserts a zero-extension and
+                hence we could encode a 64->32 bit move like that, the problem
+                is that we then can't distinguish 64->32 from 32->32 moves, and
+                the 64->32 truncation could be removed altogether... So use a
+                different instruction }
+              OS_32,
+              OS_S32:
+                { in theory, reg1 should be 64 bit here (since fromsize>tosize),
+                  but because of the way location_force_register() tries to
+                  avoid superfluous zero/sign extensions, it's not always the
+                  case -> also force reg1 to to 64 bit }
+                list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
+              OS_64,
+              OS_S64:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
+              else
+                internalerror(2002090901);
+            end;
+          end
+        else
+          begin
+            { 32 -> 32 bit move implies zero extension (sign extensions have
+              been handled above) -> also use for 32 <-> 64 bit moves }
+            if not(fromsize in [OS_64,OS_S64]) or
+               not(tosize in [OS_64,OS_S64]) then
+              instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
+            else
+              instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
+            list.Concat(instr);
+            { Notify the register allocator that we have written a move instruction so
+             it can try to eliminate it. }
+            add_move_instruction(instr);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
+      var
+         href: treference;
+         so: tshifterop;
+         op: tasmop;
+      begin
+        op:=A_LDR;
+        href:=ref;
+        { simplify as if we're going to perform a regular 64 bit load, using
+          "r" as the new base register if possible/necessary }
+        make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
+        { load literal? }
+        if assigned(href.symbol) then
+          begin
+            if (href.base<>NR_NO) or
+               (href.index<>NR_NO) or
+               not assigned(href.symboldata) then
+              internalerror(2014110912);
+            list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
+          end
+        else
+          begin
+            if href.index<>NR_NO then
+              begin
+                if href.shiftmode<>SM_None then
+                  begin
+                    { "add" supports a supperset of the shift modes supported by
+                      load/store instructions }
+                    shifterop_reset(so);
+                    so.shiftmode:=href.shiftmode;
+                    so.shiftimm:=href.shiftimm;
+                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
+                  end
+                else
+                  a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
+              end
+            else if href.offset<>0 then
+              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
+            else
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
+      begin
+        internalerror(2014122107)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      begin
+        internalerror(2014122108)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      begin
+        internalerror(2014122109)
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
+      var
+        instr: taicpu;
+      begin
+        if assigned(shuffle) and
+           not shufflescalar(shuffle) then
+          internalerror(2014122104);
+        if fromsize=tosize then
+          begin
+            instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
+            { Notify the register allocator that we have written a move
+              instruction so it can try to eliminate it. }
+            add_move_instruction(instr);
+          end
+        else
+          begin
+            if (reg_cgsize(reg1)<>fromsize) or
+               (reg_cgsize(reg2)<>tosize) then
+              internalerror(2014110913);
+            instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
+          end;
+        list.Concat(instr);
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122105);
+         tmpreg:=NR_NO;
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=reg;
+             reg:=getmmregister(list,fromsize);
+           end;
+         handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
+         if (fromsize<>tosize) then
+           a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122106);
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=getmmregister(list,tosize);
+             a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+             reg:=tmpreg;
+           end;
+         handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122801);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+           internalerror(2014122803);
+         list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122802);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+           internalerror(2014122804);
+         list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
+       end;
+
+
+    procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
+      begin
+        case op of
+          { "xor Vx,Vx" is used to initialize global regvars to 0 }
+          OP_XOR:
+            begin
+              if (src<>dst) or
+                 (reg_cgsize(src)<>size) or
+                 assigned(shuffle) then
+                internalerror(2015011401);
+              case size of
+                OS_F32,
+                OS_F64:
+                  list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
+                else
+                  internalerror(2015011402);
+              end;
+            end
+          else
+            internalerror(2015011403);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
+      var
+        bitsize,
+        signbit: longint;
+      begin
+        if srcsize in [OS_64,OS_S64] then
+          begin
+            bitsize:=64;
+            signbit:=6;
+          end
+        else
+          begin
+            bitsize:=32;
+            signbit:=5;
+          end;
+        { source is 0 -> dst will have to become 255 }
+        list.concat(taicpu.op_reg_const(A_CMP,src,0));
+        if reverse then
+          begin
+            list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
+            { xor 31/63 is the same as setting the lower 5/6 bits to
+              "31/63-(lower 5/6 bits of dst)" }
+            list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
+          end
+        else
+          begin
+            list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
+            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
+          end;
+        { set dst to -1 if src was 0 }
+        list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
+        { mask the -1 to 255 if src was 0 (anyone find a two-instruction
+          branch-free version? All of mine are 3...) }
+        list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,dst,dst),PF_B));
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg: tregister;
+      begin
+        if fromsize in [OS_64,OS_S64] then
+          begin
+            { split into two 32 bit stores }
+            hreg1:=makeregsize(register,OS_32);
+            hreg2:=getintregister(list,OS_32);
+            a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
+            if target_info.endian=endian_big then
+              begin
+                tmpreg:=hreg1;
+                hreg1:=hreg2;
+                hreg2:=tmpreg;
+              end;
+            { can we use STP? }
+            if (ref.alignment=4) and
+               (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
+              list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
+            else
+              begin
+                a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
+                href:=ref;
+                inc(href.offset,4);
+                a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
+              end;
+          end
+       else
+         inherited;
+      end;
+
+
+    procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+      const
+        overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
+      begin
+        if (op in overflowops) and
+           (size in [OS_8,OS_S8,OS_16,OS_S16]) then
+          a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
+      begin
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            exit;
+          OP_MOVE:
+            a_load_const_reg(list,size,a,reg);
+          OP_NEG,OP_NOT:
+            internalerror(200306011);
+          else
+            a_op_const_reg_reg(list,op,size,a,reg,reg);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
+      begin
+        Case op of
+          OP_NEG,
+          OP_NOT:
+            begin
+              list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
+              maybeadjustresult(list,op,size,dst);
+            end
+          else
+            a_op_reg_reg_reg(list,op,size,src,dst,dst);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
+      var
+        l: tlocation;
+      begin
+        a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
+      var
+        hreg: tregister;
+      begin
+        { no ROLV opcode... }
+        if op=OP_ROL then
+          begin
+            case size of
+              OS_32,OS_S32,
+              OS_64,OS_S64:
+                begin
+                  hreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
+                  a_op_reg_reg(list,OP_SUB,size,src1,hreg);
+                  a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
+                  exit;
+                end;
+              else
+                internalerror(2014111005);
+            end;
+          end
+        else if (op=OP_ROR) and
+           not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
+          internalerror(2014111006);
+        if TOpCG2AsmOpReg[op]=A_NONE then
+          internalerror(2014111007);
+        list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        shiftcountmask: longint;
+        constreg: tregister;
+      begin
+        { add/sub instructions have only positive immediate operands }
+        if (op in [OP_ADD,OP_SUB]) and
+           (a<0) then
+          begin
+            if op=OP_ADD then
+              op:=op_SUB
+            else
+              op:=OP_ADD;
+            { avoid range/overflow error in case a = low(tcgint) }
+{$push}{$r-}{$q-}
+            a:=-a;
+{$pop}
+          end;
+        ovloc.loc:=LOC_VOID;
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            begin
+              a_load_reg_reg(list,size,size,src,dst);
+              exit;
+            end;
+          OP_MOVE:
+            begin
+              a_load_const_reg(list,size,a,dst);
+              exit;
+            end;
+        end;
+        case op of
+          OP_ADD,
+          OP_SUB:
+            begin
+              handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
+              { on a 64 bit target, overflows with smaller data types
+                are handled via range errors }
+              if setflags and
+                 (size in [OS_64,OS_S64]) then
+                begin
+                  location_reset(ovloc,LOC_FLAGS,OS_8);
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                end;
+            end;
+          OP_OR,
+          OP_AND,
+          OP_XOR:
+            begin
+              if not(size in [OS_64,OS_S64]) then
+                a:=cardinal(a);
+              if is_shifter_const(a,size) then
+                list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
+              else
+                begin
+                  constreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,a,constreg);
+                  a_op_reg_reg_reg(list,op,size,constreg,src,dst);
+                end;
+            end;
+          OP_SHL,
+          OP_SHR,
+          OP_SAR:
+            begin
+              if size in [OS_64,OS_S64] then
+                shiftcountmask:=63
+              else
+                shiftcountmask:=31;
+              if (a and shiftcountmask)<>0 Then
+                list.concat(taicpu.op_reg_reg_const(
+                  TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
+              else
+                a_load_reg_reg(list,size,size,src,dst);
+              if (a and not(tcgint(shiftcountmask)))<>0 then
+                internalError(2014112101);
+            end;
+          OP_ROL,
+          OP_ROR:
+            begin
+              case size of
+                OS_32,OS_S32:
+                  if (a and not(tcgint(31)))<>0 then
+                    internalError(2014112102);
+                OS_64,OS_S64:
+                  if (a and not(tcgint(63)))<>0 then
+                    internalError(2014112103);
+                else
+                  internalError(2014112104);
+              end;
+              { there's only a ror opcode }
+              if op=OP_ROL then
+                a:=(tcgsize2size[size]*8)-a;
+              list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
+            end;
+          OP_MUL,
+          OP_IMUL,
+          OP_DIV,
+          OP_IDIV:
+            begin
+              constreg:=getintregister(list,size);
+              a_load_const_reg(list,size,a,constreg);
+              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
+            end;
+          else
+            internalerror(2014111403);
+        end;
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        tmpreg1: tregister;
+      begin
+        ovloc.loc:=LOC_VOID;
+        { overflow can only occur with 64 bit calculations on 64 bit cpus }
+        if setflags and
+           (size in [OS_64,OS_S64]) then
+          begin
+            case op of
+              OP_ADD,
+              OP_SUB:
+                begin
+                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
+                  ovloc.loc:=LOC_FLAGS;
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                  { finished; since we won't call through to a_op_reg_reg_reg,
+                    adjust the result here if necessary }
+                  maybeadjustresult(list,op,size,dst);
+                  exit;
+                end;
+              OP_MUL:
+                begin
+                  { check whether the upper 64 bit of the 128 bit product is 0 }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
+                  list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { still have to perform the actual multiplication  }
+                end;
+              OP_IMUL:
+                begin
+                  { check whether the sign bit of the (128 bit) result is the
+                    same as "sign bit of src1" xor "signbit of src2" (if so, no
+                    overflow and the xor-product of all sign bits is 0) }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src1));
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src2));
+                  list.concat(taicpu.op_reg_const(A_TST,tmpreg1,$80000000));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { still have to perform the actual multiplication }
+                end;
+              OP_IDIV,
+              OP_DIV:
+                begin
+                  { not handled here, needs div-by-zero check (dividing by zero
+                    just gives a 0 result on aarch64), and low(int64) div -1
+                    check for overflow) }
+                  internalerror(2014122101);
+                end;
+            end;
+          end;
+        a_op_reg_reg_reg(list,op,size,src1,src2,dst);
+      end;
+
+
+
+  {*************** compare instructructions ****************}
+
+    procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
+      var
+        op: tasmop;
+      begin
+        if a>=0 then
+          op:=A_CMP
+        else
+          op:=A_CMN;
+        { avoid range/overflow error in case a=low(tcgint) }
+{$push}{$r-}{$q-}
+        handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
+{$pop}
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
+      begin
+        list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(TOpCmp2AsmCond[cond]);
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
+      var
+        ai : taicpu;
+      begin
+        ai:=Taicpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(flags_to_cond(f));
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
+      begin
+        list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
+      begin
+        { we need an explicit overflow location, because there are many
+          possibilities (not just the overflow flag, which is only used for
+          signed add/sub) }
+        internalerror(2014112303);
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
+      var
+        hl : tasmlabel;
+        hflags : tresflags;
+      begin
+        if not(cs_check_overflow in current_settings.localswitches) then
+          exit;
+        current_asmdata.getjumplabel(hl);
+        case ovloc.loc of
+          LOC_FLAGS:
+            begin
+              hflags:=ovloc.resflags;
+              inverse_flags(hflags);
+              cg.a_jmp_flags(list,hflags,hl);
+            end;
+          else
+            internalerror(2014112304);
+        end;
+        a_call_name(list,'FPC_OVERFLOW',false);
+        a_label(list,hl);
+      end;
+
+  { *********** entry/exit code and address loading ************ }
+
+    function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+      var
+        ref: treference;
+        sr: tsuperregister;
+        pairreg: tregister;
+      begin
+        result:=0;
+        reference_reset_base(ref,NR_SP,-16,16);
+        ref.addressmode:=AM_PREINDEXED;
+        pairreg:=NR_NO;
+        { store all used registers pairwise }
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                inc(result,16);
+                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                pairreg:=NR_NO
+              end;
+        { one left -> store twice (stack must be 16 bytes aligned) }
+        if pairreg<>NR_NO then
+          begin
+            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
+            inc(result,16);
+          end;
+      end;
+
+
+    procedure FixupOffsets(p:TObject;arg:pointer);
+      var
+        sym: tabstractnormalvarsym absolute p;
+      begin
+        if (tsym(p).typ in [paravarsym,localvarsym]) and
+          (sym.localloc.loc=LOC_REFERENCE) and
+          (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
+          begin
+            sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
+            dec(sym.localloc.reference.offset,PLongint(arg)^);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
+      var
+        ref: treference;
+        totalstackframesize: longint;
+      begin
+        if nostackframe then
+          exit;
+        { stack pointer has to be aligned to 16 bytes at all times }
+        localsize:=align(localsize,16);
+
+        { save stack pointer and return address }
+        reference_reset_base(ref,NR_SP,-16,16);
+        ref.addressmode:=AM_PREINDEXED;
+        list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
+        { initialise frame pointer }
+        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
+
+        totalstackframesize:=localsize;
+        { save modified integer registers }
+        inc(totalstackframesize,
+          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
+        { only the lower 64 bits of the modified vector registers need to be
+          saved; if the caller needs the upper 64 bits, it has to save them
+          itself }
+        inc(totalstackframesize,
+          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
+
+        { allocate stack space }
+        if localsize<>0 then
+          begin
+            localsize:=align(localsize,16);
+            current_procinfo.final_localsize:=localsize;
+            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+          end;
+        { By default, we use the frame pointer to access parameters passed via
+          the stack and the stack pointer to address local variables and temps
+          because
+           a) we can use bigger positive than negative offsets (so accessing
+              locals via negative offsets from the frame pointer would be less
+              efficient)
+           b) we don't know the local size while generating the code, so
+              accessing the parameters via the stack pointer is not possible
+              without copying them
+          The problem with this is the get_frame() intrinsic:
+           a) it must return the same value as what we pass as parentfp
+              parameter, since that's how it's used in the TP-style objects unit
+           b) its return value must usable to access all local data from a
+              routine (locals and parameters), since it's all the nested
+              routines have access to
+           c) its return value must be usable to construct a backtrace, as it's
+              also used by the exception handling routines
+
+          The solution we use here, based on something similar that's done in
+          the MIPS port, is to generate all accesses to locals in the routine
+          itself SP-relative, and then after the code is generated and the local
+          size is known (namely, here), we change all SP-relative variables/
+          parameters into FP-relative ones. This means that they'll be accessed
+          less efficiently from nested routines, but those accesses are indirect
+          anyway and at least this way they can be accessed at all
+        }
+        if current_procinfo.has_nestedprocs then
+          begin
+            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
+      begin
+        { nothing to do on Darwin; check on ELF targets }
+        if not(target_info.system in systems_darwin) then
+          internalerror(2014112601);
+      end;
+
+
+    procedure tcgaarch64.g_restore_registers(list:TAsmList);
+      begin
+        { done in g_proc_exit }
+      end;
+
+
+    procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+      var
+        ref: treference;
+        sr, highestsetsr: tsuperregister;
+        pairreg: tregister;
+        regcount: longint;
+      begin
+        reference_reset_base(ref,NR_SP,16,16);
+        ref.addressmode:=AM_POSTINDEXED;
+        { highest reg stored twice? }
+        regcount:=0;
+        highestsetsr:=RS_NO;
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            begin
+              inc(regcount);
+              highestsetsr:=sr;
+            end;
+        if odd(regcount) then
+          begin
+            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
+            highestsetsr:=pred(highestsetsr);
+          end;
+        { load all (other) used registers pairwise }
+        pairreg:=NR_NO;
+        for sr:=highestsetsr downto lowsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
+                pairreg:=NR_NO
+              end;
+        { There can't be any register left }
+        if pairreg<>NR_NO then
+          internalerror(2014112602);
+      end;
+
+
+
+    procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
+      var
+        ref: treference;
+        regsstored: boolean;
+        sr: tsuperregister;
+      begin
+        if not nostackframe then
+          begin
+            { if no registers have been stored, we don't have to subtract the
+              allocated temp space from the stack pointer }
+            regsstored:=false;
+            for sr:=RS_X19 to RS_X28 do
+              if sr in rg[R_INTREGISTER].used_in_proc then
+                begin
+                  regsstored:=true;
+                  break;
+                end;
+            if not regsstored then
+              for sr:=RS_D8 to RS_D15 do
+                if sr in rg[R_MMREGISTER].used_in_proc then
+                  begin
+                    regsstored:=true;
+                    break;
+                  end;
+            { restore registers (and stack pointer) }
+            if regsstored then
+              begin
+                if current_procinfo.final_localsize<>0 then
+                  handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
+                load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
+                load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
+              end
+            else if current_procinfo.final_localsize<>0 then
+              { restore stack pointer }
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
+
+            { restore framepointer and return address }
+            reference_reset_base(ref,NR_SP,16,16);
+            ref.addressmode:=AM_POSTINDEXED;
+            list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
+          end;
+
+        { return }
+        list.concat(taicpu.op_none(A_RET));
+      end;
+
+
+    procedure tcgaarch64.g_save_registers(list : TAsmList);
+      begin
+        { done in g_proc_entry }
+      end;
+
+
+    { ************* concatcopy ************ }
+
+    procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
+      var
+        paraloc1,paraloc2,paraloc3 : TCGPara;
+        pd : tprocdef;
+      begin
+        pd:=search_system_proc('MOVE');
+        paraloc1.init;
+        paraloc2.init;
+        paraloc3.init;
+        paramanager.getintparaloc(pd,1,paraloc1);
+        paramanager.getintparaloc(pd,2,paraloc2);
+        paramanager.getintparaloc(pd,3,paraloc3);
+        a_load_const_cgpara(list,OS_SINT,len,paraloc3);
+        a_loadaddr_ref_cgpara(list,dest,paraloc2);
+        a_loadaddr_ref_cgpara(list,source,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
+        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        a_call_name(list,'FPC_MOVE',false);
+        dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        paraloc3.done;
+        paraloc2.done;
+        paraloc1.done;
+      end;
+
+
+    procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
+
+      var
+        sourcebasereplaced, destbasereplaced: boolean;
+
+      { get optimal memory operation to use for loading/storing data
+        in an unrolled loop }
+      procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
+        begin
+          if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=true;
+            end
+          else if (unscaledop<>A_NONE) and
+             (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=false;
+            end
+          else
+            begin
+              memop:=scaledop;
+              needsimplify:=true;
+            end;
+        end;
+
+      { adjust the offset and/or addressing mode after a load/store so it's
+        correct for the next one of the same size }
+      procedure updaterefafterloadstore(var ref: treference; oplen: longint);
+        begin
+          case ref.addressmode of
+            AM_OFFSET:
+              inc(ref.offset,oplen);
+            AM_POSTINDEXED:
+              { base register updated by instruction, next offset can remain
+                the same }
+              ;
+            AM_PREINDEXED:
+              begin
+                { base register updated by instruction -> next instruction can
+                  use post-indexing with offset = sizeof(operation) }
+                ref.offset:=0;
+                ref.addressmode:=AM_OFFSET;
+              end;
+          end;
+        end;
+
+      { generate a load/store and adjust the reference offset to the next
+        memory location if necessary }
+      procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]);
+        end;
+
+      { generate a dual load/store (ldp/stp) and adjust the reference offset to
+        the next memory location if necessary }
+      procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
+        end;
+
+      { turn a reference into a pre- or post-indexed reference for use in a
+        load/store of a particular size }
+      procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
+        var
+          tmpreg: tregister;
+          scaledoffset: longint;
+          orgaddressmode: taddressmode;
+        begin
+          scaledoffset:=tcgsize2size[opsize];
+          if scaledop in [A_LDP,A_STP] then
+            scaledoffset:=scaledoffset*2;
+          { can we use the reference as post-indexed without changes? }
+          if forcepostindexing then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_POSTINDEXED;
+              if (orgaddressmode=AM_POSTINDEXED) or
+                 ((ref.offset=0) and
+                  (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
+                begin
+                  { just change the post-indexed offset to the access size }
+                  ref.offset:=scaledoffset;
+                  { and replace the base register if that didn't happen yet
+                    (could be sp or a regvar) }
+                  if not basereplaced then
+                    begin
+                      tmpreg:=getaddressregister(list);
+                      a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                      ref.base:=tmpreg;
+                      basereplaced:=true;
+                    end;
+                  exit;
+                end;
+              ref.addressmode:=orgaddressmode;
+            end;
+{$ifdef dummy}
+          This could in theory be useful in case you have a concatcopy from
+          e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
+          very unlikely. Disabled because it still needs fixes, as it
+          also generates pre-indexed loads right now at the very end for the
+          left-over gencopies
+
+          { can we turn it into a pre-indexed reference for free? (after the
+            first operation, it will be turned into an offset one) }
+          if not forcepostindexing and
+             (ref.offset<>0) then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_PREINDEXED;
+              tmpreg:=ref.base;
+              if not basereplaced and
+                 (ref.base=tmpreg) then
+                begin
+                  tmpreg:=getaddressregister(list);
+                  a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                  ref.base:=tmpreg;
+                  basereplaced:=true;
+                end;
+              if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
+                make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              exit;
+            end;
+{$endif dummy}
+          if not forcepostindexing then
+            begin
+              ref.addressmode:=AM_OFFSET;
+              make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              { this may still cause problems if the final offset is no longer
+                a simple ref; it's a bit complicated to pass all information
+                through at all places and check that here, so play safe: we
+                currently never generate unrolled copies for more than 64
+                bytes (32 with non-double-register copies) }
+              if ref.index=NR_NO then
+                begin
+                  if ((scaledop in [A_LDP,A_STP]) and
+                      (ref.offset<((64-8)*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDUR,A_STUR]) and
+                      (ref.offset<(255-8*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDR,A_STR]) and
+                      (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
+                    exit;
+                end;
+            end;
+          tmpreg:=getaddressregister(list);
+          a_loadaddr_ref_reg(list,ref,tmpreg);
+          basereplaced:=true;
+          if forcepostindexing then
+            begin
+              reference_reset_base(ref,tmpreg,scaledoffset,ref.alignment);
+              ref.addressmode:=AM_POSTINDEXED;
+            end
+          else
+            begin
+              reference_reset_base(ref,tmpreg,0,ref.alignment);
+              ref.addressmode:=AM_OFFSET;
+            end
+        end;
+
+      { prepare a reference for use by gencopy. This is done both after the
+        unrolled and regular copy loop -> get rid of post-indexing mode, make
+        sure ref is valid }
+      procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
+        var
+          simplify: boolean;
+        begin
+          if ref.addressmode=AM_POSTINDEXED then
+            ref.offset:=tcgsize2size[opsize];
+          getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
+          if simplify then
+            begin
+              makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
+              op:=scaledop;
+            end;
+        end;
+
+      { generate a copy from source to dest of size opsize/postfix }
+      procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
+        var
+          reg: tregister;
+          loadop, storeop: tasmop;
+        begin
+          preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
+          preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
+          reg:=getintregister(list,opsize);
+          genloadstore(list,loadop,reg,source,postfix,opsize);
+          genloadstore(list,storeop,reg,dest,postfix,opsize);
+        end;
+
+
+      { copy the leftovers after an unrolled or regular copy loop }
+      procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
+        begin
+          { stop post-indexing if we did so in the loop, since in that case all
+            offsets definitely can be represented now }
+          if source.addressmode=AM_POSTINDEXED then
+            begin
+              source.addressmode:=AM_OFFSET;
+              source.offset:=0;
+            end;
+          if dest.addressmode=AM_POSTINDEXED then
+            begin
+              dest.addressmode:=AM_OFFSET;
+              dest.offset:=0;
+            end;
+          { transfer the leftovers }
+          if len>=8 then
+            begin
+              dec(len,8);
+              gencopy(list,source,dest,PF_NONE,OS_64);
+            end;
+          if len>=4 then
+            begin
+              dec(len,4);
+              gencopy(list,source,dest,PF_NONE,OS_32);
+            end;
+          if len>=2 then
+            begin
+              dec(len,2);
+              gencopy(list,source,dest,PF_H,OS_16);
+            end;
+          if len>=1 then
+            begin
+              dec(len);
+              gencopy(list,source,dest,PF_B,OS_8);
+            end;
+        end;
+
+
+      const
+        { load_length + loop dec + cbnz }
+        loopoverhead=12;
+        { loop overhead + load + store }
+        totallooplen=loopoverhead + 8;
+      var
+        totalalign: longint;
+        maxlenunrolled: tcgint;
+        loadop, storeop: tasmop;
+        opsize: tcgsize;
+        postfix: toppostfix;
+        tmpsource, tmpdest: treference;
+        scaledstoreop, unscaledstoreop,
+        scaledloadop, unscaledloadop: tasmop;
+        regs: array[1..8] of tregister;
+        countreg: tregister;
+        i, regcount: longint;
+        hl: tasmlabel;
+        simplifysource, simplifydest: boolean;
+      begin
+        if len=0 then
+          exit;
+        sourcebasereplaced:=false;
+        destbasereplaced:=false;
+        { maximum common alignment }
+        totalalign:=max(1,newalignment(source.alignment,dest.alignment));
+        { use a simple load/store? }
+        if (len in [1,2,4,8]) and
+           ((totalalign>=(len div 2)) or
+            (source.alignment=len) or
+            (dest.alignment=len)) then
+          begin
+            opsize:=int_cgsize(len);
+            a_load_ref_ref(list,opsize,opsize,source,dest);
+            exit;
+          end;
+
+        { alignment > length is not useful, and would break some checks below }
+        while totalalign>len do
+          totalalign:=totalalign div 2;
+
+        { operation sizes to use based on common alignment }
+        case totalalign of
+          1:
+            begin
+              postfix:=PF_B;
+              opsize:=OS_8;
+            end;
+          2:
+            begin
+              postfix:=PF_H;
+              opsize:=OS_16;
+            end;
+          4:
+            begin
+              postfix:=PF_None;
+              opsize:=OS_32;
+            end
+          else
+            begin
+              totalalign:=8;
+              postfix:=PF_None;
+              opsize:=OS_64;
+            end;
+        end;
+        { maximum length to handled with an unrolled loop (4 loads + 4 stores) }
+        maxlenunrolled:=min(totalalign,8)*4;
+        { ldp/stp -> 2 registers per instruction }
+        if (totalalign>=4) and
+           (len>=totalalign*2) then
+          begin
+            maxlenunrolled:=maxlenunrolled*2;
+            scaledstoreop:=A_STP;
+            scaledloadop:=A_LDP;
+            unscaledstoreop:=A_NONE;
+            unscaledloadop:=A_NONE;
+          end
+        else
+          begin
+            scaledstoreop:=A_STR;
+            scaledloadop:=A_LDR;
+            unscaledstoreop:=A_STUR;
+            unscaledloadop:=A_LDUR;
+          end;
+        { we only need 4 instructions extra to call FPC_MOVE }
+        if cs_opt_size in current_settings.optimizerswitches then
+          maxlenunrolled:=maxlenunrolled div 2;
+        if (len>maxlenunrolled) and
+           (len>totalalign*8) then
+          begin
+            g_concatcopy_move(list,source,dest,len);
+            exit;
+          end;
+
+        simplifysource:=true;
+        simplifydest:=true;
+        tmpsource:=source;
+        tmpdest:=dest;
+        { can we directly encode all offsets in an unrolled loop? }
+        if len<=maxlenunrolled then
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { the leftovers will be handled separately -> -(len mod opsize) }
+            inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
+            { additionally, the last regular load/store will be at
+              offset+len-opsize (if len-(len mod opsize)>len) }
+            if tmpsource.offset>source.offset then
+              dec(tmpsource.offset,tcgsize2size[opsize]);
+            getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
+            inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
+            if tmpdest.offset>dest.offset then
+              dec(tmpdest.offset,tcgsize2size[opsize]);
+            getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
+            tmpsource:=source;
+            tmpdest:=dest;
+            { if we can't directly encode all offsets, simplify }
+            if simplifysource then
+              begin
+                loadop:=scaledloadop;
+                makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
+              end;
+            if simplifydest then
+              begin
+                storeop:=scaledstoreop;
+                makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
+              end;
+            regcount:=len div tcgsize2size[opsize];
+            { in case we transfer two registers at a time, we copy an even
+              number of registers }
+            if loadop=A_LDP then
+              regcount:=regcount and not(1);
+            { initialise for dfa }
+            regs[low(regs)]:=NR_NO;
+            { max 4 loads/stores -> max 8 registers (in case of ldp/stdp) }
+            for i:=1 to regcount do
+              regs[i]:=getintregister(list,opsize);
+            if loadop=A_LDP then
+              begin
+                { load registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
+                { store registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                for i:=1 to regcount do
+                  genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
+                for i:=1 to regcount do
+                  genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
+              end;
+            { leftover }
+            len:=len-regcount*tcgsize2size[opsize];
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
+{$endif extdebug}
+          end
+        else
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { regular loop -> definitely use post-indexing }
+            loadop:=scaledloadop;
+            makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
+            storeop:=scaledstoreop;
+            makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
+            current_asmdata.getjumplabel(hl);
+            countreg:=getintregister(list,OS_32);
+            if loadop=A_LDP then
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize]*2,countreg)
+            else
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
+            a_label(list,hl);
+            a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
+            if loadop=A_LDP then
+              begin
+                regs[1]:=getintregister(list,opsize);
+                regs[2]:=getintregister(list,opsize);
+                gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
+                gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                regs[1]:=getintregister(list,opsize);
+                genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
+                genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
+              end;
+            list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
+            len:=len mod tcgsize2size[opsize];
+          end;
+        gencopyleftovers(list,tmpsource,tmpdest,len);
+      end;
+
+
+    procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
+      begin
+        { This method is integrated into g_intf_wrapper and shouldn't be called separately }
+        InternalError(2013020102);
+      end;
+
+
+
+    procedure create_codegen;
+      begin
+        cg:=tcgaarch64.Create;
+        cg128:=tcg128.Create;
+      end;
+
+end.

+ 200 - 38
compiler/aarch64/cpubase.pas

@@ -1,7 +1,8 @@
 {
 {
     Copyright (c) 1998-2012 by Florian Klaempfl and Peter Vreman
     Copyright (c) 1998-2012 by Florian Klaempfl and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe and Florian Klaempfl
 
 
-    Contains the base types for ARM64
+    Contains the base types for Aarch64
 
 
     This program is free software; you can redistribute it and/or modify
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     it under the terms of the GNU General Public License as published by
@@ -68,14 +69,22 @@ unit cpubase;
       { Available Superregisters }
       { Available Superregisters }
       {$i ra64sup.inc}
       {$i ra64sup.inc}
 
 
+      RS_IP0 = RS_X16;
+      RS_IP1 = RS_X17;
+
       R_SUBWHOLE = R_SUBQ;
       R_SUBWHOLE = R_SUBQ;
 
 
       { Available Registers }
       { Available Registers }
       {$i ra64con.inc}
       {$i ra64con.inc}
 
 
+      NR_IP0 = NR_X16;
+      NR_IP1 = NR_X17;
+
       { Integer Super registers first and last }
       { Integer Super registers first and last }
       first_int_supreg = RS_X0;
       first_int_supreg = RS_X0;
-      first_int_imreg = $20;
+      { xzr and sp take up a separate super register because some instructions
+        are ambiguous otherwise }
+      first_int_imreg = $21;
 
 
       { Integer Super registers first and last }
       { Integer Super registers first and last }
       first_fpu_supreg = RS_S0;
       first_fpu_supreg = RS_S0;
@@ -92,7 +101,7 @@ unit cpubase;
         The value of this constant is equal to the constant
         The value of this constant is equal to the constant
         PARM_BOUNDARY / BITS_PER_UNIT in the GCC source.
         PARM_BOUNDARY / BITS_PER_UNIT in the GCC source.
       }
       }
-      std_param_align = 4;
+      std_param_align = 8;
 
 
       { TODO: Calculate bsstart}
       { TODO: Calculate bsstart}
       regnumber_count_bsstart = 128;
       regnumber_count_bsstart = 128;
@@ -109,7 +118,7 @@ unit cpubase;
         {$i ra64dwa.inc}
         {$i ra64dwa.inc}
       );
       );
       { registers which may be destroyed by calls }
       { registers which may be destroyed by calls }
-      VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X29..RS_X30];
+      VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X30];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
 
 
     type
     type
@@ -126,16 +135,23 @@ unit cpubase;
       TOpPostfix = (PF_None,
       TOpPostfix = (PF_None,
         { update condition flags }
         { update condition flags }
         PF_S,
         PF_S,
-        { load/store }
-        PF_B,PF_SB,PF_H,PF_SH
+        { load/store sizes }
+        PF_B,PF_SB,PF_H,PF_SH,PF_W,PF_SW
       );
       );
 
 
       TOpPostfixes = set of TOpPostfix;
       TOpPostfixes = set of TOpPostfix;
 
 
     const
     const
-      oppostfix2str : array[TOpPostfix] of string[2] = ('',
+      tcgsizep2size: array[OS_NO..OS_F128] of byte =
+        {OS_NO }
+        (0,
+        {OS_8,OS_16,OS_32,OS_64,OS_128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128}
+            0,    1,    2,    3,     4,    0,     1,     2,     3,      4,
+        {OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,}
+             2,      3,     0,     3,      4);
+      oppostfix2str: array[TOpPostfix] of string[2] = ('',
         's',
         's',
-        'b','sb','h','sh');
+        'b','sb','h','sh','w','sw');
 
 
 {*****************************************************************************
 {*****************************************************************************
                                 Conditions
                                 Conditions
@@ -150,13 +166,15 @@ unit cpubase;
       TAsmConds = set of TAsmCond;
       TAsmConds = set of TAsmCond;
 
 
     const
     const
+      C_CS = C_HS;
+      C_CC = C_LO;
       cond2str : array[TAsmCond] of string[2]=('',
       cond2str : array[TAsmCond] of string[2]=('',
         'eq','ne','hs','lo','mi','pl','vs','vc','hi','ls',
         'eq','ne','hs','lo','mi','pl','vs','vc','hi','ls',
         'ge','lt','gt','le','al','nv'
         'ge','lt','gt','le','al','nv'
       );
       );
 
 
       uppercond2str : array[TAsmCond] of string[2]=('',
       uppercond2str : array[TAsmCond] of string[2]=('',
-        'EQ','NE','hs','LO','MI','PL','VS','VC','HI','LS',
+        'EQ','NE','HS','LO','MI','PL','VS','VC','HI','LS',
         'GE','LT','GT','LE','AL','NV'
         'GE','LT','GT','LE','AL','NV'
       );
       );
 
 
@@ -168,12 +186,28 @@ unit cpubase;
       TResFlags = (F_EQ,F_NE,F_CS,F_CC,F_MI,F_PL,F_VS,F_VC,F_HI,F_LS,
       TResFlags = (F_EQ,F_NE,F_CS,F_CC,F_MI,F_PL,F_VS,F_VC,F_HI,F_LS,
         F_GE,F_LT,F_GT,F_LE);
         F_GE,F_LT,F_GT,F_LE);
 
 
+    const
+      F_HS = F_CS;
+      F_LO = F_CC;
+
 {*****************************************************************************
 {*****************************************************************************
                                 Operands
                                 Operands
 *****************************************************************************}
 *****************************************************************************}
 
 
+    type
       taddressmode = (AM_OFFSET,AM_PREINDEXED,AM_POSTINDEXED);
       taddressmode = (AM_OFFSET,AM_PREINDEXED,AM_POSTINDEXED);
-      tshiftmode = (SM_None,SM_LSL,SM_LSR,SM_ASR,SM_ROR);
+
+      tshiftmode = (SM_None,
+                    { shifted register instructions. LSL can also be used for
+                      the index register of certain loads/stores }
+                    SM_LSL,SM_LSR,SM_ASR,
+                    { extended register instructions: zero/sign extension +
+                        optional shift (interpreted as LSL after extension)
+                       -- the index register of certain loads/stores can be
+                          extended via (s|u)xtw with a shiftval of either 0 or
+                          log2(transfer size of the load/store)
+                    }
+                    SM_UXTB,SM_UXTH,SM_UXTW,SM_UXTX,SM_SXTB,SM_SXTH,SM_SXTW,SM_SXTX);
 
 
       tupdatereg = (UR_None,UR_Update);
       tupdatereg = (UR_None,UR_Update);
 
 
@@ -184,12 +218,6 @@ unit cpubase;
         shiftimm : byte;
         shiftimm : byte;
       end;
       end;
 
 
-      tcpumodeflag = (mfA, mfI, mfF);
-      tcpumodeflags = set of tcpumodeflag;
-
-      tspecialregflag = (srC, srX, srS, srF);
-      tspecialregflags = set of tspecialregflag;
-
 {*****************************************************************************
 {*****************************************************************************
                                  Constants
                                  Constants
 *****************************************************************************}
 *****************************************************************************}
@@ -201,6 +229,10 @@ unit cpubase;
       maxfpuregs = 32;
       maxfpuregs = 32;
       maxaddrregs = 0;
       maxaddrregs = 0;
 
 
+      shiftedregmodes = [SM_LSL,SM_UXTB,SM_UXTH,SM_UXTW,SM_UXTX,SM_SXTB,SM_SXTH,SM_SXTW,SM_SXTX];
+      extendedregmodes = [SM_LSL,SM_LSR,SM_ASR];
+
+
 {*****************************************************************************
 {*****************************************************************************
                                 Operand Sizes
                                 Operand Sizes
 *****************************************************************************}
 *****************************************************************************}
@@ -232,17 +264,23 @@ unit cpubase;
                           Generic Register names
                           Generic Register names
 *****************************************************************************}
 *****************************************************************************}
 
 
-      NR_SP = NR_XZR;
-      RS_SP = RS_XZR;
-      NR_WSP = NR_WZR;
-      RS_WSP = RS_WZR;
+
+      NR_FP = NR_X29;
+      RS_FP = RS_X29;
+      NR_WFP = NR_W29;
+      RS_WFP = RS_W29;
+
+      NR_LR = NR_X30;
+      RS_LR = RS_X30;
+      NR_WLR = NR_W30;
+      RS_WLR = RS_W30;
 
 
       { Stack pointer register }
       { Stack pointer register }
       NR_STACK_POINTER_REG = NR_SP;
       NR_STACK_POINTER_REG = NR_SP;
       RS_STACK_POINTER_REG = RS_SP;
       RS_STACK_POINTER_REG = RS_SP;
-      { Frame pointer register (initialized in tarmprocinfo.init_framepointer) }
-      RS_FRAME_POINTER_REG: tsuperregister = RS_X29;
-      NR_FRAME_POINTER_REG: tregister = NR_X29;
+      { Frame pointer register }
+      NR_FRAME_POINTER_REG = NR_X29;
+      RS_FRAME_POINTER_REG = RS_X29;
       { Register for addressing absolute data in a position independant way,
       { Register for addressing absolute data in a position independant way,
         such as in PIC code. The exact meaning is ABI specific. For
         such as in PIC code. The exact meaning is ABI specific. For
         further information look at GCC source : PIC_OFFSET_TABLE_REGNUM
         further information look at GCC source : PIC_OFFSET_TABLE_REGNUM
@@ -307,6 +345,9 @@ unit cpubase;
 
 
     function dwarf_reg(r:tregister):shortint;
     function dwarf_reg(r:tregister):shortint;
 
 
+    function is_shifter_const(d: aint; size: tcgsize): boolean;
+
+
   implementation
   implementation
 
 
     uses
     uses
@@ -329,13 +370,24 @@ unit cpubase;
     function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
     function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
       begin
       begin
         case regtype of
         case regtype of
+          R_INTREGISTER:
+            begin
+              case s of
+                { there's only Wn and Xn }
+                OS_64,
+                OS_S64:
+                  cgsize2subreg:=R_SUBWHOLE;
+                else
+                  cgsize2subreg:=R_SUBD;
+                end;
+            end;
           R_MMREGISTER:
           R_MMREGISTER:
             begin
             begin
               case s of
               case s of
                 OS_F32:
                 OS_F32:
-                  cgsize2subreg:=R_SUBFS;
+                  cgsize2subreg:=R_SUBMMS;
                 OS_F64:
                 OS_F64:
-                  cgsize2subreg:=R_SUBFD;
+                  cgsize2subreg:=R_SUBMMD;
                 else
                 else
                   internalerror(2009112701);
                   internalerror(2009112701);
               end;
               end;
@@ -349,18 +401,22 @@ unit cpubase;
     function reg_cgsize(const reg: tregister): tcgsize;
     function reg_cgsize(const reg: tregister): tcgsize;
       begin
       begin
         case getregtype(reg) of
         case getregtype(reg) of
-          R_INTREGISTER :
-            reg_cgsize:=OS_32;
-          R_FPUREGISTER :
-            reg_cgsize:=OS_F80;
+          R_INTREGISTER:
+            case getsubreg(reg) of
+              R_SUBD:
+                result:=OS_32
+              else
+                result:=OS_64;
+            end;
           R_MMREGISTER :
           R_MMREGISTER :
             begin
             begin
               case getsubreg(reg) of
               case getsubreg(reg) of
-                R_SUBFD,
-                R_SUBWHOLE:
+                R_SUBMMD:
                   result:=OS_F64;
                   result:=OS_F64;
-                R_SUBFS:
+                R_SUBMMS:
                   result:=OS_F32;
                   result:=OS_F32;
+                R_SUBMMWHOLE:
+                  result:=OS_M128;
                 else
                 else
                   internalerror(2009112903);
                   internalerror(2009112903);
               end;
               end;
@@ -373,9 +429,7 @@ unit cpubase;
 
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
       begin
       begin
-        { This isn't 100% perfect because the arm allows jumps also by writing to PC=R15.
-          To overcome this problem we simply forbid that FPC generates jumps by loading R15 }
-        is_calljmp:= o in [A_B,A_BLR,A_RET];
+        is_calljmp:=o in [A_B,A_BL,A_BLR,A_RET,A_CBNZ,A_CBZ];
       end;
       end;
 
 
 
 
@@ -391,8 +445,8 @@ unit cpubase;
 
 
     function flags_to_cond(const f: TResFlags) : TAsmCond;
     function flags_to_cond(const f: TResFlags) : TAsmCond;
       const
       const
-        flag_2_cond: array[F_EQ..F_LE] of TAsmCond =
-          (C_EQ,C_NE,C_HI,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
+        flag_2_cond: array[TResFlags] of TAsmCond =
+          (C_EQ,C_NE,C_HS,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
            C_GE,C_LT,C_GT,C_LE);
            C_GE,C_LT,C_GT,C_LE);
       begin
       begin
         if f>high(flag_2_cond) then
         if f>high(flag_2_cond) then
@@ -434,7 +488,7 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
       const
       const
         inverse: array[TAsmCond] of TAsmCond=(C_None,
         inverse: array[TAsmCond] of TAsmCond=(C_None,
-          C_NE,C_EQ,C_LO,C_HI,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
+          C_NE,C_EQ,C_LO,C_HS,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
           C_LT,C_GE,C_LE,C_GT,C_None,C_None
           C_LT,C_GE,C_LE,C_GT,C_None,C_None
         );
         );
       begin
       begin
@@ -456,4 +510,112 @@ unit cpubase;
       end;
       end;
 
 
 
 
+    function is_shifter_const(d: aint; size: tcgsize): boolean;
+      var
+         pattern, checkpattern: qword;
+         patternlen, maxbits, replicatedlen: longint;
+         rightmostone, rightmostzero, checkbit, secondrightmostbit: longint;
+      begin
+        result:=false;
+        { patterns with all bits 0 or 1 cannot be represented this way }
+        if (d=0) then
+          exit;
+        case size of
+          OS_64,
+          OS_S64:
+            begin
+              if d=-1 then
+                exit;
+              maxbits:=64;
+            end
+          else
+            begin
+              if longint(d)=-1 then
+                exit;
+              { we'll generate a 32 bit pattern -> ignore upper sign bits in
+                case of negative longint value }
+              d:=cardinal(d);
+              maxbits:=32;
+            end;
+        end;
+        { "The Logical (immediate) instructions accept a bitmask immediate value
+          that is a 32-bit pattern or a 64-bit pattern viewed as a vector of
+          identical elements of size e = 2, 4, 8, 16, 32 or, 64 bits. Each
+          element contains the same sub-pattern, that is a single run of
+          1 to (e - 1) nonzero bits from bit 0 followed by zero bits, then
+          rotated by 0 to (e - 1) bits." (ARMv8 ARM)
+
+          Rather than generating all possible patterns and checking whether they
+          match our constant, we check whether the lowest 2/4/8/... bits are
+          a valid pattern, and if so whether the constant consists of a
+          replication of this pattern. Such a valid pattern has the form of
+          either (regexp notation)
+            * 1+0+1*
+            * 0+1+0* }
+        patternlen:=2;
+        while patternlen<=maxbits do
+          begin
+            { try lowest <patternlen> bits of d as pattern }
+            if patternlen<>64 then
+              pattern:=qword(d) and ((qword(1) shl patternlen)-1)
+            else
+              pattern:=qword(d);
+            { valid pattern? If it contains too many 1<->0 transitions, larger
+              parts of d cannot be a valid pattern either }
+            rightmostone:=BsfQWord(pattern);
+            rightmostzero:=BsfQWord(not(pattern));
+            { pattern all ones or zeroes -> not a valid pattern (but larger ones
+              can still be valid, since we have too few transitions) }
+            if (rightmostone<patternlen) and
+               (rightmostzero<patternlen) then
+              begin
+                if rightmostone>rightmostzero then
+                  begin
+                    { we have .*1*0* -> check next zero position by shifting
+                      out the existing zeroes (shr rightmostone), inverting and
+                      then again looking for the rightmost one position }
+                    checkpattern:=not(pattern);
+                    checkbit:=rightmostone;
+                  end
+                else
+                  begin
+                    { same as above, but for .*0*1* }
+                    checkpattern:=pattern;
+                    checkbit:=rightmostzero;
+                  end;
+                secondrightmostbit:=BsfQWord(checkpattern shr checkbit)+checkbit;
+                { if this position is >= patternlen -> ok (1 transition),
+                  otherwise we now have 2 transitions and have to check for a
+                  third (if there is one, abort)
+
+                  bsf returns 255 if no 1 bit is found, so in that case it's
+                  also ok
+                  }
+                if secondrightmostbit<patternlen then
+                  begin
+                    secondrightmostbit:=BsfQWord(not(checkpattern) shr secondrightmostbit)+secondrightmostbit;
+                    if secondrightmostbit<patternlen then
+                      exit;
+                  end;
+                { ok, this is a valid pattern, now does d consist of a
+                  repetition of this pattern? }
+                replicatedlen:=patternlen;
+                checkpattern:=pattern;
+                while replicatedlen<maxbits do
+                  begin
+                    { douplicate current pattern }
+                    checkpattern:=checkpattern or (checkpattern shl replicatedlen);
+                    replicatedlen:=replicatedlen*2;
+                  end;
+                if qword(d)=checkpattern then
+                  begin
+                    { yes! }
+                    result:=true;
+                    exit;
+                  end;
+              end;
+            patternlen:=patternlen*2;
+          end;
+      end;
+
 end.
 end.

+ 17 - 2
compiler/aarch64/cpuinfo.pas

@@ -21,6 +21,9 @@ Interface
 
 
 Type
 Type
    bestreal = double;
    bestreal = double;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TDoubleRec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts32real = single;
    ts64real = double;
    ts64real = double;
    ts80real = type extended;
    ts80real = type extended;
@@ -47,6 +50,9 @@ Type
 
 
 
 
 Const
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = false; (* Not yet at least ;-) *)
    {# Size of native extended floating point type }
    {# Size of native extended floating point type }
    extended_size = 8;
    extended_size = 8;
    {# Size of a multimedia register               }
    {# Size of a multimedia register               }
@@ -54,6 +60,15 @@ Const
    { target cpu string (used by compiler options) }
    { target cpu string (used by compiler options) }
    target_cpu_string = 'aarch64';
    target_cpu_string = 'aarch64';
 
 
+   { We know that there are fields after sramsize
+     but we don't care about this warning }
+   {$PUSH}
+    {$WARN 3177 OFF}
+   embedded_controllers : array [tcontrollertype] of tcontrollerdatatype =
+   (
+      (controllertypestr:''; controllerunitstr:''; flashbase:0; flashsize:0; srambase:0; sramsize:0));
+   {$POP}
+
    { calling conventions supported by the code generator }
    { calling conventions supported by the code generator }
    supported_calling_conventions : tproccalloptions = [
    supported_calling_conventions : tproccalloptions = [
      pocall_internproc,
      pocall_internproc,
@@ -88,12 +103,12 @@ Const
                                  { no need to write info about those }
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
-				  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
+				  cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
-   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
+   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
 
 Implementation
 Implementation

+ 40 - 0
compiler/aarch64/cpunode.pas

@@ -0,0 +1,40 @@
+{******************************************************************************
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Includes the aarch64 code generator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ *****************************************************************************}
+unit cpunode;
+
+{$I fpcdefs.inc}
+
+interface
+{ This unit is used to define the specific CPU implementations. All needed
+actions are included in the INITALIZATION part of these units. This explains
+the behaviour of such a unit having just a USES clause! }
+
+implementation
+
+  uses
+    ncgbas,ncgflw,ncgcal,ncgcnv,ncgld,ncgmem,ncgcon,ncgset,ncgobjc,
+    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,
+    { this not really a node }
+    rgcpu,
+    { symtable }
+    symcpu;
+
+end.

+ 458 - 521
compiler/aarch64/cpupara.pas

@@ -1,5 +1,5 @@
 {
 {
-    Copyright (c) 2003-2012 by Florian Klaempfl and others
+    Copyright (c) 2013-2014 by Jonas Maebe, Florian Klaempfl and others
 
 
     AArch64 specific calling conventions
     AArch64 specific calling conventions
 
 
@@ -34,19 +34,24 @@ unit cpupara;
 
 
     type
     type
        tcpuparamanager = class(tparamanager)
        tcpuparamanager = class(tparamanager)
-          function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
-          function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
-          function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
-          function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
-          function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
-          function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+          function get_volatile_registers_int(calloption: tproccalloption): tcpuregisterset; override;
+          function get_volatile_registers_fpu(calloption: tproccalloption): tcpuregisterset; override;
+          function get_volatile_registers_mm(calloption: tproccalloption): tcpuregisterset; override;
+          function push_addr_param(varspez: tvarspez; def: tdef; calloption: tproccalloption): boolean; override;
+          function ret_in_param(def: tdef; pd: tabstractprocdef):boolean;override;
+          function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;override;
+          function create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;override;
+          function get_funcretloc(p: tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+          function param_use_paraloc(const cgpara: tcgpara): boolean; override;
          private
          private
-          procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
-          function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-            var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
+          curintreg,
+          curmmreg: tsuperregister;
+          curstackoffset: aword;
+
+          procedure init_para_alloc_values;
+          procedure alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+
+          procedure create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
        end;
        end;
 
 
   implementation
   implementation
@@ -56,6 +61,13 @@ unit cpupara;
        rgobj,
        rgobj,
        defutil,symsym,symtable;
        defutil,symsym,symtable;
 
 
+    const
+      RS_FIRST_INT_PARAM_SUPREG = RS_X0;
+      RS_LAST_INT_PARAM_SUPREG = RS_X7;
+      { Q0/D0/S0/H0/B0 all have the same superregister number }
+      RS_FIRST_MM_PARAM_SUPREG = RS_D0;
+      RS_LAST_MM_PARAM_SUPREG = RS_D7;
+
 
 
     function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
     function tcpuparamanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
       begin
@@ -75,47 +87,90 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    procedure tcpuparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
       var
       var
-        paraloc : pcgparalocation;
-        def : tdef;
+        i: longint;
+        sym: tsym;
+        tmpelecount: longint;
       begin
       begin
-        if nr<1 then
-          internalerror(2002070801);
-        def:=tparavarsym(pd.paras[nr-1]).vardef;
-        cgpara.reset;
-        cgpara.size:=def_cgsize(def);
-        cgpara.intsize:=tcgsize2size[cgpara.size];
-        cgpara.alignment:=std_param_align;
-        cgpara.def:=def;
-        paraloc:=cgpara.add_location;
-        with paraloc^ do
-          begin
-            size:=OS_INT;
-            { the four first parameters are passed into registers }
-            if nr<=8 then
-              begin
-                loc:=LOC_REGISTER;
-                register:=newreg(R_INTREGISTER,RS_X0+nr-1,R_SUBWHOLE);
-              end
-            else
-              begin
-                { the other parameters are passed on the stack }
-                loc:=LOC_REFERENCE;
-                reference.index:=NR_STACK_POINTER_REG;
-                reference.offset:=(nr-9)*8;
-              end;
-          end;
+        result:=false;
+        case p.typ of
+          arraydef:
+            begin
+              if is_special_array(p) then
+                exit;
+              case tarraydef(p).elementdef.typ of
+                floatdef:
+                  begin
+                    { an array of empty records has no influence }
+                    if tarraydef(p).elementdef.size=0 then
+                      begin
+                        result:=true;
+                        exit
+                      end;
+                    tmpelecount:=0;
+                    if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
+                      exit;
+                    { tmpelecount now contains the number of hfa elements in a
+                      single array element (e.g. 2 if it's an array of a record
+                      containing two singles) -> multiply by number of elements
+                      in the array }
+                    inc(elecount,tarraydef(p).elecount*tmpelecount);
+                    if elecount>4 then
+                      exit;
+                  end;
+                else
+                  result:=is_hfa_internal(tarraydef(p).elementdef,basedef,elecount);
+                end;
+            end;
+          floatdef:
+            begin
+              if not assigned(basedef) then
+                basedef:=p
+              else if basedef<>p then
+                exit;
+              inc(elecount);
+              result:=true;
+            end;
+          recorddef:
+            begin
+              for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
+                begin
+                  sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
+                  if sym.typ<>fieldvarsym then
+                    continue;
+                  if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
+                    exit
+                end;
+              result:=true;
+            end;
+          else
+            exit
+        end;
       end;
       end;
 
 
 
 
-    function Is_HFA(p : tdef) : boolean;
+    { Returns whether a def is a "homogeneous float array" at the machine level.
+      This means that in the memory layout, the def only consists of maximally
+      4 floating point values that appear consecutively in memory }
+    function is_hfa(p: tdef; out basedef: tdef) : boolean;
+      var
+        elecount: longint;
       begin
       begin
         result:=false;
         result:=false;
+        basedef:=nil;
+        elecount:=0;
+        result:=is_hfa_internal(p,basedef,elecount);
+        result:=
+          result and
+          (elecount>0) and
+          (p.size=basedef.size*elecount)
       end;
       end;
 
 
 
 
-    function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+    function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
+      var
+        hfabasedef: tdef;
       begin
       begin
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
            if push_addr_param for the def is true
            if push_addr_param for the def is true
@@ -124,7 +179,7 @@ unit cpupara;
             orddef:
             orddef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             floatdef:
             floatdef:
-              getparaloc:=LOC_MMREGISTER
+              getparaloc:=LOC_MMREGISTER;
             enumdef:
             enumdef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             pointerdef:
             pointerdef:
@@ -134,7 +189,10 @@ unit cpupara;
             classrefdef:
             classrefdef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             recorddef:
             recorddef:
-              getparaloc:=LOC_REGISTER;
+              if not is_hfa(p,hfabasedef) then
+                getparaloc:=LOC_REGISTER
+              else
+                getparaloc:=LOC_MMREGISTER;
             objectdef:
             objectdef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             stringdef:
             stringdef:
@@ -147,12 +205,12 @@ unit cpupara;
             filedef:
             filedef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             arraydef:
             arraydef:
-              getparaloc:=LOC_REFERENCE;
-            setdef:
-              if is_smallset(p) then
+              if not is_hfa(p,hfabasedef) then
                 getparaloc:=LOC_REGISTER
                 getparaloc:=LOC_REGISTER
               else
               else
-                getparaloc:=LOC_REFERENCE;
+                getparaloc:=LOC_MMREGISTER;
+            setdef:
+              getparaloc:=LOC_REGISTER;
             variantdef:
             variantdef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             { avoid problems with errornous definitions }
             { avoid problems with errornous definitions }
@@ -164,7 +222,9 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function tcpuparamanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
+    function tcpuparamanager.push_addr_param(varspez: tvarspez; def :tdef; calloption: tproccalloption): boolean;
+      var
+        hfabasedef: tdef;
       begin
       begin
         result:=false;
         result:=false;
         if varspez in [vs_var,vs_out,vs_constref] then
         if varspez in [vs_var,vs_out,vs_constref] then
@@ -174,19 +234,36 @@ unit cpupara;
           end;
           end;
         case def.typ of
         case def.typ of
           objectdef:
           objectdef:
-            result:=not(Is_HFA(def) and (is_object(def) and ((varspez=vs_const) or (def.size=0));
+            result:=is_object(def);
           recorddef:
           recorddef:
-            { note: should this ever be changed, make sure that const records
-                are always passed by reference for calloption=pocall_mwpascal }
-            result:=(varspez=vs_const) or (def.size=0);
+            { ABI: any composite > 16 bytes that not a hfa/hva
+              Special case: MWPascal, which passes all const parameters by
+                reference for compatibility reasons
+            }
+            result:=
+              ((varspez=vs_const) and
+               (calloption=pocall_mwpascal)) or
+              (not is_hfa(def,hfabasedef) and
+               (def.size>16));
           variantdef,
           variantdef,
           formaldef:
           formaldef:
             result:=true;
             result:=true;
+          { arrays are composites and hence treated the same as records by the
+            ABI (watch out for C, where an array is a pointer)
+            Also: all other platforms pass const arrays by reference. Do the
+              same here, because there is too much hacky code out there that
+              relies on this ("array[0..0] of x" passed as const parameter and
+              then indexed beyond its bounds) }
           arraydef:
           arraydef:
-            result:=(tarraydef(def).highrange>=tarraydef(def).lowrange) or
-                             is_open_array(def) or
-                             is_array_of_const(def) or
-                             is_array_constructor(def);
+            result:=
+              (calloption in cdecl_pocalls) or
+              is_open_array(def) or
+              is_array_of_const(def) or
+              is_array_constructor(def) or
+              ((tarraydef(def).highrange>=tarraydef(def).lowrange) and
+               ((varspez=vs_const) or
+                (not is_hfa(def,hfabasedef) and
+                 (def.size>16))));
           setdef :
           setdef :
             result:=def.size>16;
             result:=def.size>16;
           stringdef :
           stringdef :
@@ -195,511 +272,371 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function tcpuparamanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
-      var
-        i: longint;
-        sym: tsym;
-        fpufield: boolean;
+    function tcpuparamanager.ret_in_param(def: tdef; pd: tabstractprocdef): boolean;
       begin
       begin
         if handle_common_ret_in_param(def,pd,result) then
         if handle_common_ret_in_param(def,pd,result) then
           exit;
           exit;
-        case def.typ of
-          recorddef:
-            begin
-              result:=def.size>4;
-              if not result and
-                 (target_info.abi in [abi_default,abi_armeb]) then
-                begin
-                  { in case of the old ARM abi (APCS), a struct is returned in
-                    a register only if it is simple. And what is a (non-)simple
-                    struct:
-
-                    "A non-simple type is any non-floating-point type of size
-                     greater than one word (including structures containing only
-                     floating-point fields), and certain single-word structured
-                     types."
-                       (-- ARM APCS documentation)
-
-                    So only floating point types or more than one word ->
-                    definitely non-simple (more than one word is already
-                    checked above). This includes unions/variant records with
-                    overlaid floating point and integer fields.
-
-                    Smaller than one word struct types are simple if they are
-                    "integer-like", and:
-
-                    "A structure is termed integer-like if its size is less than
-                    or equal to one word, and the offset of each of its
-                    addressable subfields is zero."
-                      (-- ARM APCS documentation)
-
-                    An "addressable subfield" is a field of which you can take
-                    the address, which in practive means any non-bitfield.
-                    In Pascal, there is no way to express the difference that
-                    you can have in C between "char" and "int :8". In this
-                    context, we use the fake distinction that a type defined
-                    inside the record itself (such as "a: 0..255;") indicates
-                    a bitpacked field while a field using a different type
-                    (such as "a: byte;") is not.
-                  }
-                  for i:=0 to trecorddef(def).symtable.SymList.count-1 do
-                    begin
-                      sym:=tsym(trecorddef(def).symtable.SymList[i]);
-                      if sym.typ<>fieldvarsym then
-                        continue;
-                      { bitfield -> ignore }
-                      if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
-                         (tfieldvarsym(sym).vardef.typ in [orddef,enumdef]) and
-                         (tfieldvarsym(sym).vardef.owner.defowner=def) then
-                        continue;
-                      { all other fields must be at offset zero }
-                      if tfieldvarsym(sym).fieldoffset<>0 then
-                        begin
-                          result:=true;
-                          exit;
-                        end;
-                      { floating point field -> also by reference }
-                      if tfieldvarsym(sym).vardef.typ=floatdef then
-                        begin
-                          result:=true;
-                          exit;
-                        end;
-                    end;
-                end;
-            end;
-          procvardef:
-            if not tprocvardef(def).is_addressonly then
-              result:=true
-            else
-              result:=false
-          else
-            result:=inherited ret_in_param(def,pd);
-        end;
+        { ABI: if the parameter would be passed in registers, it is returned
+            in those registers; otherwise, it's returned by reference }
+        result:=push_addr_param(vs_value,def,pd.proccalloption);
       end;
       end;
 
 
 
 
-    procedure tcpuparamanager.init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
-      begin
-        curintreg:=RS_R0;
-        curfloatreg:=RS_F0;
-        curmmreg:=RS_D0;
-        cur_stack_offset:=0;
-        sparesinglereg := NR_NO;
-      end;
-
-
-    function tcpuparamanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-        var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
-
+    procedure tcpuparamanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
       var
       var
-        nextintreg,nextfloatreg,nextmmreg : tsuperregister;
-        paradef : tdef;
-        paraloc : pcgparalocation;
-        stack_offset : aword;
-        hp : tparavarsym;
-        loc : tcgloc;
-        paracgsize   : tcgsize;
-        paralen : longint;
-        i : integer;
-        firstparaloc: boolean;
-
-      procedure assignintreg;
-        begin
-          { In case of po_delphi_nested_cc, the parent frame pointer
-            is always passed on the stack. }
-           if (nextintreg<=RS_R3) and
-              (not(vo_is_parentfp in hp.varoptions) or
-               not(po_delphi_nested_cc in p.procoptions)) then
-             begin
-               paraloc^.loc:=LOC_REGISTER;
-               paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-               inc(nextintreg);
-             end
-           else
-             begin
-               paraloc^.loc:=LOC_REFERENCE;
-               paraloc^.reference.index:=NR_STACK_POINTER_REG;
-               paraloc^.reference.offset:=stack_offset;
-               inc(stack_offset,4);
-            end;
-        end;
-
-
+        hp: tparavarsym;
+        i: longint;
       begin
       begin
-        result:=0;
-        nextintreg:=curintreg;
-        nextfloatreg:=curfloatreg;
-        nextmmreg:=curmmreg;
-        stack_offset:=cur_stack_offset;
-
         for i:=0 to paras.count-1 do
         for i:=0 to paras.count-1 do
           begin
           begin
             hp:=tparavarsym(paras[i]);
             hp:=tparavarsym(paras[i]);
-            paradef:=hp.vardef;
-
-            hp.paraloc[side].reset;
-
-            { currently only support C-style array of const,
-              there should be no location assigned to the vararg array itself }
-            if (p.proccalloption in cstylearrayofconst) and
-               is_array_of_const(paradef) then
-              begin
-                paraloc:=hp.paraloc[side].add_location;
-                { hack: the paraloc must be valid, but is not actually used }
-                paraloc^.loc:=LOC_REGISTER;
-                paraloc^.register:=NR_R0;
-                paraloc^.size:=OS_ADDR;
-                break;
-              end;
-
-            if push_addr_param(hp.varspez,paradef,p.proccalloption) then
-              begin
-                paradef:=getpointerdef(paradef);
-                loc:=LOC_REGISTER;
-                paracgsize := OS_ADDR;
-                paralen := tcgsize2size[OS_ADDR];
-              end
-            else
+            { hidden function result parameter is passed in X8 (doesn't have to
+              be valid on return) according to the ABI
+
+              -- don't follow the ABI for managed types, because
+               a) they are passed in registers as parameters, so we should also
+                  return them in a register to be ABI-compliant (which we can't
+                  because the entire compiler is built around the idea that
+                  they are returned by reference, for ref-counting performance
+                  and Delphi-compatibility reasons)
+               b) there are hacks in the system unit that expect that you can
+                  call
+                    function f: com_interface;
+                  as
+                    procedure p(out o: obj);
+                  That can only work in case we do not use x8 to return them
+                  from the function, but the regular first parameter register.
+
+              As the ABI says this behaviour is ok for C++ classes with a
+              non-trivial copy constructor or destructor, it seems reasonable
+              for us to do this for managed types as well.}
+            if (vo_is_funcret in hp.varoptions) and
+               not is_managed_type(hp.vardef) then
               begin
               begin
-                if not is_special_array(paradef) then
-                  paralen := paradef.size
-                else
-                  paralen := tcgsize2size[def_cgsize(paradef)];
-                loc := getparaloc(p.proccalloption,paradef,isvariadic);
-                if (paradef.typ in [objectdef,arraydef,recorddef]) and
-                  not is_special_array(paradef) and
-                  (hp.varspez in [vs_value,vs_const]) then
-                  paracgsize := int_cgsize(paralen)
-                else
+                hp.paraloc[side].reset;
+                hp.paraloc[side].size:=OS_ADDR;
+                hp.paraloc[side].alignment:=voidpointertype.alignment;
+                hp.paraloc[side].intsize:=voidpointertype.size;
+                hp.paraloc[side].def:=getpointerdef(hp.vardef);
+                with hp.paraloc[side].add_location^ do
                   begin
                   begin
-                    paracgsize:=def_cgsize(paradef);
-                    { for things like formaldef }
-                    if (paracgsize=OS_NO) then
-                      begin
-                        paracgsize:=OS_ADDR;
-                        paralen:=tcgsize2size[OS_ADDR];
-                        paradef:=voidpointertype;
-                      end;
+                    size:=OS_ADDR;
+                    def:=hp.paraloc[side].def;
+                    loc:=LOC_REGISTER;
+                    register:=NR_X8;
                   end
                   end
-              end;
-
-             hp.paraloc[side].size:=paracgsize;
-             hp.paraloc[side].Alignment:=std_param_align;
-             hp.paraloc[side].intsize:=paralen;
-             hp.paraloc[side].def:=paradef;
-             firstparaloc:=true;
-
-{$ifdef EXTDEBUG}
-             if paralen=0 then
-               internalerror(200410311);
-{$endif EXTDEBUG}
-             while paralen>0 do
-               begin
-                 paraloc:=hp.paraloc[side].add_location;
-
-                 if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
-                   case paracgsize of
-                     OS_F32:
-                       paraloc^.size:=OS_32;
-                     OS_F64:
-                       paraloc^.size:=OS_32;
-                     else
-                       internalerror(2005082901);
-                   end
-                 else if (paracgsize in [OS_NO,OS_64,OS_S64]) then
-                   paraloc^.size := OS_32
-                 else
-                   paraloc^.size:=paracgsize;
-                 case loc of
-                    LOC_REGISTER:
-                      begin
-                        { align registers for eabi }
-                        if (target_info.abi in [abi_eabi,abi_eabihf]) and
-                           firstparaloc and
-                           (paradef.alignment=8) then
-                          begin
-                            if (nextintreg in [RS_R1,RS_R3]) then
-                              inc(nextintreg)
-                            else if nextintreg>RS_R3 then
-                              stack_offset:=align(stack_offset,8);
-                          end;
-                        { this is not abi compliant
-                          why? (FK) }
-                        if nextintreg<=RS_R3 then
-                          begin
-                            paraloc^.loc:=LOC_REGISTER;
-                            paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-                            inc(nextintreg);
-                          end
-                        else
-                          begin
-                            { LOC_REFERENCE always contains everything that's left }
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            if (side=callerside) then
-                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            inc(stack_offset,align(paralen,4));
-                            paralen:=0;
-                         end;
-                      end;
-                    LOC_FPUREGISTER:
-                      begin
-                        if nextfloatreg<=RS_F3 then
-                          begin
-                            paraloc^.loc:=LOC_FPUREGISTER;
-                            paraloc^.register:=newreg(R_FPUREGISTER,nextfloatreg,R_SUBWHOLE);
-                            inc(nextfloatreg);
-                          end
-                        else
-                          begin
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            case paraloc^.size of
-                              OS_F32:
-                                inc(stack_offset,4);
-                              OS_F64:
-                                inc(stack_offset,8);
-                              OS_F80:
-                                inc(stack_offset,10);
-                              OS_F128:
-                                inc(stack_offset,16);
-                              else
-                                internalerror(200403201);
-                            end;
-                          end;
-                      end;
-                    LOC_MMREGISTER:
-                      begin
-                        if (nextmmreg<=RS_D7) or
-                           ((paraloc^.size = OS_F32) and
-                            (sparesinglereg<>NR_NO)) then
-                          begin
-                            paraloc^.loc:=LOC_MMREGISTER;
-                            case paraloc^.size of
-                              OS_F32:
-                                if sparesinglereg = NR_NO then 
-                                  begin     
-                                    paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFS);
-                                    sparesinglereg:=newreg(R_MMREGISTER,nextmmreg-RS_S0+RS_S1,R_SUBFS);
-                                    inc(nextmmreg);
-                                  end
-                                else
-                                  begin
-                                    paraloc^.register:=sparesinglereg;
-                                    sparesinglereg := NR_NO;
-                                  end;
-                              OS_F64:
-                                begin
-                                  paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFD);
-                                  inc(nextmmreg);
-                                end;
-                              else
-                                internalerror(2012031601);
-                            end;
-                          end
-                        else
-                          begin
-                            { once a floating point parameters has been placed
-                            on the stack we must not pass any more in vfp regs
-                            even if there is a single precision register still
-                            free}
-                            sparesinglereg := NR_NO;
-                            { LOC_REFERENCE always contains everything that's left }
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            if (side=callerside) then
-                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            inc(stack_offset,align(paralen,4));
-                            paralen:=0;
-                         end;
-                      end;
-                    LOC_REFERENCE:
-                      begin
-                        if push_addr_param(hp.varspez,paradef,p.proccalloption) then
-                          begin
-                            paraloc^.size:=OS_ADDR;
-                            assignintreg
-                          end
-                        else
-                          begin
-                            { align stack for eabi }
-                            if (target_info.abi in [abi_eabi,abi_eabihf]) and
-                               firstparaloc and
-                               (paradef.alignment=8) then
-                              stack_offset:=align(stack_offset,8);
-
-                             paraloc^.size:=paracgsize;
-                             paraloc^.loc:=LOC_REFERENCE;
-                             paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                             paraloc^.reference.offset:=stack_offset;
-                             inc(stack_offset,align(paralen,4));
-                             paralen:=0
-                          end;
-                      end;
-                    else
-                      internalerror(2002071002);
-                 end;
-                 if side=calleeside then
-                   begin
-                     if paraloc^.loc=LOC_REFERENCE then
-                       begin
-                         paraloc^.reference.index:=NR_FRAME_POINTER_REG;
-                         { on non-Darwin, the framepointer contains the value
-                           of the stack pointer on entry. On Darwin, the
-                           framepointer points to the previously saved
-                           framepointer (which is followed only by the saved
-                           return address -> framepointer + 4 = stack pointer
-                           on entry }
-                         if not(target_info.system in systems_darwin) then
-                           inc(paraloc^.reference.offset,4)
-                         else
-                           inc(paraloc^.reference.offset,8);
-                       end;
-                   end;
-                 dec(paralen,tcgsize2size[paraloc^.size]);
-                 firstparaloc:=false
-               end;
+              end
+            else
+              alloc_para(hp.paraloc[side],p,hp.varspez,side,hp.vardef,isvariadic,
+                (vo_is_parentfp in hp.varoptions) and
+                (po_delphi_nested_cc in p.procoptions));
           end;
           end;
-        curintreg:=nextintreg;
-        curfloatreg:=nextfloatreg;
-        curmmreg:=nextmmreg;
-        cur_stack_offset:=stack_offset;
-        result:=cur_stack_offset;
       end;
       end;
 
 
 
 
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       var
       var
-        paraloc : pcgparalocation;
-        retcgsize  : tcgsize;
+        retcgsize: tcgsize;
       begin
       begin
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
            exit;
            exit;
 
 
-        paraloc:=result.add_location;
-        { Return in FPU register? }
-        if result.def.typ=floatdef then
+         { in this case, it must be returned in registers as if it were passed
+           as the first parameter }
+         init_para_alloc_values;
+         alloc_para(result,p,vs_value,side,result.def,false,false);
+         { sanity check (LOC_VOID for empty records) }
+         if not assigned(result.location) or
+            not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
+           internalerror(2014113001);
+      end;
+
+
+    function tcpuparamanager.param_use_paraloc(const cgpara: tcgpara): boolean;
+      begin
+        { we always set up a stack frame -> we can always access the parameters
+          this way }
+        result:=
+          (cgpara.location^.loc=LOC_REFERENCE) and
+          not assigned(cgpara.location^.next);
+      end;
+
+
+    procedure tcpuparamanager.init_para_alloc_values;
+      begin
+        curintreg:=RS_FIRST_INT_PARAM_SUPREG;
+        curmmreg:=RS_FIRST_MM_PARAM_SUPREG;
+        curstackoffset:=0;
+      end;
+
+
+    procedure tcpuparamanager.alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+      var
+        hfabasedef, locdef: tdef;
+        paraloc: pcgparalocation;
+        paralen, stackslotlen: asizeint;
+        loc: tcgloc;
+        paracgsize, locsize: tcgsize;
+        firstparaloc: boolean;
+      begin
+        result.reset;
+
+        { currently only support C-style array of const,
+          there should be no location assigned to the vararg array itself }
+        if (p.proccalloption in cstylearrayofconst) and
+           is_array_of_const(paradef) then
           begin
           begin
-            if target_info.abi = abi_eabihf then 
-              begin
-                paraloc^.loc:=LOC_MMREGISTER;
-                case retcgsize of
-                  OS_64,
-                  OS_F64:
-                    begin
-                      paraloc^.register:=NR_MM_RESULT_REG;
-                    end;
-                  OS_32,
-                  OS_F32:
-                    begin
-                      paraloc^.register:=NR_S0;
-                    end;
-                  else
-                    internalerror(2012032501);
-                end;
-                paraloc^.size:=retcgsize;
-              end
-            else if (p.proccalloption in [pocall_softfloat]) or
-               (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
-              begin
-                case retcgsize of
-                  OS_64,
-                  OS_F64:
-                    begin
-                      paraloc^.loc:=LOC_REGISTER;
-                      if target_info.endian = endian_big then
-                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
-                      else
-                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
-                      paraloc^.size:=OS_32;
-                      paraloc:=result.add_location;
-                      paraloc^.loc:=LOC_REGISTER;
-                      if target_info.endian = endian_big then
-                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
-                      else
-                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
-                      paraloc^.size:=OS_32;
-                    end;
-                  OS_32,
-                  OS_F32:
-                    begin
-                      paraloc^.loc:=LOC_REGISTER;
-                      paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                      paraloc^.size:=OS_32;
-                    end;
-                  else
-                    internalerror(2005082603);
-                end;
-              end
-            else
-              begin
-                paraloc^.loc:=LOC_FPUREGISTER;
-                paraloc^.register:=NR_FPU_RESULT_REG;
-                paraloc^.size:=retcgsize;
-              end;
+            paraloc:=result.add_location;
+            { hack: the paraloc must be valid, but is not actually used }
+            paraloc^.loc:=LOC_REGISTER;
+            paraloc^.register:=NR_X0;
+            paraloc^.size:=OS_ADDR;
+            exit;
+          end;
+
+        if push_addr_param(varspez,paradef,p.proccalloption) then
+          begin
+            paradef:=getpointerdef(paradef);
+            loc:=LOC_REGISTER;
+            paracgsize:=OS_ADDR;
+            paralen:=tcgsize2size[OS_ADDR];
           end
           end
-          { Return in register }
         else
         else
           begin
           begin
-            if retcgsize in [OS_64,OS_S64] then
-              begin
-                paraloc^.loc:=LOC_REGISTER;
-                if target_info.endian = endian_big then
-                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
-                else
-                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
-                paraloc^.size:=OS_32;
-                paraloc:=result.add_location;
-                paraloc^.loc:=LOC_REGISTER;
-                if target_info.endian = endian_big then
-                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
-                else
-                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
-                paraloc^.size:=OS_32;
-              end
+            if not is_special_array(paradef) then
+              paralen:=paradef.size
+            else
+              paralen:=tcgsize2size[def_cgsize(paradef)];
+            loc:=getparaloc(p.proccalloption,paradef);
+            if (paradef.typ in [objectdef,arraydef,recorddef]) and
+               not is_special_array(paradef) and
+               (varspez in [vs_value,vs_const]) then
+              paracgsize:=int_cgsize(paralen)
             else
             else
               begin
               begin
-                paraloc^.loc:=LOC_REGISTER;
-                paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                if (result.intsize<>3) then
-                  paraloc^.size:=retcgsize
-                else
-                  paraloc^.size:=OS_32;
-              end;
+                paracgsize:=def_cgsize(paradef);
+                { for things like formaldef }
+                if paracgsize=OS_NO then
+                  begin
+                    paracgsize:=OS_ADDR;
+                    paralen:=tcgsize2size[OS_ADDR];
+                    paradef:=voidpointertype;
+                  end;
+              end
           end;
           end;
+
+          { get hfa basedef if applicable }
+          if not is_hfa(paradef,hfabasedef) then
+            hfabasedef:=nil;
+
+         result.size:=paracgsize;
+         result.alignment:=std_param_align;
+         result.intsize:=paralen;
+         result.def:=paradef;
+
+         { empty record: skipped (explicitly defined by Apple ABI, undefined
+           by general ABI; libffi also skips them in all cases) }
+         if not is_special_array(paradef) and
+            (paradef.size=0) then
+           begin
+             paraloc:=result.add_location;
+             paraloc^.loc:=LOC_VOID;
+             paraloc^.def:=paradef;
+             paraloc^.size:=OS_NO;
+             exit;
+           end;
+
+         { sufficient registers left? }
+         case loc of
+           LOC_REGISTER:
+             begin
+               { In case of po_delphi_nested_cc, the parent frame pointer
+                 is always passed on the stack. }
+               if isdelphinestedcc then
+                 loc:=LOC_REFERENCE
+               else if curintreg+((paralen-1) shr 3)>RS_LAST_INT_PARAM_SUPREG then
+                 begin
+                   { not enough integer registers left -> no more register
+                     parameters, copy all to stack
+                   }
+                   curintreg:=succ(RS_LAST_INT_PARAM_SUPREG);
+                   loc:=LOC_REFERENCE;
+                 end;
+             end;
+           LOC_MMREGISTER:
+             begin;
+               { every hfa element must be passed in a separate register }
+               if (assigned(hfabasedef) and
+                   (curmmreg+(paralen div hfabasedef.size)>RS_LAST_MM_PARAM_SUPREG)) or
+                  (curmmreg+((paralen-1) shr 3)>RS_LAST_MM_PARAM_SUPREG) then
+                 begin
+                   { not enough mm registers left -> no more register
+                     parameters, copy all to stack
+                   }
+                   curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
+                   loc:=LOC_REFERENCE;
+                 end;
+             end;
+         end;
+
+         { allocate registers/stack locations }
+         firstparaloc:=true;
+         repeat
+           paraloc:=result.add_location;
+
+           { set paraloc size/def }
+           if assigned(hfabasedef) then
+             begin
+               locsize:=def_cgsize(hfabasedef);
+               locdef:=hfabasedef;
+             end
+           { make sure we don't lose whether or not the type is signed }
+           else if (loc=LOC_REGISTER) and
+                   (paradef.typ<>orddef) then
+             begin
+               locsize:=int_cgsize(paralen);
+               locdef:=get_paraloc_def(paradef,paralen,firstparaloc);
+             end
+           else
+             begin
+               locsize:=paracgsize;
+               locdef:=paradef;
+             end;
+           if locsize in [OS_NO,OS_128,OS_S128] then
+             begin
+               if paralen>4 then
+                 begin
+                   paraloc^.size:=OS_INT;
+                   paraloc^.def:=u64inttype;
+                 end
+               else
+                 begin
+                   { for 3-byte records }
+                   paraloc^.size:=OS_32;
+                   paraloc^.def:=u32inttype;
+                 end;
+             end
+           else
+             begin
+               paraloc^.size:=locsize;
+               paraloc^.def:=locdef;
+             end;
+
+           { paraloc loc }
+           paraloc^.loc:=loc;
+
+           { assign register/stack address }
+           case loc of
+             LOC_REGISTER:
+               begin
+                 paraloc^.register:=newreg(R_INTREGISTER,curintreg,cgsize2subreg(R_INTREGISTER,paraloc^.size));
+                 inc(curintreg);
+                 dec(paralen,tcgsize2size[paraloc^.size]);
+
+                 { "The general ABI specifies that it is the callee's
+                    responsibility to sign or zero-extend arguments having fewer
+                    than 32 bits, and that unused bits in a register are
+                    unspecified. In iOS, however, the caller must perform such
+                    extensions, up to 32 bits." }
+                 if (target_info.abi=abi_aarch64_darwin) and
+                    (side=callerside) and
+                    is_ordinal(paradef) and
+                    (paradef.size<4) then
+                   paraloc^.size:=OS_32;
+
+                 { in case it's a composite, "The argument is passed as though
+                   it had been loaded into the registers from a double-word-
+                   aligned address with an appropriate sequence of LDR
+                   instructions loading consecutive registers from memory" ->
+                   in case of big endian, values in not completely filled
+                   registers must be shifted to the top bits }
+                 if (target_info.endian=endian_big) and
+                    not(paraloc^.size in [OS_64,OS_S64]) and
+                    (paradef.typ in [setdef,recorddef,arraydef,objectdef]) then
+                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size]);
+               end;
+             LOC_MMREGISTER:
+               begin
+                 paraloc^.register:=newreg(R_MMREGISTER,curmmreg,cgsize2subreg(R_MMREGISTER,paraloc^.size));
+                 inc(curmmreg);
+                 dec(paralen,tcgsize2size[paraloc^.size]);
+               end;
+             LOC_REFERENCE:
+               begin
+                  paraloc^.size:=paracgsize;
+                  paraloc^.loc:=LOC_REFERENCE;
+
+                  { the current stack offset may not be properly aligned in
+                    case we're on Darwin have allocated a non-variadic argument
+                    < 8 bytes previously }
+                  if target_info.abi=abi_aarch64_darwin then
+                    curstackoffset:=align(curstackoffset,paraloc^.def.alignment);
+
+                  { on Darwin, non-variadic arguments take up their actual size
+                    on the stack; on other platforms, they take up a multiple of
+                    8 bytes }
+                  if (target_info.abi=abi_aarch64_darwin) and
+                     not isvariadic then
+                    stackslotlen:=paralen
+                  else
+                    stackslotlen:=align(paralen,8);
+
+                  { from the ABI: if arguments occupy partial stack space, they
+                    have to occupy the lowest significant bits of a register
+                    containing that value which is then stored to memory ->
+                    in case of big endian, skip the alignment bytes (if any) }
+                  if target_info.endian=endian_little then
+                    paraloc^.reference.offset:=curstackoffset
+                  else
+                    paraloc^.reference.offset:=curstackoffset+stackslotlen-paralen;
+                  if side=callerside then
+                    paraloc^.reference.index:=NR_STACK_POINTER_REG
+                  else
+                    begin
+                      paraloc^.reference.index:=NR_FRAME_POINTER_REG;
+                      inc(paraloc^.reference.offset,16);
+                    end;
+                  inc(curstackoffset,stackslotlen);
+                  paralen:=0
+               end;
+             else
+               internalerror(2002071002);
+           end;
+         firstparaloc:=false;
+         { <=0 for sign/zero-extended locations }
+         until paralen<=0;
       end;
       end;
 
 
 
 
-    function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
-      var
-        cur_stack_offset: aword;
-        curintreg, curfloatreg, curmmreg: tsuperregister;
-        sparesinglereg:tregister;
+    function tcpuparamanager.create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;
       begin
       begin
-        init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_para_alloc_values;
 
 
-        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,false);
+        create_paraloc_info_intern(p,side,p.paras,false);
+        result:=curstackoffset;
 
 
         create_funcretloc_info(p,side);
         create_funcretloc_info(p,side);
      end;
      end;
 
 
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
-      var
-        cur_stack_offset: aword;
-        curintreg, curfloatreg, curmmreg: tsuperregister;
-        sparesinglereg:tregister;
+    function tcpuparamanager.create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;
       begin
       begin
-        init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_para_alloc_values;
 
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
-        if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+        { non-variadic parameters }
+        create_paraloc_info_intern(p,callerside,p.paras,false);
+        if p.proccalloption in cstylearrayofconst then
+          begin
+            { on Darwin, we cannot use any registers for variadic parameters }
+            if target_info.abi=abi_aarch64_darwin then
+              begin
+                curintreg:=succ(RS_LAST_INT_PARAM_SUPREG);
+                curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
+              end;
+            { continue loading the parameters  }
+            create_paraloc_info_intern(p,callerside,varargspara,true);
+            result:=curstackoffset;
+          end
         else
         else
           internalerror(200410231);
           internalerror(200410231);
       end;
       end;

+ 68 - 0
compiler/aarch64/cpupi.pas

@@ -0,0 +1,68 @@
+{
+    Copyright (c) 2002 by Florian Klaempfl
+
+    This unit contains the CPU specific part of tprocinfo
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cpupi;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    procinfo,
+    psub;
+
+  type
+    taarch64procinfo=class(tcgprocinfo)
+      constructor create(aparent: tprocinfo); override;
+      procedure set_first_temp_offset; override;
+    end;
+
+implementation
+
+  uses
+    tgobj,
+    cpubase;
+
+  constructor taarch64procinfo.create(aparent: tprocinfo);
+    begin
+      inherited;
+      { use the stack pointer as framepointer, because
+         1) we exactly know the offsets of the temps from the stack pointer
+            after pass 1 (based on the require parameter stack size for called
+            routines), while we don't know it for the frame pointer (it depends
+            on the number of saved registers)
+         2) temp offsets from the stack pointer are positive while those from
+            the frame pointer are negative, and we can directly encode much
+            bigger positive offsets in the instructions
+      }
+      framepointer:=NR_STACK_POINTER_REG;
+    end;
+
+  procedure taarch64procinfo.set_first_temp_offset;
+    begin
+     { leave room for allocated parameters }
+     tg.setfirsttemp(align(maxpushedparasize,16));
+    end;
+
+
+begin
+  cprocinfo:=taarch64procinfo;
+end.

+ 70 - 0
compiler/aarch64/cputarg.pas

@@ -0,0 +1,70 @@
+{
+    Copyright (c) 2001-2002 by Peter Vreman
+
+    Includes the AArch64 dependent target units
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cputarg;
+
+{$i fpcdefs.inc}
+
+interface
+
+
+implementation
+
+    uses
+      systems { prevent a syntax error when nothing is included }
+
+{**************************************
+             Targets
+**************************************}
+
+    {$ifndef NOTARGETLINUX}
+//      ,t_linux
+    {$endif}
+    {$ifndef NOTARGETBSD}
+      ,t_bsd
+    {$endif}
+
+{**************************************
+             Assemblers
+**************************************}
+
+    {$ifndef NOAGCPUGAS}
+      ,agcpugas
+    {$endif}
+
+{**************************************
+        Assembler Readers
+**************************************}
+
+  {$ifndef NoRaarmgas}
+       ,racpugas
+  {$endif NoRaarmgas}
+
+{**************************************
+             Debuginfo
+**************************************}
+
+  {$ifndef NoDbgDwarf}
+      ,dbgdwarf
+  {$endif NoDbgDwarf}
+      ;
+
+end.

+ 228 - 0
compiler/aarch64/hlcgcpu.pas

@@ -0,0 +1,228 @@
+{
+    Copyright (c) 1998-2010 by Florian Klaempfl and Jonas Maebe
+    Member of the Free Pascal development team
+
+    This unit contains routines to create a pass-through high-level code
+    generator. This is used by most regular code generators.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+unit hlcgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    symtype,
+    aasmdata,
+    symdef,
+    cgbase,cgutils,
+    hlcgobj, hlcg2ll;
+
+  type
+    thlcgaarch64 = class(thlcg2ll)
+      procedure a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister); override;
+      procedure a_load_subsetreg_subsetreg(list: TAsmlist; fromsubsetsize, tosubsetsize: tdef; const fromsreg, tosreg: tsubsetregister); override;
+
+      procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
+     protected
+      procedure a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); override;
+    end;
+
+  procedure create_hlcodegen;
+
+implementation
+
+  uses
+    verbose,globtype,fmodule,
+    aasmbase,aasmtai,
+    symconst,symsym,defutil,
+    cpubase,aasmcpu,parabase,
+    cgobj,cgcpu;
+
+  procedure thlcgaarch64.a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister);
+    var
+      op: tasmop;
+      tocgsize: tcgsize;
+      tmpdestreg: tregister;
+    begin
+      tocgsize:=def_cgsize(tosize);
+      if (sreg.startbit<>0) or
+         not(sreg.bitlen in [32,64]) then
+        begin
+          if is_signed(subsetsize) then
+            op:=A_SBFX
+          else
+            op:=A_UBFX;
+          { source and destination register of SBFX/UBFX have to be the same size }
+          if (sreg.subsetregsize in [OS_64,OS_S64]) and
+             not(tocgsize in [OS_64,OS_S64]) then
+            tmpdestreg:=cg.getintregister(list,OS_64)
+          else if not(sreg.subsetregsize in [OS_64,OS_S64]) and
+             (tocgsize in [OS_64,OS_S64]) then
+            tmpdestreg:=cg.getintregister(list,OS_32)
+          else
+            tmpdestreg:=destreg;
+          list.concat(taicpu.op_reg_reg_const_const(op,tmpdestreg,sreg.subsetreg,sreg.startbit,sreg.bitlen));
+          { need to sign extend further or truncate? }
+          if (sreg.subsetregsize=OS_S64) and
+             not(tocgsize in [OS_64,OS_S64]) then
+            cg.a_load_reg_reg(list,OS_S64,tocgsize,tmpdestreg,destreg)
+          else if is_signed(subsetsize) and
+             (tocgsize in [OS_8,OS_16]) then
+            cg.a_load_reg_reg(list,OS_32,tocgsize,tmpdestreg,destreg)
+          else if tmpdestreg<>destreg then
+            cg.a_load_reg_reg(list,def_cgsize(subsetsize),tocgsize,tmpdestreg,destreg)
+        end
+      else
+        cg.a_load_reg_reg(list,def_cgsize(subsetsize),tocgsize,sreg.subsetreg,destreg);
+    end;
+
+
+  procedure makeregssamesize(list: tasmlist; fromsize, tosize: tcgsize; orgfromreg, orgtoreg: tregister; out newfromreg, newtoreg: tregister);
+    begin
+      if (fromsize in [OS_S64,OS_64])<>
+         (tosize in [OS_S64,OS_64]) then
+        begin
+          newfromreg:=cg.makeregsize(list,orgfromreg,OS_64);
+          newtoreg:=cg.makeregsize(list,orgtoreg,OS_64);
+        end
+      else
+        begin
+          newfromreg:=orgfromreg;
+          newtoreg:=orgtoreg;
+        end;
+    end;
+
+
+  procedure thlcgaarch64.a_load_subsetreg_subsetreg(list: TAsmlist; fromsubsetsize, tosubsetsize: tdef; const fromsreg, tosreg: tsubsetregister);
+    var
+      fromreg, toreg: tregister;
+
+    begin
+      { BFM can only insert a bitfield that starts at position 0 in the source
+        source or destination register }
+      if (tosreg.startbit=0) and
+         (fromsreg.bitlen>=tosreg.bitlen) then
+        begin
+          makeregssamesize(list,fromsreg.subsetregsize,tosreg.subsetregsize,fromsreg.subsetreg,tosreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFXIL,toreg,fromreg,fromsreg.startbit,tosreg.bitlen))
+        end
+      else if (fromsreg.startbit=0) and
+         (fromsreg.bitlen>=tosreg.bitlen) then
+        begin
+          makeregssamesize(list,fromsreg.subsetregsize,tosreg.subsetregsize,fromsreg.subsetreg,tosreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFI,toreg,fromreg,tosreg.startbit,tosreg.bitlen))
+        end
+      else
+        inherited;
+    end;
+
+
+  procedure thlcgaarch64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
+    var
+      make_global: boolean;
+      href: treference;
+      hsym: tsym;
+      paraloc: pcgparalocation;
+      op: tasmop;
+    begin
+      if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
+        Internalerror(200006137);
+      if not assigned(procdef.struct) or
+         (procdef.procoptions*[po_classmethod, po_staticmethod,
+           po_methodpointer, po_interrupt, po_iocheck]<>[]) then
+        Internalerror(200006138);
+      if procdef.owner.symtabletype<>ObjectSymtable then
+        Internalerror(200109191);
+
+      make_global:=false;
+      if (not current_module.is_unit) or create_smartlink_library or
+         (procdef.owner.defowner.owner.symtabletype=globalsymtable) then
+        make_global:=true;
+
+      if make_global then
+        list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
+      else
+        list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));
+
+      { set param1 interface to self  }
+      procdef.init_paraloc_info(callerside);
+      hsym:=tsym(procdef.parast.Find('self'));
+      if not(assigned(hsym) and
+        (hsym.typ=paravarsym)) then
+        internalerror(2010103101);
+      paraloc:=tparavarsym(hsym).paraloc[callerside].location;
+      if assigned(paraloc^.next) then
+        InternalError(2013020101);
+
+      case paraloc^.loc of
+        LOC_REGISTER:
+          tcgaarch64(cg).handle_reg_imm12_reg(list,A_SUB,paraloc^.size,paraloc^.register,ioffset,paraloc^.register,NR_IP0,false,true);
+        else
+          internalerror(2010103102);
+      end;
+
+      if (po_virtualmethod in procdef.procoptions) and
+          not is_objectpascal_helper(procdef.struct) then
+        begin
+          if (procdef.extnumber=$ffff) then
+            Internalerror(200006139);
+          { mov  0(%rdi),%rax ; load vmt}
+          reference_reset_base(href,voidpointertype,paraloc^.register,0,sizeof(pint));
+          getcpuregister(list,NR_IP0);
+          a_load_ref_reg(list,voidpointertype,voidpointertype,href,NR_IP0);
+          { jmp *vmtoffs(%eax) ; method offs }
+          reference_reset_base(href,voidpointertype,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
+          op:=A_LDR;
+          tcgaarch64(cg).make_simple_ref(list,op,OS_ADDR,PF_None,href,NR_IP0);
+          list.concat(taicpu.op_reg_ref(op,NR_IP0,href));
+          ungetcpuregister(list,NR_IP0);
+          list.concat(taicpu.op_reg(A_BR,NR_IP0));
+        end
+      else
+        cg.a_jmp_name(list,procdef.mangledname);
+      list.concat(Tai_symbol_end.Createname(labelname));
+    end;
+
+
+  procedure thlcgaarch64.a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt);
+    var
+      toreg: tregister;
+    begin
+      if slopt in [SL_SETZERO,SL_SETMAX] then
+        inherited
+      else if not(sreg.bitlen in [32,64]) then
+        begin
+          makeregssamesize(list,def_cgsize(fromsize),sreg.subsetregsize,fromreg,sreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFI,toreg,fromreg,sreg.startbit,sreg.bitlen))
+        end
+      else
+        a_load_reg_reg(list,fromsize,subsetsize,fromreg,sreg.subsetreg);
+    end;
+
+
+  procedure create_hlcodegen;
+    begin
+      hlcg:=thlcgaarch64.create;
+      create_codegen;
+    end;
+
+
+end.

+ 402 - 0
compiler/aarch64/ncpuadd.pas

@@ -0,0 +1,402 @@
+{
+    Copyright (c) 2014 Jonas Maebe
+
+    Code generation for add nodes on AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuadd;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       node,ncgadd,cpubase;
+
+    type
+       taarch64addnode = class(tcgaddnode)
+       private
+          function  GetResFlags(unsigned:Boolean):TResFlags;
+          function  GetFPUResFlags:TResFlags;
+       protected
+          procedure second_addfloat;override;
+          procedure second_cmpfloat;override;
+          procedure second_cmpboolean;override;
+          procedure second_cmpsmallset;override;
+          procedure second_cmpordinal;override;
+          procedure second_addordinal;override;
+          procedure second_add64bit; override;
+          procedure second_cmp64bit; override;
+       public
+          function use_generic_mul32to64: boolean; override;
+       end;
+
+  implementation
+
+    uses
+      systems,
+      cutils,verbose,
+      paramgr,procinfo,
+      aasmtai,aasmdata,aasmcpu,defutil,
+      cgbase,cgcpu,cgutils,
+      cpupara,
+      ncon,nset,nadd,
+      hlcgobj, ncgutil,cgobj;
+
+{*****************************************************************************
+                               taarch64addnode
+*****************************************************************************}
+
+    function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
+      begin
+        case NodeType of
+          equaln:
+            GetResFlags:=F_EQ;
+          unequaln:
+            GetResFlags:=F_NE;
+          else
+            if not(unsigned) then
+              begin
+                if nf_swapped in flags then
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_GT;
+                    lten:
+                      GetResFlags:=F_GE;
+                    gtn:
+                      GetResFlags:=F_LT;
+                    gten:
+                      GetResFlags:=F_LE;
+                    else
+                      internalerror(2014082010);
+                  end
+                else
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_LT;
+                    lten:
+                      GetResFlags:=F_LE;
+                    gtn:
+                      GetResFlags:=F_GT;
+                    gten:
+                      GetResFlags:=F_GE;
+                    else
+                      internalerror(2014082011);
+                  end;
+              end
+            else
+              begin
+                if nf_swapped in Flags then
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_HI;
+                    lten:
+                      GetResFlags:=F_HS;
+                    gtn:
+                      GetResFlags:=F_LO;
+                    gten:
+                      GetResFlags:=F_LS;
+                    else
+                      internalerror(2014082012);
+                  end
+                else
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_LO;
+                    lten:
+                      GetResFlags:=F_LS;
+                    gtn:
+                      GetResFlags:=F_HI;
+                    gten:
+                      GetResFlags:=F_HS;
+                    else
+                      internalerror(2014082013);
+                  end;
+              end;
+        end;
+      end;
+
+
+    function taarch64addnode.GetFPUResFlags:TResFlags;
+      begin
+        case NodeType of
+          equaln:
+            result:=F_EQ;
+          unequaln:
+            result:=F_NE;
+          else
+            begin
+              if nf_swapped in Flags then
+                case NodeType of
+                  ltn:
+                    result:=F_GT;
+                  lten:
+                    result:=F_GE;
+                  gtn:
+                    result:=F_LO;
+                  gten:
+                    result:=F_LS;
+                  else
+                    internalerror(2014082014);
+                end
+              else
+                case NodeType of
+                  ltn:
+                    result:=F_LO;
+                  lten:
+                    result:=F_LS;
+                  gtn:
+                    result:=F_GT;
+                  gten:
+                    result:=F_GE;
+                  else
+                    internalerror(2014082015);
+                end;
+            end;
+        end;
+      end;
+
+
+    procedure taarch64addnode.second_addfloat;
+      var
+        op : TAsmOp;
+      begin
+        pass_left_right;
+        if nf_swapped in flags then
+          swapleftright;
+
+        { force fpureg as location, left right doesn't matter
+          as both will be in a fpureg }
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+
+        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+        case nodetype of
+          addn :
+            begin
+              op:=A_FADD;
+            end;
+          muln :
+            begin
+              op:=A_FMUL;
+            end;
+          subn :
+            begin
+              op:=A_FSUB;
+            end;
+          slashn :
+            begin
+              op:=A_FDIV;
+            end;
+          else
+            internalerror(200306014);
+        end;
+
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
+           location.register,left.location.register,right.location.register));
+      end;
+
+
+    procedure taarch64addnode.second_cmpfloat;
+      begin
+        pass_left_right;
+        if nf_swapped in flags then
+          swapleftright;
+
+        { force fpureg as location, left right doesn't matter
+          as both will be in a fpureg }
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getfpuresflags;
+
+        { signalling compare so we can get exceptions }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMPE,
+             left.location.register,right.location.register));
+      end;
+
+
+    procedure taarch64addnode.second_cmpboolean;
+      begin
+        pass_left_right;
+        force_reg_left_right(true,true);
+
+        if right.location.loc=LOC_CONSTANT then
+          begin
+            if right.location.value>=0 then
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false)
+            else
+              { avoid overflow if value=low(int64) }
+{$push}{$r-}{$q-}
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
+{$pop}
+          end
+        else
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getresflags(true);
+      end;
+
+
+    procedure taarch64addnode.second_cmpsmallset;
+      var
+        tmpreg : tregister;
+        op: tasmop;
+      begin
+        pass_left_right;
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+
+        force_reg_left_right(true,true);
+
+        if right.location.loc=LOC_CONSTANT then
+          begin
+            { when doing a cmp/cmn on 32 bit, we care whether the *lower 32 bit*
+              is a positive/negative value -> sign extend }
+            if not(right.location.size in [OS_64,OS_S64]) then
+              right.location.value:=longint(right.location.value);
+            if right.location.value>=0 then
+              op:=A_CMP
+            else
+              op:=A_CMN;
+          end
+        else
+          { for DFA }
+          op:=A_NONE;
+
+        case nodetype of
+          equaln,
+          unequaln:
+            begin
+              if right.location.loc=LOC_CONSTANT then
+                tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,op,def_cgsize(resultdef),left.location.register,abs(right.location.value),NR_XZR,NR_NO,false,false)
+              else
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+              location.resflags:=getresflags(true);
+            end;
+          lten,
+          gten:
+            begin
+              if (not(nf_swapped in flags) and
+                  (nodetype=lten)) or
+                 ((nf_swapped in flags) and
+                  (nodetype=gten)) then
+                swapleftright;
+              { we can't handle left as a constant yet }
+              if left.location.loc=LOC_CONSTANT then
+                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+              tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,left.location.size);
+              if right.location.loc=LOC_CONSTANT then
+                begin
+                  hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_AND,resultdef,right.location.value,left.location.register,tmpreg);
+                  tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,op,def_cgsize(resultdef),tmpreg,abs(right.location.value),NR_XZR,NR_NO,false,false)
+                end
+              else
+                begin
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,tmpreg,left.location.register,right.location.register));
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,tmpreg,right.location.register));
+                end;
+              location.resflags:=F_EQ;
+            end;
+          else
+            internalerror(2012042701);
+        end;
+      end;
+
+
+    procedure taarch64addnode.second_cmpordinal;
+      var
+        unsigned : boolean;
+      begin
+        pass_left_right;
+        force_reg_left_right(true,true);
+
+        unsigned:=not(is_signed(left.resultdef)) or
+                  not(is_signed(right.resultdef));
+
+        if right.location.loc = LOC_CONSTANT then
+          begin
+            if right.location.value>=0 then
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false)
+            else
+{$push}{$r-}{$q-}
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
+{$pop}
+          end
+        else
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getresflags(unsigned);
+      end;
+
+
+    procedure taarch64addnode.second_addordinal;
+      const
+        multops: array[boolean] of TAsmOp = (A_SMULL,A_UMULL);
+      var
+        unsigned: boolean;
+      begin
+        { 32x32->64 multiplication }
+        if (nodetype=muln) and
+           is_32bit(left.resultdef) and
+           is_32bit(right.resultdef) and
+           is_64bit(resultdef) then
+          begin
+            unsigned:=not(is_signed(left.resultdef)) or
+                      not(is_signed(right.resultdef));
+            pass_left_right;
+            force_reg_left_right(true,true);
+            { force_reg_left_right can leave right as a LOC_CONSTANT (we can't
+              say "a constant register is okay, but an ordinal constant isn't) }
+            if right.location.loc=LOC_CONSTANT then
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+            location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+            location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+            current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg_reg(multops[unsigned],location.register,left.location.register,right.location.register));
+          end
+        else
+          inherited second_addordinal;
+      end;
+
+
+    procedure taarch64addnode.second_add64bit;
+      begin
+        second_addordinal;
+      end;
+
+
+    procedure taarch64addnode.second_cmp64bit;
+      begin
+        second_cmpordinal;
+      end;
+
+
+    function taarch64addnode.use_generic_mul32to64: boolean;
+      begin
+        result:=false;
+      end;
+
+
+begin
+  caddnode:=taarch64addnode;
+end.

+ 201 - 0
compiler/aarch64/ncpucnv.pas

@@ -0,0 +1,201 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    Generate AArch64 assembler for type converting nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit ncpucnv;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ncnv,ncgcnv;
+
+    type
+      taarch64typeconvnode = class(TCgTypeConvNode)
+        protected
+         function typecheck_int_to_real: tnode; override;
+         function first_int_to_real: tnode; override;
+
+        { procedure second_int_to_int;override; }
+        { procedure second_string_to_string;override; }
+        { procedure second_cstring_to_pchar;override; }
+        { procedure second_string_to_chararray;override; }
+        { procedure second_array_to_pointer;override; }
+        { procedure second_pointer_to_array;override; }
+        { procedure second_chararray_to_string;override; }
+        { procedure second_char_to_string;override; }
+         procedure second_int_to_real;override;
+        { procedure second_real_to_real;override; }
+        { procedure second_cord_to_pointer;override; }
+        { procedure second_proc_to_procvar;override; }
+        { procedure second_bool_to_int;override; }
+         procedure second_int_to_bool;override;
+        { procedure second_load_smallset;override;  }
+        { procedure second_ansistring_to_pchar;override; }
+        { procedure second_pchar_to_string;override; }
+        { procedure second_class_to_intf;override; }
+        { procedure second_char_to_char;override; }
+      end;
+
+implementation
+
+  uses
+    verbose,globals,
+    symdef,aasmdata,aasmbase,
+    defutil,
+    cgbase,cgutils,procinfo,
+    cpubase,aasmcpu,
+    pass_2,cgobj,
+    hlcgobj;
+
+
+{*****************************************************************************
+                             FirstTypeConv
+*****************************************************************************}
+
+  function taarch64typeconvnode.typecheck_int_to_real: tnode;
+    begin
+      { aarch64 supports converting everything to floating point, even fixed
+        point! Unfortunately, it only supports fixed point with a power-of-2
+        fraction, which is not the case for currency.
+
+        Generate the division by 10000 via nodes so the 10000.0 constant can
+        be reused. }
+      if is_currency(resultdef) and
+         not(nf_is_currency in flags) then
+        begin
+          { convert the equivalent int64 value to double without conversion
+            (internal typecast -> will set nf_is_currency flag) }
+          result:=ctypeconvnode.create_internal(left,s64floattype);
+          { turn into currency with conversion, which will divide by 10000
+            (regular typecast) }
+          result:=ctypeconvnode.create(result,s64currencytype);
+          exit;
+        end;
+      { The only other thing we have to take care of: convert values < 32 bit
+        to 32 bit }
+      if left.resultdef.size<4 then
+        begin
+          if is_signed(left.resultdef) then
+            inserttypeconv(left,s32inttype)
+          else
+            inserttypeconv(left,u32inttype)
+        end;
+      result:=inherited;
+    end;
+
+
+  function taarch64typeconvnode.first_int_to_real: tnode;
+    begin
+      result:=nil;
+      expectloc:=LOC_MMREGISTER;
+    end;
+
+
+{*****************************************************************************
+                             SecondTypeConv
+*****************************************************************************}
+
+  procedure taarch64typeconvnode.second_int_to_real;
+    var
+      op: tasmop;
+    begin
+      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
+        internalerror(2014120401);
+      case left.location.size of
+        OS_32,
+        OS_64:
+          op:=A_UCVTF;
+        OS_S32,
+        OS_S64,
+        { for currency and comp }
+        OS_F64:
+          op:=A_SCVTF;
+        else
+          internalerror(2014120402);
+      end;
+      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
+      { no scaling for currency, that's handled in pass_typecheck }
+    end;
+
+
+  procedure taarch64typeconvnode.second_int_to_bool;
+    var
+      resflags: tresflags;
+      hlabel,oldTrueLabel,oldFalseLabel : tasmlabel;
+    begin
+      if (nf_explicit in flags) and
+         not(left.expectloc in [LOC_FLAGS,LOC_JUMP]) then
+        begin
+          inherited;
+          exit;
+        end;
+
+      { can't use the generic code, as it assumes that OP_OR automatically sets
+        the flags. We can also do things more efficiently directly }
+
+      oldTrueLabel:=current_procinfo.CurrTrueLabel;
+      oldFalseLabel:=current_procinfo.CurrFalseLabel;
+      current_asmdata.getjumplabel(current_procinfo.CurrTrueLabel);
+      current_asmdata.getjumplabel(current_procinfo.CurrFalseLabel);
+      secondpass(left);
+      if codegenerror then
+       exit;
+
+      case left.location.loc of
+        LOC_CREFERENCE,
+        LOC_REFERENCE,
+        LOC_REGISTER,
+        LOC_CREGISTER,
+        LOC_JUMP:
+          begin
+             hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+             current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
+             resflags:=F_NE;
+          end;
+        LOC_FLAGS :
+          resflags:=left.location.resflags;
+        else
+          internalerror(2014122902);
+      end;
+      { load flags to register }
+      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+      if is_cbool(resultdef) then
+        begin
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_cond(A_CSETM,location.register,flags_to_cond(resflags)));
+            { truncate? (in case cbools are ever made unsigned) }
+            if resultdef.size<4 then
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,location.size,location.register,location.register);
+        end
+      else
+        cg.g_flags2reg(current_asmdata.CurrAsmList,location.size,resflags,location.register);
+      cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+      current_procinfo.CurrTrueLabel:=oldTrueLabel;
+      current_procinfo.CurrFalseLabel:=oldFalseLabel;
+    end;
+
+
+begin
+   ctypeconvnode:=taarch64typeconvnode;
+end.

+ 184 - 0
compiler/aarch64/ncpuinl.pas

@@ -0,0 +1,184 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    Generates ARM inline nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuinl;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ninl,ncginl;
+
+    type
+      taarch64inlinenode = class(tcgInlineNode)
+        function first_abs_real: tnode; override;
+        function first_sqr_real: tnode; override;
+        function first_sqrt_real: tnode; override;
+        function first_round_real: tnode; override;
+        function first_trunc_real: tnode; override;
+        procedure second_abs_real; override;
+        procedure second_sqr_real; override;
+        procedure second_sqrt_real; override;
+        procedure second_abs_long; override;
+        procedure second_round_real; override;
+        procedure second_trunc_real; override;
+        procedure second_get_frame; override;
+      private
+        procedure load_fpu_location;
+      end;
+
+
+implementation
+
+    uses
+      globtype,verbose,globals,
+      cpuinfo, defutil,symdef,aasmdata,aasmcpu,
+      cgbase,cgutils,pass_1,pass_2,
+      cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
+
+{*****************************************************************************
+                              taarch64inlinenode
+*****************************************************************************}
+
+    procedure taarch64inlinenode.load_fpu_location;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_copy(location,left.location);
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+        location.loc:=LOC_MMREGISTER;
+      end;
+
+
+    function taarch64inlinenode.first_abs_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_sqr_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_sqrt_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_round_real: tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_trunc_real: tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    procedure taarch64inlinenode.second_abs_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FABS,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_sqr_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_sqrt_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_abs_long;
+      var
+        opsize : tcgsize;
+        hp : taicpu;
+      begin
+        secondpass(left);
+        opsize:=def_cgsize(left.resultdef);
+        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+        location:=left.location;
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
+
+        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,location.register,left.location.register),PF_S));
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_CSEL,location.register,location.register,left.location.register,C_GE));
+      end;
+
+
+    procedure taarch64inlinenode.second_round_real;
+      var
+        hreg: tregister;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+        hreg:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
+        { round as floating point using current rounding mode }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTX,hreg,left.location.register));
+        { convert to signed integer rounding towards zero (there's no "round to
+          integer using current rounding mode") }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,hreg));
+      end;
+
+
+    procedure taarch64inlinenode.second_trunc_real;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+        { convert to signed integer rounding towards zero }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_get_frame;
+      begin
+        location_reset(location,LOC_CREGISTER,OS_ADDR);
+        { this routine is used to get the frame pointer for backtracing
+          purposes. current_procinfo.framepointer is set to SP because that one
+          is used to access temps. On most platforms these two frame pointers
+          are the same, but not on AArch64. }
+        location.register:=NR_FRAME_POINTER_REG;
+      end;
+
+begin
+  cinlinenode:=taarch64inlinenode;
+end.

+ 196 - 0
compiler/aarch64/ncpumat.pas

@@ -0,0 +1,196 @@
+{
+    Copyright (c) 1998-2002, 2014 by Florian Klaempfl and Jonas Maebe
+
+    Generate AArch64 assembler for math nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpumat;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,nmat,ncgmat;
+
+    type
+      taarch64moddivnode = class(tmoddivnode)
+         function pass_1: tnode; override;
+         procedure pass_generate_code;override;
+      end;
+
+      taarch64notnode = class(tcgnotnode)
+         procedure second_boolean;override;
+      end;
+
+      taarch64unaryminusnode = class(tcgunaryminusnode)
+         procedure second_float; override;
+      end;
+
+implementation
+
+    uses
+      globtype,systems,constexp,
+      cutils,verbose,globals,
+      symconst,symdef,
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      defutil,
+      cgbase,cgobj,hlcgobj,pass_2,procinfo,
+      ncon,
+      cpubase,
+      ncgutil,cgcpu,cgutils;
+
+{*****************************************************************************
+                             taarch64moddivnode
+*****************************************************************************}
+
+    function taarch64moddivnode.pass_1: tnode;
+      begin
+        result:=inherited pass_1;
+        if not assigned(result) then
+          include(current_procinfo.flags,pi_do_call);
+      end;
+
+
+    procedure taarch64moddivnode.pass_generate_code;
+      var
+         op         : tasmop;
+         tmpreg,
+         numerator,
+         divider,
+         resultreg  : tregister;
+         hl : tasmlabel;
+         overflowloc: tlocation;
+      begin
+       secondpass(left);
+       secondpass(right);
+
+       { set result location }
+       location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+       location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+       resultreg:=location.register;
+
+       { put numerator in register }
+       hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+       numerator:=left.location.register;
+
+       { load divider in a register }
+       hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+       divider:=right.location.register;
+
+       { start division }
+       if is_signed(left.resultdef) then
+         op:=A_SDIV
+       else
+         op:=A_UDIV;
+       current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+
+       { no divide-by-zero detection available in hardware, emulate (if it's a
+         constant, this will have been detected earlier already) }
+       if (right.nodetype<>ordconstn) then
+         begin
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
+             right.location.register,0));
+
+           current_asmdata.getjumplabel(hl);
+           current_asmdata.CurrAsmList.concat(taicpu.op_cond_sym(A_B,C_NE,hl));
+           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DIVBYZERO',false);
+           cg.a_label(current_asmdata.CurrAsmList,hl);
+         end;
+
+       { in case of overflow checking, also check for low(int64) div (-1)
+         (no hardware support for this either) }
+       if (cs_check_overflow in current_settings.localswitches) and
+          is_signed(left.resultdef) and
+          ((right.nodetype<>ordconstn) or
+           (tordconstnode(right).value=-1)) then
+         begin
+           { num=ffff... and div=8000... <=>
+             num xor not(div xor 8000...) = 0
+             (and we have the "eon" operation, which performs "xor not(...)" }
+           tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,left.resultdef);
+           hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.resultdef,low(int64),left.location.register,tmpreg);
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_EON,
+             tmpreg,left.location.register,tmpreg));
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,0));
+           { now the zero/equal flag is set in case we divided low(int64) by
+             (-1) }
+           location_reset(overflowloc,LOC_FLAGS,OS_NO);
+           overflowloc.resflags:=F_EQ;
+           cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,location,resultdef,overflowloc);
+         end;
+
+       { in case of modulo, multiply result again by the divider and subtract
+         from the numerator }
+       if nodetype=modn then
+         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(A_MSUB,resultreg,
+           resultreg,divider,numerator));
+    end;
+
+
+{*****************************************************************************
+                               taarch64notnode
+*****************************************************************************}
+
+    procedure taarch64notnode.second_boolean;
+      begin
+        if not handle_locjump then
+          begin
+            secondpass(left);
+            case left.location.loc of
+              LOC_FLAGS :
+                begin
+                  location_copy(location,left.location);
+                  inverse_flags(location.resflags);
+                end;
+              LOC_REGISTER, LOC_CREGISTER,
+              LOC_REFERENCE, LOC_CREFERENCE,
+              LOC_SUBSETREG, LOC_CSUBSETREG,
+              LOC_SUBSETREF, LOC_CSUBSETREF:
+                begin
+                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
+                    left.location.register,0));
+                  location_reset(location,LOC_FLAGS,OS_NO);
+                  location.resflags:=F_EQ;
+               end;
+              else
+                internalerror(2003042401);
+            end;
+          end;
+      end;
+
+
+{*****************************************************************************
+                                   taarch64unaryminusnode
+*****************************************************************************}
+
+    procedure taarch64unaryminusnode.second_float;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
+      end;
+
+begin
+   cmoddivnode:=taarch64moddivnode;
+   cnotnode:=taarch64notnode;
+   cunaryminusnode:=taarch64unaryminusnode;
+end.

+ 142 - 0
compiler/aarch64/ncpumem.pas

@@ -0,0 +1,142 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    Generate AArch64 code for in memory related nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpumem;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    globtype,
+    cgbase,
+    symtype,
+    node,nmem,ncgmem;
+
+  type
+    taarch64loadparentfpnode = class(tcgloadparentfpnode)
+      procedure pass_generate_code; override;
+    end;
+
+    taarch64vecnode = class(tcgvecnode)
+     protected
+      function valid_index_size(size: tcgsize): boolean; override;
+     public
+       procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint); override;
+    end;
+
+implementation
+
+  uses
+    cutils,verbose,
+    defutil,
+    aasmdata,cpubase,
+    cgutils,
+    cgobj;
+
+  { taarch64loadparentfpnode }
+
+  procedure taarch64loadparentfpnode.pass_generate_code;
+    begin
+      inherited pass_generate_code;
+      { see the comments in tcgaarch64.g_proc_entry }
+      if (location.loc in [LOC_REGISTER,LOC_CREGISTER]) and
+         (location.register=NR_STACK_POINTER_REG) then
+        if (kind=lpf_forpara) then
+          location.register:=NR_FRAME_POINTER_REG
+        else
+          begin
+            { load stack pointer in a different register, as many instructions
+              cannot directly work with the stack pointer. The register
+              allocator can merge them if possible }
+            location.register:=cg.getaddressregister(current_asmdata.CurrAsmList);
+            cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_STACK_POINTER_REG,location.register);
+            location.loc:=LOC_REGISTER;
+          end;
+    end;
+
+
+  { taarch64vecnode }
+
+  function taarch64vecnode.valid_index_size(size: tcgsize): boolean;
+    begin
+      { all sizes are ok if we handle the "reference reg mul", because
+         a) we use a 64 bit register for 64 bit values, and a 32 bit one (that
+            we can sign/zero-extend inside the reference) for smaller values
+         b) for values < 32 bit, the entire 32 bit register always contains the
+            sign/zero-extended version of the value }
+      result:=
+        not is_packed_array(left.resultdef) and
+        (get_mul_size in [1,2,4,8,16]);
+    end;
+
+
+  procedure taarch64vecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
+    var
+      base: tregister;
+      oldoffset: asizeint;
+      shift: byte;
+    begin
+      { we can only scale the index by shl 0..4 }
+      if not(l in [1,2,4,8,16]) then
+        begin
+          inherited;
+          exit;
+        end;
+      { we need a base set and an index available }
+      if (location.reference.base=NR_NO) or
+         (location.reference.index<>NR_NO) then
+        begin
+          { don't integrate the offset yet, make_simple_ref() may be able to
+            handle it more efficiently later (unless an offset is all we have
+            -> optimization for someone that wants to add support for AArch64
+            embedded targets) }
+          oldoffset:=location.reference.offset;
+          location.reference.offset:=0;
+          base:=cg.getaddressregister(current_asmdata.CurrAsmList);
+          cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,base);
+          reference_reset_base(location.reference,base,oldoffset,location.reference.alignment);
+        end;
+      shift:=BsfDWord(l);
+      location.reference.index:=maybe_const_reg;
+      { sign/zero-extend? }
+      if regsize.size=8 then
+        if shift<>0 then
+          location.reference.shiftmode:=SM_LSL
+        else
+          location.reference.shiftmode:=SM_NONE
+      else if is_signed(regsize) then
+        location.reference.shiftmode:=SM_SXTW
+      else if shift<>0 then
+        location.reference.shiftmode:=SM_UXTW
+      else
+        { the upper 32 bits are always already zero-extended -> just use 64 bit
+          register }
+        location.reference.index:=cg.makeregsize(current_asmdata.CurrAsmList,location.reference.index,OS_64);
+      location.reference.shiftimm:=shift;
+      location.reference.alignment:=newalignment(location.reference.alignment,l);
+    end;
+
+
+begin
+  cloadparentfpnode:=taarch64loadparentfpnode;
+  cvecnode:=taarch64vecnode;
+end.

+ 175 - 0
compiler/aarch64/ncpuset.pas

@@ -0,0 +1,175 @@
+{
+    Copyright (c) 2015 by Jonas Maebe
+
+    Generate AArch64 assembler for in set/case nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuset;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       node,nset,ncgset,cpubase,cgbase,cgobj,aasmbase,aasmtai,aasmdata,globtype;
+
+    type
+       taarch64casenode = class(tcgcasenode)
+         protected
+           procedure optimizevalues(var max_linear_list: aint; var max_dist: aword);override;
+           function  has_jumptable: boolean;override;
+           procedure genjumptable(hp: pcaselabel ;min_, max_: aint);override;
+       end;
+
+
+implementation
+
+    uses
+      systems,
+      verbose,globals,constexp,
+      symconst,symdef,defutil,
+      paramgr,
+      cpuinfo,
+      pass_2,cgcpu,
+      ncon,
+      tgobj,ncgutil,regvars,rgobj,aasmcpu,
+      procinfo,
+      cgutils;
+
+{*****************************************************************************
+                            TCGCASENODE
+*****************************************************************************}
+
+
+    procedure taarch64casenode.optimizevalues(var max_linear_list: aint; var max_dist: aword);
+      begin
+        max_linear_list:=10;
+      end;
+    
+
+    function taarch64casenode.has_jumptable: boolean;
+      begin
+        has_jumptable:=true;
+      end;
+
+
+    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: aint);
+      var
+        last: TConstExprInt;
+        tablelabel: TAsmLabel;
+        basereg,indexreg,jumpreg: TRegister;
+        href: TReference;
+        opcgsize: tcgsize;
+        sectype: TAsmSectiontype;
+        jtitemconsttype: taiconst_type;
+
+      procedure genitem(list:TAsmList;t : pcaselabel);
+        var
+          i : aint;
+        begin
+          if assigned(t^.less) then
+            genitem(list,t^.less);
+          { fill possible hole }
+          i:=last.svalue+1;
+          while i<=t^._low.svalue-1 do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,elselabel));
+              inc(i);
+            end;
+          i:=t^._low.svalue;
+          while i<=t^._high.svalue do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,blocklabel(t^.blockid)));
+              inc(i);
+            end;
+          last:=t^._high;
+          if assigned(t^.greater) then
+            genitem(list,t^.greater);
+        end;
+
+      begin
+        if not(target_info.system in systems_darwin) then
+          jtitemconsttype:=aitconst_32bit
+        else
+          { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
+          jtitemconsttype:=aitconst_darwin_dwarf_delta32;
+
+        last:=min_;
+        opcgsize:=def_cgsize(opsize);
+        { a <= x <= b <-> unsigned(x-a) <= (b-a) }
+        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opcgsize,aint(min_),hregister);
+        if not(jumptable_no_range) then
+          begin
+             { case expr greater than max_ => goto elselabel }
+             cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,OC_A,aint(max_)-aint(min_),hregister,elselabel);
+             min_:=0;
+          end;
+        { local label in order to avoid using GOT }
+        current_asmdata.getlabel(tablelabel,alt_data);
+        indexreg:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,opcgsize,OS_ADDR,hregister,indexreg);
+        { load table address }
+        reference_reset_symbol(href,tablelabel,0,4);
+        basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
+        { load table slot, 32-bit sign extended }
+        reference_reset_base(href,basereg,0,4);
+        href.index:=indexreg;
+        href.shiftmode:=SM_LSL;
+        href.shiftimm:=2;
+        jumpreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,href,jumpreg);
+        { add table address }
+        cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,basereg,jumpreg);
+        { and finally jump }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,jumpreg));
+        { generate jump table }
+        if not(target_info.system in systems_darwin) then
+          sectype:=sec_rodata
+        else
+          begin
+            { on Mac OS X, dead code stripping ("smart linking") happens based on
+              global symbols: every global/static symbol (symbols that do not
+              start with "L") marks the start of a new "subsection" that is
+              discarded by the linker if there are no references to this symbol.
+              This means that if you put the jump table in the rodata section, it
+              will become part of the block of data associated with the previous
+              non-L-label in the rodata section and stay or be thrown away
+              depending on whether that block of data is referenced. Therefore,
+              jump tables must be added in the code section and since aktlocaldata
+              is inserted right after the routine, it will become part of the
+              same subsection that contains the routine's code }
+            sectype:=sec_code;
+          end;
+        new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
+        if target_info.system in systems_darwin then
+          begin
+            { additionally, these tables are now marked via ".data_region jt32"
+              and ".end_data_region" }
+            current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_data_region,'jt32'));
+          end;
+        current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel));
+        genitem(current_procinfo.aktlocaldata,hp);
+        if target_info.system in systems_darwin then
+          current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_end_data_region,''));
+      end;
+
+
+begin
+   ccasenode:=taarch64casenode;
+end.

+ 5 - 0
compiler/aarch64/ra64con.inc

@@ -64,6 +64,8 @@ NR_W30 = tregister($0104001E);
 NR_X30 = tregister($0105001E);
 NR_X30 = tregister($0105001E);
 NR_WZR = tregister($0104001F);
 NR_WZR = tregister($0104001F);
 NR_XZR = tregister($0105001F);
 NR_XZR = tregister($0105001F);
+NR_WSP = tregister($01040020);
+NR_SP = tregister($01050020);
 NR_B0 = tregister($04010000);
 NR_B0 = tregister($04010000);
 NR_H0 = tregister($04030000);
 NR_H0 = tregister($04030000);
 NR_S0 = tregister($04090000);
 NR_S0 = tregister($04090000);
@@ -225,3 +227,6 @@ NR_S31 = tregister($0409001F);
 NR_D31 = tregister($040a001F);
 NR_D31 = tregister($040a001F);
 NR_Q31 = tregister($0405001F);
 NR_Q31 = tregister($0405001F);
 NR_NZCV = tregister($05000000);
 NR_NZCV = tregister($05000000);
+NR_FPCR = tregister($05000001);
+NR_FPSR = tregister($05000002);
+NR_TPIDR_EL0 = tregister($05000003);

+ 162 - 157
compiler/aarch64/ra64dwa.inc

@@ -64,164 +64,169 @@
 30,
 30,
 31,
 31,
 31,
 31,
+31,
+31,
+64,
+64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+73,
+73,
+73,
+73,
+73,
+74,
+74,
+74,
+74,
+74,
+75,
+75,
+75,
+75,
+75,
+76,
+76,
+76,
+76,
+76,
+77,
+77,
+77,
+77,
+77,
+78,
+78,
+78,
+78,
+78,
+79,
+79,
+79,
+79,
+79,
+80,
+80,
+80,
+80,
+80,
+81,
+81,
+81,
+81,
+81,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+84,
+84,
+84,
+84,
+84,
+85,
+85,
+85,
+85,
+85,
+86,
+86,
+86,
+86,
+86,
+87,
+87,
+87,
+87,
+87,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+95,
+95,
+95,
+95,
+95,
 0,
 0,
 0,
 0,
 0,
 0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-7,
-8,
-8,
-8,
-8,
-8,
-9,
-9,
-9,
-9,
-9,
-10,
-10,
-10,
-10,
-10,
-11,
-11,
-11,
-11,
-11,
-12,
-12,
-12,
-12,
-12,
-13,
-13,
-13,
-13,
-13,
-14,
-14,
-14,
-14,
-14,
-15,
-15,
-15,
-15,
-15,
-16,
-16,
-16,
-16,
-16,
-17,
-17,
-17,
-17,
-17,
-18,
-18,
-18,
-18,
-18,
-19,
-19,
-19,
-19,
-19,
-20,
-20,
-20,
-20,
-20,
-21,
-21,
-21,
-21,
-21,
-22,
-22,
-22,
-22,
-22,
-23,
-23,
-23,
-23,
-23,
-24,
-24,
-24,
-24,
-24,
-25,
-25,
-25,
-25,
-25,
-26,
-26,
-26,
-26,
-26,
-27,
-27,
-27,
-27,
-27,
-28,
-28,
-28,
-28,
-28,
-29,
-29,
-29,
-29,
-29,
-30,
-30,
-30,
-30,
-30,
-31,
-31,
-31,
-31,
-31,
 0
 0

+ 1 - 1
compiler/aarch64/ra64nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from a64reg.dat }
 { don't edit, this file is generated from a64reg.dat }
-226
+231

+ 6 - 1
compiler/aarch64/ra64num.inc

@@ -64,6 +64,8 @@ tregister($0104001E),
 tregister($0105001E),
 tregister($0105001E),
 tregister($0104001F),
 tregister($0104001F),
 tregister($0105001F),
 tregister($0105001F),
+tregister($01040020),
+tregister($01050020),
 tregister($04010000),
 tregister($04010000),
 tregister($04030000),
 tregister($04030000),
 tregister($04090000),
 tregister($04090000),
@@ -224,4 +226,7 @@ tregister($0403001F),
 tregister($0409001F),
 tregister($0409001F),
 tregister($040a001F),
 tregister($040a001F),
 tregister($0405001F),
 tregister($0405001F),
-tregister($05000000)
+tregister($05000000),
+tregister($05000001),
+tregister($05000002),
+tregister($05000003)

+ 101 - 96
compiler/aarch64/ra64rni.inc

@@ -32,6 +32,7 @@
 59,
 59,
 61,
 61,
 63,
 63,
+65,
 2,
 2,
 4,
 4,
 6,
 6,
@@ -64,102 +65,7 @@
 60,
 60,
 62,
 62,
 64,
 64,
-65,
-70,
-75,
-80,
-85,
-90,
-95,
-100,
-105,
-110,
-115,
-120,
-125,
-130,
-135,
-140,
-145,
-150,
-155,
-160,
-165,
-170,
-175,
-180,
-185,
-190,
-195,
-200,
-205,
-210,
-215,
-220,
 66,
 66,
-71,
-76,
-81,
-86,
-91,
-96,
-101,
-106,
-111,
-116,
-121,
-126,
-131,
-136,
-141,
-146,
-151,
-156,
-161,
-166,
-171,
-176,
-181,
-186,
-191,
-196,
-201,
-206,
-211,
-216,
-221,
-69,
-74,
-79,
-84,
-89,
-94,
-99,
-104,
-109,
-114,
-119,
-124,
-129,
-134,
-139,
-144,
-149,
-154,
-159,
-164,
-169,
-174,
-179,
-184,
-189,
-194,
-199,
-204,
-209,
-214,
-219,
-224,
 67,
 67,
 72,
 72,
 77,
 77,
@@ -224,4 +130,103 @@
 213,
 213,
 218,
 218,
 223,
 223,
-225
+71,
+76,
+81,
+86,
+91,
+96,
+101,
+106,
+111,
+116,
+121,
+126,
+131,
+136,
+141,
+146,
+151,
+156,
+161,
+166,
+171,
+176,
+181,
+186,
+191,
+196,
+201,
+206,
+211,
+216,
+221,
+226,
+69,
+74,
+79,
+84,
+89,
+94,
+99,
+104,
+109,
+114,
+119,
+124,
+129,
+134,
+139,
+144,
+149,
+154,
+159,
+164,
+169,
+174,
+179,
+184,
+189,
+194,
+199,
+204,
+209,
+214,
+219,
+224,
+70,
+75,
+80,
+85,
+90,
+95,
+100,
+105,
+110,
+115,
+120,
+125,
+130,
+135,
+140,
+145,
+150,
+155,
+160,
+165,
+170,
+175,
+180,
+185,
+190,
+195,
+200,
+205,
+210,
+215,
+220,
+225,
+227,
+228,
+229,
+230

+ 48 - 43
compiler/aarch64/ra64sri.inc

@@ -1,8 +1,39 @@
 { don't edit, this file is generated from a64reg.dat }
 { don't edit, this file is generated from a64reg.dat }
 0,
 0,
-65,
+67,
+72,
+117,
+122,
+127,
+132,
+137,
+142,
+147,
+152,
+157,
+162,
+77,
+167,
+172,
+177,
+182,
+187,
+192,
+197,
+202,
+207,
+212,
+82,
+217,
+222,
+87,
+92,
+97,
+102,
+107,
+112,
 70,
 70,
-115,
+75,
 120,
 120,
 125,
 125,
 130,
 130,
@@ -12,8 +43,8 @@
 150,
 150,
 155,
 155,
 160,
 160,
-75,
 165,
 165,
+80,
 170,
 170,
 175,
 175,
 180,
 180,
@@ -23,15 +54,18 @@
 200,
 200,
 205,
 205,
 210,
 210,
-80,
 215,
 215,
-220,
 85,
 85,
+220,
+225,
 90,
 90,
 95,
 95,
 100,
 100,
 105,
 105,
 110,
 110,
+115,
+228,
+229,
 68,
 68,
 73,
 73,
 118,
 118,
@@ -64,9 +98,9 @@
 103,
 103,
 108,
 108,
 113,
 113,
-66,
+227,
 71,
 71,
-116,
+76,
 121,
 121,
 126,
 126,
 131,
 131,
@@ -76,8 +110,8 @@
 151,
 151,
 156,
 156,
 161,
 161,
-76,
 166,
 166,
+81,
 171,
 171,
 176,
 176,
 181,
 181,
@@ -87,16 +121,16 @@
 201,
 201,
 206,
 206,
 211,
 211,
-81,
 216,
 216,
-221,
 86,
 86,
+221,
+226,
 91,
 91,
 96,
 96,
 101,
 101,
 106,
 106,
 111,
 111,
-225,
+116,
 69,
 69,
 74,
 74,
 119,
 119,
@@ -129,38 +163,8 @@
 104,
 104,
 109,
 109,
 114,
 114,
-67,
-72,
-117,
-122,
-127,
-132,
-137,
-142,
-147,
-152,
-157,
-162,
-77,
-167,
-172,
-177,
-182,
-187,
-192,
-197,
-202,
-207,
-212,
-82,
-217,
-222,
-87,
-92,
-97,
-102,
-107,
-112,
+66,
+230,
 1,
 1,
 3,
 3,
 21,
 21,
@@ -192,6 +196,7 @@
 15,
 15,
 17,
 17,
 19,
 19,
+65,
 63,
 63,
 2,
 2,
 4,
 4,

+ 162 - 157
compiler/aarch64/ra64sta.inc

@@ -64,164 +64,169 @@
 30,
 30,
 31,
 31,
 31,
 31,
+31,
+31,
+64,
+64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+73,
+73,
+73,
+73,
+73,
+74,
+74,
+74,
+74,
+74,
+75,
+75,
+75,
+75,
+75,
+76,
+76,
+76,
+76,
+76,
+77,
+77,
+77,
+77,
+77,
+78,
+78,
+78,
+78,
+78,
+79,
+79,
+79,
+79,
+79,
+80,
+80,
+80,
+80,
+80,
+81,
+81,
+81,
+81,
+81,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+84,
+84,
+84,
+84,
+84,
+85,
+85,
+85,
+85,
+85,
+86,
+86,
+86,
+86,
+86,
+87,
+87,
+87,
+87,
+87,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+95,
+95,
+95,
+95,
+95,
 0,
 0,
 0,
 0,
 0,
 0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-7,
-8,
-8,
-8,
-8,
-8,
-9,
-9,
-9,
-9,
-9,
-10,
-10,
-10,
-10,
-10,
-11,
-11,
-11,
-11,
-11,
-12,
-12,
-12,
-12,
-12,
-13,
-13,
-13,
-13,
-13,
-14,
-14,
-14,
-14,
-14,
-15,
-15,
-15,
-15,
-15,
-16,
-16,
-16,
-16,
-16,
-17,
-17,
-17,
-17,
-17,
-18,
-18,
-18,
-18,
-18,
-19,
-19,
-19,
-19,
-19,
-20,
-20,
-20,
-20,
-20,
-21,
-21,
-21,
-21,
-21,
-22,
-22,
-22,
-22,
-22,
-23,
-23,
-23,
-23,
-23,
-24,
-24,
-24,
-24,
-24,
-25,
-25,
-25,
-25,
-25,
-26,
-26,
-26,
-26,
-26,
-27,
-27,
-27,
-27,
-27,
-28,
-28,
-28,
-28,
-28,
-29,
-29,
-29,
-29,
-29,
-30,
-30,
-30,
-30,
-30,
-31,
-31,
-31,
-31,
-31,
 0
 0

+ 6 - 1
compiler/aarch64/ra64std.inc

@@ -64,6 +64,8 @@
 'x30',
 'x30',
 'wzr',
 'wzr',
 'xzr',
 'xzr',
+'wsp',
+'sp',
 'b0',
 'b0',
 'h0',
 'h0',
 's0',
 's0',
@@ -224,4 +226,7 @@
 's31',
 's31',
 'd31',
 'd31',
 'q31',
 'q31',
-'nzcv'
+'nzcv',
+'fpcr',
+'fpsr',
+'tpidr_el0'

+ 5 - 0
compiler/aarch64/ra64sup.inc

@@ -64,6 +64,8 @@ RS_W30 = $1E;
 RS_X30 = $1E;
 RS_X30 = $1E;
 RS_WZR = $1F;
 RS_WZR = $1F;
 RS_XZR = $1F;
 RS_XZR = $1F;
+RS_WSP = $20;
+RS_SP = $20;
 RS_B0 = $00;
 RS_B0 = $00;
 RS_H0 = $00;
 RS_H0 = $00;
 RS_S0 = $00;
 RS_S0 = $00;
@@ -225,3 +227,6 @@ RS_S31 = $1F;
 RS_D31 = $1F;
 RS_D31 = $1F;
 RS_Q31 = $1F;
 RS_Q31 = $1F;
 RS_NZCV = $00;
 RS_NZCV = $00;
+RS_FPCR = $01;
+RS_FPSR = $02;
+RS_TPIDR_EL0 = $03;

+ 88 - 0
compiler/aarch64/racpu.pas

@@ -0,0 +1,88 @@
+{
+    Copyright (c) 1998-2003 by Carl Eric Codere and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe
+
+    Handles the common AArch64 assembler reader routines
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit racpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      cgbase,
+      cpubase,
+      aasmtai,aasmdata,
+      rautils;
+
+    type
+      TAArch64Operand=class(TOperand)
+      end;
+
+      TAArch64Instruction=class(TInstruction)
+        oppostfix : toppostfix;
+        function ConcatInstruction(p:TAsmList) : tai;override;
+        function Is64bit: boolean;
+        function cgsize: tcgsize;
+      end;
+
+  implementation
+
+    uses
+      verbose,
+      aasmcpu;
+
+    function TAArch64Instruction.ConcatInstruction(p:TAsmList) : tai;
+      begin
+        result:=inherited ConcatInstruction(p);
+        taicpu(result).oppostfix:=oppostfix;
+      end;
+
+
+    function TAArch64Instruction.Is64bit: boolean;
+      begin
+        result:=
+          (operands[1].opr.typ=OPR_REGISTER) and
+          (getsubreg(operands[1].opr.reg)=R_SUBQ);
+      end;
+
+    function TAArch64Instruction.cgsize: tcgsize;
+      begin
+        if ops<1 then
+          internalerror(2014122001);
+        if operands[1].opr.typ<>OPR_REGISTER then
+          internalerror(2014122002);
+        result:=reg_cgsize(operands[1].opr.reg);
+        { a 32 bit integer register could actually be 16 or 8 bit }
+        if result=OS_32 then
+          case oppostfix of
+            PF_B:
+              result:=OS_8;
+            PF_SB:
+              result:=OS_S8;
+            PF_H:
+              result:=OS_16;
+            PF_SH:
+              result:=OS_S16;
+          end;
+      end;
+
+
+end.

+ 1053 - 0
compiler/aarch64/racpugas.pas

@@ -0,0 +1,1053 @@
+{
+    Copyright (c) 1998-2002 by Carl Eric Codere and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe
+
+    Does the parsing for the AArch64 GNU AS styled inline assembler.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit racpugas;
+
+{$i fpcdefs.inc}
+
+  Interface
+
+    uses
+      raatt,racpu,
+      cpubase;
+
+    type
+      taarch64attreader = class(tattreader)
+        actoppostfix : TOpPostfix;
+        function is_asmopcode(const s: string):boolean;override;
+        function is_register(const s:string):boolean;override;
+        procedure handleopcode;override;
+        procedure BuildReference(oper: taarch64operand; is64bit: boolean);
+        procedure BuildOperand(oper: taarch64operand; is64bit: boolean);
+        function TryBuildShifterOp(instr: taarch64instruction; opnr: longint) : boolean;
+        procedure BuildOpCode(instr: taarch64instruction);
+        procedure ReadSym(oper: taarch64operand; is64bit: boolean);
+        procedure ConvertCalljmp(instr: taarch64instruction);
+        function ToConditionCode(const hs: string; is_operand: boolean): tasmcond;
+      end;
+
+
+  Implementation
+
+    uses
+      { helpers }
+      cutils,
+      { global }
+      globtype,verbose,
+      systems,aasmbase,aasmtai,aasmdata,aasmcpu,
+      { symtable }
+      symconst,symsym,
+      procinfo,
+      rabase,rautils,
+      cgbase,cgutils;
+
+
+    function taarch64attreader.is_register(const s:string):boolean;
+      type
+        treg2str = record
+          name : string[3];
+          reg : tregister;
+        end;
+
+      const
+        extraregs : array[0..3] of treg2str = (
+          (name: 'FP' ; reg: NR_FP),
+          (name: 'LR' ; reg: NR_LR),
+          (name: 'IP0'; reg: NR_IP0),
+          (name: 'IP1'; reg: NR_IP1));
+
+      var
+        i : longint;
+
+      begin
+        result:=inherited is_register(s);
+        { reg found?
+          possible aliases are always 2 or 3 chars
+        }
+        if result or not(length(s) in [2,3]) then
+          exit;
+        for i:=low(extraregs) to high(extraregs) do
+          begin
+            if s=extraregs[i].name then
+              begin
+                actasmregister:=extraregs[i].reg;
+                result:=true;
+                actasmtoken:=AS_REGISTER;
+                exit;
+              end;
+          end;
+      end;
+
+
+    procedure taarch64attreader.ReadSym(oper: taarch64operand; is64bit: boolean);
+      var
+         tempstr, mangledname : string;
+         typesize,l,k: aint;
+      begin
+        tempstr:=actasmpattern;
+        Consume(AS_ID);
+        { typecasting? }
+        if (actasmtoken=AS_LPAREN) and
+           SearchType(tempstr,typesize) then
+          begin
+            oper.hastype:=true;
+            Consume(AS_LPAREN);
+            BuildOperand(oper,is64bit);
+            Consume(AS_RPAREN);
+            if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+              oper.SetSize(typesize,true);
+          end
+        else
+          if not oper.SetupVar(tempstr,false) then
+            Message1(sym_e_unknown_id,tempstr);
+        { record.field ? }
+        if actasmtoken=AS_DOT then
+          begin
+            BuildRecordOffsetSize(tempstr,l,k,mangledname,false);
+            if (mangledname<>'') then
+              Message(asmr_e_invalid_reference_syntax);
+            inc(oper.opr.ref.offset,l);
+          end;
+      end;
+
+
+    Procedure taarch64attreader.BuildReference(oper: taarch64operand; is64bit: boolean);
+
+      procedure do_error;
+        begin
+          Message(asmr_e_invalid_reference_syntax);
+          RecoverConsume(false);
+        end;
+
+
+      procedure test_end(require_rbracket : boolean);
+        begin
+          if require_rbracket then begin
+            if not(actasmtoken=AS_RBRACKET) then
+              begin
+                do_error;
+                exit;
+              end
+            else
+              Consume(AS_RBRACKET);
+            if (actasmtoken=AS_NOT) then
+              begin
+                oper.opr.ref.addressmode:=AM_PREINDEXED;
+                Consume(AS_NOT);
+              end;
+          end;
+          if not(actasmtoken in [AS_SEPARATOR,AS_end]) then
+            do_error
+          else
+            begin
+{$IFDEF debugasmreader}
+              writeln('TEST_end_FINAL_OK. Created the following ref:');
+              writeln('oper.opr.ref.shiftimm=',oper.opr.ref.shiftimm);
+              writeln('oper.opr.ref.shiftmode=',ord(oper.opr.ref.shiftmode));
+              writeln('oper.opr.ref.index=',ord(oper.opr.ref.index));
+              writeln('oper.opr.ref.base=',ord(oper.opr.ref.base));
+              writeln('oper.opr.ref.signindex=',ord(oper.opr.ref.signindex));
+              writeln('oper.opr.ref.addressmode=',ord(oper.opr.ref.addressmode));
+              writeln;
+{$endIF debugasmreader}
+            end;
+        end;
+
+
+      function is_shifter_ref_operation(var a : tshiftmode) : boolean;
+        begin
+          a:=SM_NONE;
+          if (actasmpattern='LSL') then
+            a:=SM_LSL
+          else if (actasmpattern='UXTW') then
+            a:=SM_UXTW
+          else if (actasmpattern='SXTW') then
+            a:=SM_SXTW
+          else if (actasmpattern='SXTX') then
+            a:=SM_SXTX;
+          is_shifter_ref_operation:=not(a=SM_NONE);
+        end;
+
+
+      procedure read_index_shift(require_rbracket : boolean);
+        var
+          shift: aint;
+        begin
+          case actasmtoken of
+            AS_COMMA :
+              begin
+                Consume(AS_COMMA);
+                if not(actasmtoken=AS_ID) then
+                  do_error;
+                if is_shifter_ref_operation(oper.opr.ref.shiftmode) then
+                  begin
+                    Consume(actasmtoken);
+                    if actasmtoken=AS_HASH then
+                      begin
+                        Consume(AS_HASH);
+                        shift:=BuildConstExpression(false,true);
+                        if not(shift in [0,2+ord(is64bit)]) then
+                          do_error;
+                        oper.opr.ref.shiftimm:=shift;
+                        test_end(require_rbracket);
+                      end;
+                   end
+                 else
+                   begin
+                     do_error;
+                     exit;
+                   end;
+              end;
+            AS_RBRACKET :
+              if require_rbracket then
+                test_end(require_rbracket)
+              else
+                begin
+                  do_error;
+                  exit;
+                end;
+            AS_SEPARATOR,AS_END :
+              if not require_rbracket then
+                test_end(false)
+               else
+                 do_error;
+            else
+              begin
+                do_error;
+                exit;
+              end;
+          end;
+        end;
+
+
+      procedure read_index(require_rbracket : boolean);
+        var
+          recname : string;
+          o_int,s_int : aint;
+        begin
+          case actasmtoken of
+            AS_REGISTER :
+              begin
+                if getsupreg(actasmregister)=RS_XZR then
+                  Message1(asmr_e_invalid_ref_register,actasmpattern);
+                oper.opr.ref.index:=actasmregister;
+                Consume(AS_REGISTER);
+                read_index_shift(require_rbracket);
+                exit;
+              end;
+            AS_HASH : // constant
+              begin
+                Consume(AS_HASH);
+(*
+                if actasmtoken=AS_COLON then
+                  begin
+                    consume(AS_COLON);
+                    { GNU-style lower 12 bits of address of non-GOT-based
+                      access }
+                    if (actasmpattern='LO12') then
+                      begin
+                        consume(actasmtoken);
+                        consume(AS_COLON);
+                        if not oper.SetupVar(actasmpattern,false) then
+                          begin
+                            do_error;
+                            exit
+                          end;
+                        consume(AS_ID);
+                        oper.opr.ref.refaddr:=addr_??? (not gotpageoffset);
+                      end
+                    else
+                      begin
+                        do_error;
+                        exit
+                      end;
+                  end
+                else
+*)
+                  begin
+                    o_int:=BuildConstExpression(false,true);
+                    inc(oper.opr.ref.offset,o_int);
+                  end;
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_ID :
+              begin
+                recname:=actasmpattern;
+                Consume(AS_ID);
+                { Apple-style got page offset }
+                if actasmtoken=AS_AT then
+                  begin
+                    if not oper.SetupVar(recname,false) then
+                      begin
+                        do_error;
+                        exit
+                      end;
+                    consume(AS_AT);
+                    if actasmpattern='GOTPAGEOFF' then
+                      begin
+                        consume(actasmtoken);
+                        oper.opr.ref.refaddr:=addr_gotpageoffset;
+                      end
+                    else if actasmpattern='PAGEOFF' then
+                      begin
+                        consume(actasmtoken);
+                        oper.opr.ref.refaddr:=addr_pageoffset;
+                      end
+                    else
+                      begin
+                        do_error;
+                        exit
+                      end;
+                  end
+                else
+                  begin
+                    BuildRecordOffsetSize(recname,o_int,s_int,recname,false);
+                    inc(oper.opr.ref.offset,o_int);
+                  end;
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_AT:
+              begin
+                do_error;
+                exit;
+              end;
+            AS_RBRACKET :
+              begin
+                if require_rbracket then
+                  begin
+                    test_end(require_rbracket);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error; // unexpected rbracket
+                    exit;
+                  end;
+              end;
+            AS_SEPARATOR,AS_end :
+              begin
+                if not require_rbracket then
+                  begin
+                    test_end(false);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error;
+                    exit;
+                  end;
+              end;
+            else
+              begin
+                // unexpected token
+                do_error;
+                exit;
+              end;
+          end; // case
+        end;
+
+
+      procedure try_prepostindexed;
+        begin
+          Consume(AS_RBRACKET);
+          case actasmtoken of
+            AS_COMMA :
+              begin // post-indexed
+                Consume(AS_COMMA);
+                oper.opr.ref.addressmode:=AM_POSTINDEXED;
+                read_index(false);
+                exit;
+              end;
+            AS_NOT :
+              begin   // pre-indexed
+                Consume(AS_NOT);
+                oper.opr.ref.addressmode:=AM_PREINDEXED;
+                test_end(false);
+                exit;
+              end;
+            else
+              begin
+                test_end(false);
+                exit;
+              end;
+          end; // case
+        end;
+
+      begin
+        Consume(AS_LBRACKET);
+        oper.opr.ref.addressmode:=AM_OFFSET; // assume "neither PRE nor POST inc"
+        if actasmtoken=AS_REGISTER then
+          begin
+            if getsupreg(actasmregister)=RS_XZR then
+              Message1(asmr_e_invalid_ref_register,actasmpattern);
+            oper.opr.ref.base:=actasmregister;
+            Consume(AS_REGISTER);
+            case actasmtoken of
+              AS_RBRACKET :
+                begin
+                  try_prepostindexed;
+                  exit;
+                end;
+              AS_COMMA :
+                begin
+                  Consume(AS_COMMA);
+                  read_index(true);
+                  exit;
+                end;
+              else
+                begin
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                end;
+            end;
+          end
+        else
+          Begin
+            case actasmtoken of
+              AS_ID :
+                begin
+                  { TODO: local variables and parameters }
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                  exit;
+                end;
+              else
+                begin // elsecase
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                  exit;
+                end;
+            end;
+          end;
+      end;
+
+
+    function taarch64attreader.TryBuildShifterOp(instr: taarch64instruction; opnr: longint): boolean;
+
+      procedure handlepara(sm : tshiftmode);
+        begin
+          consume(AS_ID);
+          fillchar(instr.operands[opnr].opr,sizeof(instr.operands[opnr].opr),0);
+          instr.operands[opnr].opr.typ:=OPR_SHIFTEROP;
+          instr.operands[opnr].opr.shifterop.shiftmode:=sm;
+          if (sm=SM_LSL) or
+             (actasmtoken=AS_HASH) then
+            begin
+              consume(AS_HASH);
+              instr.operands[opnr].opr.shifterop.shiftimm:=BuildConstExpression(false,false);
+            end;
+        end;
+
+      const
+        shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
+          ('LSL','LSR','ASR',
+           'UXTB','UXTH','UXTW','UXTX',
+           'SXTB','SXTH','SXTW','SXTX');
+      var
+        sm: tshiftmode;
+        i: longint;
+        usessp,
+        useszr: boolean;
+      begin
+        result:=false;
+        if (actasmtoken=AS_ID) then
+          begin
+            for sm:=low(shiftmode2str) to high(shiftmode2str) do
+              if actasmpattern=shiftmode2str[sm] then
+                begin
+                  handlepara(sm);
+                  if instr.operands[1].opr.typ=OPR_REGISTER then
+                    begin
+                      { the possible shifter ops depend on whether this
+                        instruction uses sp and/or zr }
+                      usessp:=false;
+                      useszr:=false;
+                      for i:=low(instr.operands) to pred(opnr) do
+                        begin
+                          if (instr.operands[1].opr.typ=OPR_REGISTER) then
+                            case getsupreg(instr.operands[1].opr.reg) of
+                              RS_XZR:
+                                useszr:=true;
+                              RS_SP:
+                                usessp:=true;
+                            end;
+                        end;
+                      result:=valid_shifter_operand(instr.opcode,useszr,usessp,instr.Is64bit,sm,instr.operands[opnr].opr.shifterop.shiftimm);
+                    end
+                end;
+          end;
+      end;
+
+
+    function taarch64attreader.ToConditionCode(const hs: string; is_operand: boolean): tasmcond;
+      begin
+        case actopcode of
+          A_CSEL,A_CSINC,A_CSINV,A_CSNEG,A_CSET,A_CSETM,
+          A_CINC,A_CINV,A_CNEG,A_CCMN,A_CCMP,
+          A_B:
+            begin
+              { search for condition, conditions are always 2 chars }
+              if (is_operand<>(actopcode=A_B)) and
+                 (length(hs)>1) then
+                begin
+                  { workaround for DFA bug }
+                  result:=low(tasmcond);
+                  for result:=low(tasmcond) to high(tasmcond) do
+                    begin
+                      if hs=uppercond2str[result] then
+                        exit;
+                    end;
+                end;
+            end;
+        end;
+        result:=C_None;;
+      end;
+
+
+    Procedure taarch64attreader.BuildOperand(oper: taarch64operand; is64bit: boolean);
+      var
+        expr: string;
+        typesize, l: aint;
+
+        procedure MaybeAddGotAddrMode;
+          begin
+            if actasmtoken=AS_AT then
+              begin
+                consume(AS_AT);
+                if actasmpattern='GOTPAGE' then
+                  oper.opr.ref.refaddr:=addr_gotpage
+                else if actasmpattern='GOTPAGEOFF' then
+                  oper.opr.ref.refaddr:=addr_gotpageoffset
+                else if actasmpattern='PAGE' then
+                  oper.opr.ref.refaddr:=addr_page
+                else if actasmpattern='PAGEOFF' then
+                  oper.opr.ref.refaddr:=addr_pageoffset
+                else
+                  Message(asmr_e_expr_illegal);
+                consume(actasmtoken);
+              end
+            else
+              oper.opr.ref.refaddr:=addr_pic;
+          end;
+
+        procedure AddLabelOperand(hl:tasmlabel);
+          begin
+            if not(actasmtoken in [AS_PLUS,AS_MINUS,AS_LPAREN]) and
+               is_calljmp(actopcode) then
+             begin
+               oper.opr.typ:=OPR_SYMBOL;
+               oper.opr.symbol:=hl;
+             end
+            else if (actopcode=A_ADR) or
+               (actopcode=A_ADRP) then
+              begin
+                oper.InitRef;
+                MaybeAddGotAddrMode;
+                oper.opr.ref.symbol:=hl;
+                if (actasmtoken in [AS_PLUS, AS_MINUS]) then
+                  begin
+                    l:=BuildConstExpression(true,false);
+                    oper.opr.ref.offset:=l;
+                  end;
+              end;
+          end;
+
+
+        procedure MaybeRecordOffset;
+          var
+            mangledname: string;
+            hasdot  : boolean;
+            l,
+            toffset,
+            tsize   : aint;
+          begin
+            if not(actasmtoken in [AS_DOT,AS_PLUS,AS_MINUS]) then
+              exit;
+            l:=0;
+            hasdot:=(actasmtoken=AS_DOT);
+            if hasdot then
+              begin
+                if expr<>'' then
+                  begin
+                    BuildRecordOffsetSize(expr,toffset,tsize,mangledname,false);
+                    if (oper.opr.typ<>OPR_CONSTANT) and
+                       (mangledname<>'') then
+                      Message(asmr_e_wrong_sym_type);
+                    inc(l,toffset);
+                    oper.SetSize(tsize,true);
+                  end;
+              end;
+            if actasmtoken in [AS_PLUS,AS_MINUS] then
+              inc(l,BuildConstExpression(true,false));
+            case oper.opr.typ of
+              OPR_LOCAL :
+                begin
+                  { don't allow direct access to fields of parameters, because that
+                    will generate buggy code. Allow it only for explicit typecasting }
+                  if hasdot and
+                     (not oper.hastype) and
+                     (tabstractnormalvarsym(oper.opr.localsym).owner.symtabletype=parasymtable) and
+                     (current_procinfo.procdef.proccalloption<>pocall_register) then
+                    Message(asmr_e_cannot_access_field_directly_for_parameters);
+                  inc(oper.opr.localsymofs,l)
+                end;
+              OPR_CONSTANT :
+                inc(oper.opr.val,l);
+              OPR_REFERENCE :
+                if (mangledname<>'') then
+                  begin
+                    if (oper.opr.val<>0) then
+                      Message(asmr_e_wrong_sym_type);
+                    oper.opr.typ:=OPR_SYMBOL;
+                    oper.opr.symbol:=current_asmdata.RefAsmSymbol(mangledname);
+                  end
+                else
+                  inc(oper.opr.val,l);
+              OPR_SYMBOL:
+                Message(asmr_e_invalid_symbol_ref);
+              else
+                internalerror(200309221);
+            end;
+          end;
+
+
+        function MaybeBuildReference(is64bit: boolean):boolean;
+          { Try to create a reference, if not a reference is found then false
+            is returned }
+          begin
+            MaybeBuildReference:=true;
+            case actasmtoken of
+              AS_INTNUM,
+              AS_MINUS,
+              AS_PLUS:
+                Begin
+                  oper.opr.ref.offset:=BuildConstExpression(True,False);
+                  if actasmtoken<>AS_LPAREN then
+                    Message(asmr_e_invalid_reference_syntax)
+                  else
+                    BuildReference(oper,is64bit);
+                end;
+              AS_LPAREN:
+                BuildReference(oper,is64bit);
+              AS_ID: { only a variable is allowed ... }
+                Begin
+                  ReadSym(oper,is64bit);
+                  case actasmtoken of
+                    AS_end,
+                    AS_SEPARATOR,
+                    AS_COMMA: ;
+                    AS_LPAREN:
+                      BuildReference(oper,is64bit);
+                  else
+                    Begin
+                      Message(asmr_e_invalid_reference_syntax);
+                      Consume(actasmtoken);
+                    end;
+                  end; {end case }
+                end;
+              else
+               MaybeBuildReference:=false;
+            end; { end case }
+          end;
+
+
+      var
+        tempreg: tregister;
+        hl: tasmlabel;
+        icond: tasmcond;
+      Begin
+        expr:='';
+        case actasmtoken of
+          AS_LBRACKET: { Memory reference or constant expression }
+            Begin
+              oper.InitRef;
+              BuildReference(oper,is64bit);
+            end;
+
+          AS_HASH: { Constant expression  }
+            Begin
+              Consume(AS_HASH);
+              BuildConstantOperand(oper);
+            end;
+
+          (*
+          AS_INTNUM,
+          AS_MINUS,
+          AS_PLUS:
+            Begin
+              { Constant memory offset }
+              { This must absolutely be followed by (  }
+              oper.InitRef;
+              oper.opr.ref.offset:=BuildConstExpression(True,False);
+              if actasmtoken<>AS_LPAREN then
+                begin
+                  ofs:=oper.opr.ref.offset;
+                  BuildConstantOperand(oper);
+                  inc(oper.opr.val,ofs);
+                end
+              else
+                BuildReference(oper,is64bit);
+            end;
+          *)
+          AS_ID: { A constant expression, or a Variable ref.  }
+            Begin
+              { Condition code? }
+              icond:=ToConditionCode(actasmpattern,true);
+              if icond<>C_None then
+                begin
+                  oper.opr.typ:=OPR_COND;
+                  oper.opr.cc:=icond;
+                  consume(AS_ID);
+                end
+              else
+              { Local Label ? }
+              if is_locallabel(actasmpattern) then
+               begin
+                 CreateLocalLabel(actasmpattern,hl,false);
+                 Consume(AS_ID);
+                 AddLabelOperand(hl);
+               end
+              else
+               { Check for label }
+               if SearchLabel(actasmpattern,hl,false) then
+                 begin
+                   Consume(AS_ID);
+                   AddLabelOperand(hl);
+                 end
+              else
+               { probably a variable or normal expression }
+               { or a procedure (such as in CALL ID)      }
+               begin
+                 { is it a constant ? }
+                 if SearchIConstant(actasmpattern,l) then
+                  begin
+                    if not (oper.opr.typ in [OPR_NONE,OPR_CONSTANT]) then
+                      Message(asmr_e_invalid_operand_type);
+                    BuildConstantOperand(oper);
+                  end
+                 else
+                  begin
+                    expr:=actasmpattern;
+                    Consume(AS_ID);
+                    { typecasting? }
+                    if (actasmtoken=AS_LPAREN) and
+                       SearchType(expr,typesize) then
+                     begin
+                       oper.hastype:=true;
+                       Consume(AS_LPAREN);
+                       BuildOperand(oper,is64bit);
+                       Consume(AS_RPAREN);
+                       if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+                         oper.SetSize(typesize,true);
+                     end
+                    else
+                     begin
+                       if not(oper.SetupVar(expr,false)) then
+                        Begin
+                          { look for special symbols ... }
+                          if expr= '__HIGH' then
+                            begin
+                              consume(AS_LPAREN);
+                              if not oper.setupvar('high'+actasmpattern,false) then
+                                Message1(sym_e_unknown_id,'high'+actasmpattern);
+                              consume(AS_ID);
+                              consume(AS_RPAREN);
+                            end
+                          else
+                           if expr = '__RESULT' then
+                            oper.SetUpResult
+                          else
+                           if expr = '__SELF' then
+                            oper.SetupSelf
+                          else
+                           if expr = '__OLDEBP' then
+                            oper.SetupOldEBP
+                          else
+                            Message1(sym_e_unknown_id,expr);
+                        end
+                       else
+                         MaybeAddGotAddrMode;
+                     end;
+                  end;
+                  if actasmtoken=AS_DOT then
+                    MaybeRecordOffset;
+                  { add a constant expression? }
+                  if (actasmtoken=AS_PLUS) then
+                   begin
+                     l:=BuildConstExpression(true,false);
+                     case oper.opr.typ of
+                       OPR_CONSTANT :
+                         inc(oper.opr.val,l);
+                       OPR_LOCAL :
+                         inc(oper.opr.localsymofs,l);
+                       OPR_REFERENCE :
+                         inc(oper.opr.ref.offset,l);
+                       else
+                         internalerror(200309202);
+                     end;
+                   end
+               end;
+              { Do we have a indexing reference, then parse it also }
+              if actasmtoken=AS_LPAREN then
+                BuildReference(oper,is64bit);
+            end;
+
+          { Register, a variable reference or a constant reference  }
+          AS_REGISTER:
+            Begin
+              { save the type of register used. }
+              tempreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (actasmtoken in [AS_end,AS_SEPARATOR,AS_COMMA]) then
+                Begin
+                  if not (oper.opr.typ in [OPR_NONE,OPR_REGISTER]) then
+                    Message(asmr_e_invalid_operand_type);
+                  oper.opr.typ:=OPR_REGISTER;
+                  oper.opr.reg:=tempreg;
+                end
+              else
+                Message(asmr_e_syn_operand);
+            end;
+
+          AS_end,
+          AS_SEPARATOR,
+          AS_COMMA: ;
+        else
+          Begin
+            Message(asmr_e_syn_operand);
+            Consume(actasmtoken);
+          end;
+        end; { end case }
+      end;
+
+{*****************************************************************************
+                                taarch64attreader
+*****************************************************************************}
+
+    procedure taarch64attreader.BuildOpCode(instr: taarch64instruction);
+      var
+        operandnum : longint;
+      Begin
+        { opcode }
+        if (actasmtoken<>AS_OPCODE) then
+         Begin
+           Message(asmr_e_invalid_or_missing_opcode);
+           RecoverConsume(true);
+           exit;
+         end;
+        { Fill the instr object with the current state }
+        with instr do
+          begin
+            Opcode:=ActOpcode;
+            condition:=ActCondition;
+            oppostfix:=actoppostfix;
+          end;
+        Consume(AS_OPCODE);
+
+        { We are reading operands, so opcode will be an AS_ID }
+        operandnum:=1;
+        { Zero operand opcode ?  }
+        if actasmtoken in [AS_SEPARATOR,AS_end] then
+         begin
+           instr.Ops:=0;
+           exit;
+         end;
+        { Read the operands }
+        repeat
+          case actasmtoken of
+            AS_COMMA: { Operand delimiter }
+              Begin
+                { operandnum and not operandnum+1, because tinstruction is
+                  one-based and taicpu is zero-based)
+                }
+                if can_be_shifter_operand(instr.opcode,operandnum) then
+                  begin
+                    Consume(AS_COMMA);
+                    if not TryBuildShifterOp(instr,operandnum+1) then
+                      Message(asmr_e_illegal_shifterop_syntax);
+                    Inc(operandnum);
+                  end
+                else
+                  begin
+                    if operandnum>Max_Operands then
+                      Message(asmr_e_too_many_operands)
+                    else
+                      Inc(operandnum);
+                    Consume(AS_COMMA);
+                  end;
+              end;
+            AS_SEPARATOR,
+            AS_end : { End of asm operands for this opcode  }
+              begin
+                break;
+              end;
+          else
+            begin
+              BuildOperand(taarch64operand(instr.operands[operandnum]),instr.Is64bit);
+              instr.Ops:=operandnum;
+              if instr.operands[operandnum].opr.typ=OPR_REFERENCE then
+                if simple_ref_type(instr.opcode,instr.cgsize,instr.oppostfix,instr.operands[operandnum].opr.ref)<>sr_simple then
+                  Message(asmr_e_invalid_reference_syntax);
+                ;
+            end;
+          end; { end case }
+        until false;
+      end;
+
+
+    function taarch64attreader.is_asmopcode(const s: string):boolean;
+
+      const
+        { sorted by length so longer postfixes will match first }
+        postfix2strsorted : array[1..7] of string[3] = (
+          'SB','SH','SW',
+          'B','H','W',
+          'S');
+
+        postfixsorted : array[1..7] of TOpPostfix = (
+          PF_SB,PF_SH,PF_SW,
+          PF_B,PF_H,PF_W,
+          PF_S);
+
+      var
+        j  : longint;
+        hs : string;
+        maxlen : longint;
+        icond : tasmcond;
+      Begin
+        { making s a value parameter would break other assembler readers }
+        hs:=s;
+        is_asmopcode:=false;
+
+        { clear opcode }
+        actopcode:=A_None;
+        actcondition:=C_None;
+
+        { b.cond ? }
+        if (length(hs)=4) and
+           (hs[1]='B') and
+           (hs[2]='.') then
+          begin
+            actopcode:=A_B;
+            actasmtoken:=AS_OPCODE;
+            actcondition:=ToConditionCode(copy(hs,3,length(actasmpattern)-2),false);
+            if actcondition<>C_None then
+              is_asmopcode:=true;
+            exit;
+          end;
+
+        maxlen:=max(length(hs),7);
+        actopcode:=A_NONE;
+        for j:=maxlen downto 1 do
+          begin
+            actopcode:=tasmop(PtrUInt(iasmops.Find(copy(hs,1,j))));
+            if actopcode<>A_NONE then
+              begin
+                actasmtoken:=AS_OPCODE;
+                { strip op code }
+                delete(hs,1,j);
+                break;
+              end;
+          end;
+        if actopcode=A_NONE then
+          exit;
+
+        { check for postfix }
+        if length(hs)>0 then
+          begin
+            for j:=low(postfixsorted) to high(postfixsorted) do
+              begin
+                if copy(hs,1,length(postfix2strsorted[j]))=postfix2strsorted[j] then
+                  begin
+                    actoppostfix:=postfixsorted[j];
+                    { strip postfix }
+                    delete(hs,1,length(postfix2strsorted[j]));
+                    break;
+                  end;
+              end;
+          end;
+        { if we stripped all postfixes, it's a valid opcode }
+        is_asmopcode:=length(hs)=0;
+      end;
+
+
+    procedure taarch64attreader.ConvertCalljmp(instr: taarch64instruction);
+      var
+        newopr : toprrec;
+      begin
+        if instr.Operands[1].opr.typ=OPR_REFERENCE then
+          begin
+            newopr.typ:=OPR_SYMBOL;
+            newopr.symbol:=instr.Operands[1].opr.ref.symbol;
+            newopr.symofs:=instr.Operands[1].opr.ref.offset;
+            if (instr.Operands[1].opr.ref.base<>NR_NO) or
+              (instr.Operands[1].opr.ref.index<>NR_NO) or
+              (instr.Operands[1].opr.ref.refaddr<>addr_pic) then
+              Message(asmr_e_syn_operand);
+            instr.Operands[1].opr:=newopr;
+          end;
+      end;
+
+    procedure taarch64attreader.handleopcode;
+      var
+        instr: taarch64instruction;
+      begin
+        instr:=taarch64instruction.Create(taarch64operand);
+        BuildOpcode(instr);
+        if is_calljmp(instr.opcode) then
+          ConvertCalljmp(instr);
+        {
+        instr.AddReferenceSizes;
+        instr.SetInstructionOpsize;
+        instr.CheckOperandSizes;
+        }
+        instr.ConcatInstruction(curlist);
+        instr.Free;
+        actoppostfix:=PF_None;
+      end;
+
+
+{*****************************************************************************
+                                     Initialize
+*****************************************************************************}
+
+const
+  asmmode_arm_att_info : tasmmodeinfo =
+          (
+            id    : asmmode_arm_gas;
+            idtxt : 'GAS';
+            casmreader : taarch64attreader;
+          );
+
+  asmmode_arm_standard_info : tasmmodeinfo =
+          (
+            id    : asmmode_standard;
+            idtxt : 'STANDARD';
+            casmreader : taarch64attreader;
+          );
+
+initialization
+  RegisterAsmMode(asmmode_arm_att_info);
+  RegisterAsmMode(asmmode_arm_standard_info);
+end.

+ 171 - 0
compiler/aarch64/rgcpu.pas

@@ -0,0 +1,171 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    This unit implements the SPARC specific class for the register
+    allocator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit rgcpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      cgbase,cgutils,
+      cpubase,
+      globtype,
+      rgobj;
+
+    type
+      trgcpu=class(trgobj)
+        procedure do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
+        procedure do_spill_written(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
+       protected
+        procedure do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
+      end;
+
+      trgintcpu=class(trgcpu)
+        procedure add_cpu_interferences(p: tai); override;
+      end;
+
+
+implementation
+
+    uses
+      verbose,cutils,
+      cgobj;
+
+    procedure trgcpu.do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
+      begin
+        do_spill_op(list,A_LDR,pos,spilltemp,tempreg,orgsupreg);
+      end;
+
+
+    procedure trgcpu.do_spill_written(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
+      begin
+        do_spill_op(list,A_STR,pos,spilltemp,tempreg,orgsupreg);
+      end;
+
+
+    procedure trgcpu.do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister);
+      var
+        helpins  : tai;
+        tmpref   : treference;
+        helplist : TAsmList;
+        hreg     : tregister;
+        isload   : boolean;
+      begin
+        isload:=op=A_LDR;
+        { offset out of range for regular load/store? }
+        if simple_ref_type(op,reg_cgsize(tempreg),PF_None,spilltemp)<>sr_simple then
+          begin
+            helplist:=TAsmList.create;
+
+            if getregtype(tempreg)=R_INTREGISTER then
+              hreg:=tempreg
+            else
+              hreg:=cg.getaddressregister(helplist);
+
+            cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
+            reference_reset_base(tmpref,spilltemp.base,0,sizeof(pint));
+            tmpref.index:=hreg;
+            if isload then
+              helpins:=spilling_create_load(tmpref,tempreg)
+            else
+              helpins:=spilling_create_store(tempreg,tmpref);
+            helplist.concat(helpins);
+            add_cpu_interferences(helpins);
+            list.insertlistafter(pos,helplist);
+            helplist.free;
+          end
+        else if isload then
+          inherited do_spill_read(list,pos,spilltemp,tempreg,orgsupreg)
+        else
+          inherited do_spill_written(list,pos,spilltemp,tempreg,orgsupreg)
+      end;
+
+
+    procedure trgintcpu.add_cpu_interferences(p: tai);
+     var
+       i, j: longint;
+     begin
+       if p.typ=ait_instruction then
+         begin
+           { add interferences for instructions that can have SP as a register
+             operand }
+           case taicpu(p).opcode of
+             A_MOV:
+               { all operands can be SP }
+               exit;
+             A_ADD,
+             A_SUB,
+             A_CMP,
+             A_CMN:
+               { ok as destination or first source in immediate or extended
+                 register form }
+               if (taicpu(p).oper[taicpu(p).ops-1]^.typ<>top_shifterop) or
+                  valid_shifter_operand(taicpu(p).opcode,false,true,
+                    reg_cgsize(taicpu(p).oper[0]^.reg) in [OS_64,OS_S64],
+                    taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftmode,
+                    taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftimm) then
+                 begin
+                   if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_shifterop then
+                     i:=taicpu(p).ops-2
+                   else
+                     i:=taicpu(p).ops-1;
+                   if (taicpu(p).oper[i]^.typ=top_reg) then
+                     add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
+                   exit;
+                 end;
+             A_AND,
+             A_EOR,
+             A_ORR,
+             A_TST:
+               { ok in immediate form }
+               if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_const then
+                 exit;
+           end;
+           { add interferences for other registers }
+           for i:=0 to taicpu(p).ops-1 do
+             begin
+               case taicpu(p).oper[i]^.typ of
+                 top_reg:
+                   if getregtype(taicpu(p).oper[i]^.reg)=R_INTREGISTER then
+                     add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
+                 top_ref:
+                   begin
+                     { sp can always be base, never be index }
+                     if taicpu(p).oper[i]^.ref^.index<>NR_NO then
+                       add_edge(getsupreg(taicpu(p).oper[i]^.ref^.index),RS_SP);
+                     { in case of write back, the base register must be
+                       different from the loaded/stored register }
+                     if (taicpu(p).oper[i]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+                        (taicpu(p).oper[i]^.ref^.base<>NR_NO) then
+                       begin
+                         for j:=pred(i) downto 0 do
+                           if taicpu(p).oper[j]^.typ=TOP_REG then
+                             add_edge(getsupreg(taicpu(p).oper[j]^.reg),getsupreg(taicpu(p).oper[i]^.ref^.base));
+                       end;
+                   end;
+               end;
+             end;
+         end;
+     end;
+
+end.

+ 4 - 2
compiler/aasmbase.pas

@@ -64,6 +64,8 @@ interface
 
 
     const
     const
        asmlabeltypeprefix : array[TAsmLabeltype] of char = ('j','a','d','l','f','t','c');
        asmlabeltypeprefix : array[TAsmLabeltype] of char = ('j','a','d','l','f','t','c');
+       asmsymbindname : array[TAsmsymbind] of string[23] = ('none', 'external','common',
+       'local','global','weak external','private external','lazy','import','internal temp');
 
 
     type
     type
        TAsmSectiontype=(sec_none,
        TAsmSectiontype=(sec_none,
@@ -203,7 +205,7 @@ interface
     function EncodeUleb128(a: qword;out buf) : byte;
     function EncodeUleb128(a: qword;out buf) : byte;
     function EncodeSleb128(a: int64;out buf) : byte;
     function EncodeSleb128(a: int64;out buf) : byte;
 
 
-    function ReplaceForbiddenAsmSymbolChars(const s: string): string;
+    function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
 
 
     { dummy default noop callback }
     { dummy default noop callback }
     procedure default_global_used;
     procedure default_global_used;
@@ -354,7 +356,7 @@ implementation
       end;
       end;
 
 
 
 
-    function ReplaceForbiddenAsmSymbolChars(const s: string): string;
+    function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
       var
       var
         i : longint;
         i : longint;
         rchar: char;
         rchar: char;

+ 2 - 2
compiler/aasmcnst.pas

@@ -658,7 +658,7 @@ implementation
      var
      var
        prelist: tasmlist;
        prelist: tasmlist;
      begin
      begin
-       prelist:=tasmlist.create_without_marker;
+       prelist:=tasmlist.create;
        { only now add items based on the symbolname, because it may be
        { only now add items based on the symbolname, because it may be
          modified by the "section" specifier in case of a typed constant }
          modified by the "section" specifier in case of a typed constant }
        if tcalo_new_section in options then
        if tcalo_new_section in options then
@@ -748,7 +748,7 @@ implementation
    constructor ttai_typedconstbuilder.create;
    constructor ttai_typedconstbuilder.create;
      begin
      begin
        inherited create;
        inherited create;
-       fasmlist:=tasmlist.create_without_marker;
+       fasmlist:=tasmlist.create;
        { queue is empty }
        { queue is empty }
        fqueue_offset:=low(fqueue_offset);
        fqueue_offset:=low(fqueue_offset);
      end;
      end;

+ 15 - 16
compiler/aasmdata.pas

@@ -122,8 +122,6 @@ interface
     type
     type
       TAsmList = class(tlinkedlist)
       TAsmList = class(tlinkedlist)
          constructor create;
          constructor create;
-         constructor create_without_marker;
-         function  empty : boolean;
          function  getlasttaifilepos : pfileposinfo;
          function  getlasttaifilepos : pfileposinfo;
       end;
       end;
 
 
@@ -284,20 +282,6 @@ implementation
     constructor TAsmList.create;
     constructor TAsmList.create;
       begin
       begin
         inherited create;
         inherited create;
-        { make sure the optimizer won't remove the first tai of this list}
-        insert(tai_marker.create(mark_BlockStart));
-      end;
-
-    constructor TAsmList.create_without_marker;
-      begin
-        inherited create;
-      end;
-
-    function TAsmList.empty : boolean;
-      begin
-        { there is always a mark_BlockStart available,
-          see TAsmList.create }
-        result:=(count<=1);
       end;
       end;
 
 
 
 
@@ -422,6 +406,21 @@ implementation
                  internalerror(200603261);
                  internalerror(200603261);
              end;
              end;
            hp.typ:=_typ;
            hp.typ:=_typ;
+           { Changing bind from AB_GLOBAL to AB_LOCAL is wrong
+             if bind is already AB_GLOBAL or AB_EXTERNAL,
+             GOT might have been used, so change might be harmful. }
+           if (_bind<>hp.bind) and (hp.getrefs>0) then
+             begin
+{$ifdef extdebug}
+               { the changes that matter must become internalerrors, the rest
+                 should be ignored; a used cannot change anything about this,
+                 so printing a warning/hint is not useful }
+               if (_bind=AB_LOCAL) then
+                 Message3(asmw_w_changing_bind_type,s,asmsymbindname[hp.bind],asmsymbindname[_bind])
+               else
+                 Message3(asmw_h_changing_bind_type,s,asmsymbindname[hp.bind],asmsymbindname[_bind]);
+{$endif extdebug}
+             end;
            hp.bind:=_bind;
            hp.bind:=_bind;
          end
          end
         else
         else

+ 46 - 11
compiler/aasmtai.pas

@@ -237,11 +237,11 @@ interface
 {$ifdef arm}
 {$ifdef arm}
        { ARM only }
        { ARM only }
        ,top_regset
        ,top_regset
-       ,top_conditioncode
        ,top_modeflags
        ,top_modeflags
        ,top_specialreg
        ,top_specialreg
 {$endif arm}
 {$endif arm}
 {$if defined(arm) or defined(aarch64)}
 {$if defined(arm) or defined(aarch64)}
+       ,top_conditioncode
        ,top_shifterop
        ,top_shifterop
 {$endif defined(arm) or defined(aarch64)}
 {$endif defined(arm) or defined(aarch64)}
 {$ifdef m68k}
 {$ifdef m68k}
@@ -334,7 +334,7 @@ interface
         mark_Position
         mark_Position
       );
       );
 
 
-      TRegAllocType = (ra_alloc,ra_dealloc,ra_sync,ra_resize);
+      TRegAllocType = (ra_alloc,ra_dealloc,ra_sync,ra_resize,ra_markused);
 
 
       TStabType = (stab_stabs,stab_stabn,stab_stabd,
       TStabType = (stab_stabs,stab_stabn,stab_stabd,
                    { AIX/XCOFF stab types }
                    { AIX/XCOFF stab types }
@@ -356,7 +356,9 @@ interface
         { for Jasmin }
         { for Jasmin }
         asd_jclass,asd_jinterface,asd_jsuper,asd_jfield,asd_jlimit,asd_jline,
         asd_jclass,asd_jinterface,asd_jsuper,asd_jfield,asd_jlimit,asd_jline,
         { .ent/.end for MIPS and Alpha }
         { .ent/.end for MIPS and Alpha }
-        asd_ent,asd_ent_end
+        asd_ent,asd_ent_end,
+        { supported by recent clang-based assemblers for data-in-code  }
+        asd_data_region, asd_end_data_region
       );
       );
 
 
       TAsmSehDirective=(
       TAsmSehDirective=(
@@ -369,7 +371,7 @@ interface
 
 
 
 
     const
     const
-      regallocstr : array[tregalloctype] of string[10]=('allocated','released','sync','resized');
+      regallocstr : array[tregalloctype] of string[10]=('allocated','released','sync','resized','used');
       tempallocstr : array[boolean] of string[10]=('released','allocated');
       tempallocstr : array[boolean] of string[10]=('released','allocated');
       stabtypestr : array[TStabType] of string[8]=(
       stabtypestr : array[TStabType] of string[8]=(
         'stabs','stabn','stabd',
         'stabs','stabn','stabd',
@@ -385,7 +387,9 @@ interface
         { for Jasmin }
         { for Jasmin }
         'class','interface','super','field','limit','line',
         'class','interface','super','field','limit','line',
         { .ent/.end for MIPS and Alpha }
         { .ent/.end for MIPS and Alpha }
-        'ent','end'
+        'ent','end',
+        { supported by recent clang-based assemblers for data-in-code }
+        'data_region','end_data_region'
       );
       );
       sehdirectivestr : array[TAsmSehDirective] of string[16]=(
       sehdirectivestr : array[TAsmSehDirective] of string[16]=(
         '.seh_proc','.seh_endproc',
         '.seh_proc','.seh_endproc',
@@ -411,15 +415,15 @@ interface
             top_local  : (localoper:plocaloper);
             top_local  : (localoper:plocaloper);
         {$ifdef arm}
         {$ifdef arm}
             top_regset : (regset:^tcpuregisterset; regtyp: tregistertype; subreg: tsubregister; usermode: boolean);
             top_regset : (regset:^tcpuregisterset; regtyp: tregistertype; subreg: tsubregister; usermode: boolean);
-            top_conditioncode : (cc : TAsmCond);
             top_modeflags : (modeflags : tcpumodeflags);
             top_modeflags : (modeflags : tcpumodeflags);
             top_specialreg : (specialreg:tregister; specialflags:tspecialregflags);
             top_specialreg : (specialreg:tregister; specialflags:tspecialregflags);
         {$endif arm}
         {$endif arm}
         {$if defined(arm) or defined(aarch64)}
         {$if defined(arm) or defined(aarch64)}
             top_shifterop : (shifterop : pshifterop);
             top_shifterop : (shifterop : pshifterop);
+            top_conditioncode : (cc : TAsmCond);
         {$endif defined(arm) or defined(aarch64)}
         {$endif defined(arm) or defined(aarch64)}
         {$ifdef m68k}
         {$ifdef m68k}
-            top_regset : (dataregset,addrregset:^tcpuregisterset);
+            top_regset : (dataregset,addrregset,fpuregset:^tcpuregisterset);
         {$endif m68k}
         {$endif m68k}
         {$ifdef jvm}
         {$ifdef jvm}
             top_single : (sval:single);
             top_single : (sval:single);
@@ -625,6 +629,8 @@ interface
           constructor Create_type_name(_typ:taiconst_type;const name:string;_symtyp:Tasmsymtype;ofs:aint);
           constructor Create_type_name(_typ:taiconst_type;const name:string;_symtyp:Tasmsymtype;ofs:aint);
           constructor Create_nil_codeptr;
           constructor Create_nil_codeptr;
           constructor Create_nil_dataptr;
           constructor Create_nil_dataptr;
+          constructor Create_int_codeptr(_value: int64);
+          constructor Create_int_dataptr(_value: int64);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure derefimpl;override;
           procedure derefimpl;override;
@@ -731,6 +737,7 @@ interface
           constructor dealloc(r : tregister;ainstr:tai);
           constructor dealloc(r : tregister;ainstr:tai);
           constructor sync(r : tregister);
           constructor sync(r : tregister);
           constructor resize(r : tregister);
           constructor resize(r : tregister);
+          constructor markused(r : tregister);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
        end;
        end;
@@ -1770,6 +1777,18 @@ implementation
 
 
 
 
     constructor tai_const.Create_nil_codeptr;
     constructor tai_const.Create_nil_codeptr;
+      begin
+        self.Create_int_codeptr(0);
+      end;
+
+
+    constructor tai_const.Create_nil_dataptr;
+      begin
+        self.Create_int_dataptr(0);
+      end;
+
+
+    constructor tai_const.Create_int_codeptr(_value: int64);
       begin
       begin
         inherited Create;
         inherited Create;
         typ:=ait_const;
         typ:=ait_const;
@@ -1782,11 +1801,11 @@ implementation
         sym:=nil;
         sym:=nil;
         endsym:=nil;
         endsym:=nil;
         symofs:=0;
         symofs:=0;
-        value:=0;
+        value:=_value;
       end;
       end;
 
 
 
 
-    constructor tai_const.Create_nil_dataptr;
+    constructor tai_const.Create_int_dataptr(_value: int64);
       begin
       begin
         inherited Create;
         inherited Create;
         typ:=ait_const;
         typ:=ait_const;
@@ -1799,7 +1818,7 @@ implementation
         sym:=nil;
         sym:=nil;
         endsym:=nil;
         endsym:=nil;
         symofs:=0;
         symofs:=0;
-        value:=0;
+        value:=_value;
       end;
       end;
 
 
 
 
@@ -2192,7 +2211,10 @@ implementation
          typ:=ait_stab;
          typ:=ait_stab;
          stabtype:=_stabtype;
          stabtype:=_stabtype;
          getmem(str,length(s)+1);
          getmem(str,length(s)+1);
-         move(s[1],str^,length(s)+1);
+         if length(s)>0 then
+           move(s[1],str^,length(s)+1)
+         else
+           str^:=#0;
       end;
       end;
 
 
     destructor tai_stab.destroy;
     destructor tai_stab.destroy;
@@ -2422,6 +2444,15 @@ implementation
       end;
       end;
 
 
 
 
+    constructor tai_regalloc.markused(r : tregister);
+      begin
+        inherited create;
+        typ:=ait_regalloc;
+        ratype:=ra_markused;
+        reg:=r;
+      end;
+
+
     constructor tai_regalloc.ppuload(t:taitype;ppufile:tcompilerppufile);
     constructor tai_regalloc.ppuload(t:taitype;ppufile:tcompilerppufile);
       begin
       begin
         inherited ppuload(t,ppufile);
         inherited ppuload(t,ppufile);
@@ -2555,6 +2586,9 @@ implementation
 {$ifdef ARM}
 {$ifdef ARM}
               and not(r.base=NR_R15)
               and not(r.base=NR_R15)
 {$endif ARM}
 {$endif ARM}
+{$ifdef aarch64}
+              and not(r.refaddr in [addr_full,addr_gotpageoffset,addr_gotpage])
+{$endif aarch64}
               then
               then
               internalerror(200502052);
               internalerror(200502052);
 {$endif not llvm}
 {$endif not llvm}
@@ -2657,6 +2691,7 @@ implementation
                 begin
                 begin
                   dispose(dataregset);
                   dispose(dataregset);
                   dispose(addrregset);
                   dispose(addrregset);
+                  dispose(fpuregset);
                 end;
                 end;
 {$endif m68k}
 {$endif m68k}
 {$ifdef jvm}
 {$ifdef jvm}

+ 16 - 3
compiler/aggas.pas

@@ -453,6 +453,8 @@ implementation
          system_powerpc64_darwin,
          system_powerpc64_darwin,
          system_x86_64_darwin,
          system_x86_64_darwin,
          system_arm_darwin,
          system_arm_darwin,
+         system_aarch64_darwin,
+         system_x86_64_iphonesim,
          system_powerpc_aix,
          system_powerpc_aix,
          system_powerpc64_aix:
          system_powerpc64_aix:
            begin
            begin
@@ -488,7 +490,8 @@ implementation
                     AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
                     AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
                   else
                   else
                     AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
                     AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
-                { darwin/x86-64 uses RIP-based GOT addressing, no symbol stubs }
+                { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
+                  explicit symbol stubs }
                 else
                 else
                   internalerror(2006031101);
                   internalerror(2006031101);
               end;
               end;
@@ -1072,6 +1075,11 @@ implementation
                     end;
                     end;
                   if tai_label(hp).labsym.bind in [AB_GLOBAL,AB_PRIVATE_EXTERN] then
                   if tai_label(hp).labsym.bind in [AB_GLOBAL,AB_PRIVATE_EXTERN] then
                    begin
                    begin
+{$ifdef arm}
+                     { do no change arm mode accidently, .globl seems to reset the mode }
+                     if GenerateThumbCode or GenerateThumb2Code then
+                       AsmWriteln(#9'.thumb_func'#9);
+{$endif arm}
                      AsmWrite('.globl'#9);
                      AsmWrite('.globl'#9);
                      if replaceforbidden then
                      if replaceforbidden then
                        AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_label(hp).labsym.name))
                        AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_label(hp).labsym.name))
@@ -1275,7 +1283,12 @@ implementation
              begin
              begin
                WriteDirectiveName(tai_directive(hp).directive);
                WriteDirectiveName(tai_directive(hp).directive);
                if tai_directive(hp).name <>'' then
                if tai_directive(hp).name <>'' then
-                 AsmWrite(tai_directive(hp).name);
+                 begin
+                   if replaceforbidden then
+                     AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_directive(hp).name))
+                   else
+                     AsmWrite(tai_directive(hp).name);
+                 end;
                AsmLn;
                AsmLn;
              end;
              end;
 
 
@@ -1541,7 +1554,7 @@ implementation
 
 
       { "no executable stack" marker }
       { "no executable stack" marker }
       { TODO: used by OpenBSD/NetBSD as well? }
       { TODO: used by OpenBSD/NetBSD as well? }
-      if (target_info.system in (systems_linux + systems_android + systems_freebsd)) and
+      if (target_info.system in (systems_linux + systems_android + systems_freebsd + systems_dragonfly)) and
          not(cs_executable_stack in current_settings.moduleswitches) then
          not(cs_executable_stack in current_settings.moduleswitches) then
         begin
         begin
           AsmWriteLn('.section .note.GNU-stack,"",%progbits');
           AsmWriteLn('.section .note.GNU-stack,"",%progbits');

+ 4 - 0
compiler/agjasmin.pas

@@ -516,6 +516,8 @@ implementation
         i: longint;
         i: longint;
         toplevelowner: tsymtable;
         toplevelowner: tsymtable;
       begin
       begin
+        superclass:=nil;
+
         { JVM 1.5+ }
         { JVM 1.5+ }
         AsmWriteLn('.bytecode 49.0');
         AsmWriteLn('.bytecode 49.0');
         // include files are not support by Java, and the directory of the main
         // include files are not support by Java, and the directory of the main
@@ -753,6 +755,8 @@ implementation
               2:result:=tostr(smallint(csym.value.valueord.svalue));
               2:result:=tostr(smallint(csym.value.valueord.svalue));
               4:result:=tostr(longint(csym.value.valueord.svalue));
               4:result:=tostr(longint(csym.value.valueord.svalue));
               8:result:=tostr(csym.value.valueord.svalue);
               8:result:=tostr(csym.value.valueord.svalue);
+              else
+                internalerror(2014082050);
             end;
             end;
           conststring:
           conststring:
             result:=constastr(pchar(csym.value.valueptr),csym.value.len);
             result:=constastr(pchar(csym.value.valueptr),csym.value.len);

+ 23 - 0
compiler/alpha/cpuinfo.pas

@@ -21,6 +21,9 @@ Unit CPUInfo;
 
 
 Interface
 Interface
 
 
+uses
+ globtype;
+
 Type
 Type
    { Natural integer register type and size for the target machine }
    { Natural integer register type and size for the target machine }
 {$ifdef FPC}
 {$ifdef FPC}
@@ -38,6 +41,9 @@ Type
    TConstPtrUInt = qword;
    TConstPtrUInt = qword;
 
 
    bestreal = extended;
    bestreal = extended;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TExtended80Rec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts32real = single;
    ts64real = double;
    ts64real = double;
    ts80real = extended;
    ts80real = extended;
@@ -52,7 +58,15 @@ Type
        ClassEV8
        ClassEV8
       );
       );
 
 
+   tcontrollertype =
+     (ct_none
+     );
+
+
 Const
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = false;
    { Size of native extended type }
    { Size of native extended type }
    extended_size = 16;
    extended_size = 16;
    {# Size of a pointer                           }
    {# Size of a pointer                           }
@@ -60,6 +74,15 @@ Const
    {# Size of a multimedia register               }
    {# Size of a multimedia register               }
    mmreg_size = 8;
    mmreg_size = 8;
 
 
+   { We know that there are fields after sramsize
+     but we don't care about this warning }
+   {$PUSH}
+    {$WARN 3177 OFF}
+   embedded_controllers : array [tcontrollertype] of tcontrollerdatatype =
+   (
+      (controllertypestr:''; controllerunitstr:''; flashbase:0; flashsize:0; srambase:0; sramsize:0));
+   {$POP}
+
    { target cpu string (used by compiler options) }
    { target cpu string (used by compiler options) }
    target_cpu_string = 'alpha';
    target_cpu_string = 'alpha';
 
 

+ 22 - 5
compiler/aoptobj.pas

@@ -361,7 +361,7 @@ Unit AoptObj;
 
 
     function JumpTargetOp(ai: taicpu): poper; inline;
     function JumpTargetOp(ai: taicpu): poper; inline;
       begin
       begin
-{$ifdef MIPS}
+{$if defined(MIPS)}
         { MIPS branches can have 1,2 or 3 operands, target label is the last one. }
         { MIPS branches can have 1,2 or 3 operands, target label is the last one. }
         result:=ai.oper[ai.ops-1];
         result:=ai.oper[ai.ops-1];
 {$else MIPS}
 {$else MIPS}
@@ -1041,6 +1041,9 @@ Unit AoptObj;
         Repeat
         Repeat
           While Assigned(StartPai) And
           While Assigned(StartPai) And
                 ((StartPai.typ in (SkipInstr - [ait_regAlloc])) Or
                 ((StartPai.typ in (SkipInstr - [ait_regAlloc])) Or
+{$if defined(MIPS) or defined(SPARC)}
+                ((startpai.typ=ait_instruction) and (taicpu(startpai).opcode=A_NOP)) or
+{$endif MIPS or SPARC}
                  ((StartPai.typ = ait_label) and
                  ((StartPai.typ = ait_label) and
                   Not(Tai_Label(StartPai).labsym.Is_Used))) Do
                   Not(Tai_Label(StartPai).labsym.Is_Used))) Do
             StartPai := Tai(StartPai.Next);
             StartPai := Tai(StartPai.Next);
@@ -1176,9 +1179,9 @@ Unit AoptObj;
     function IsJumpToLabel(hp: taicpu): boolean;
     function IsJumpToLabel(hp: taicpu): boolean;
       begin
       begin
         result:=(hp.opcode=aopt_uncondjmp) and
         result:=(hp.opcode=aopt_uncondjmp) and
-{$ifdef arm}
+{$if defined(arm) or defined(aarch64)}
           (hp.condition=c_None) and
           (hp.condition=c_None) and
-{$endif arm}
+{$endif arm or aarch64}
           (JumpTargetOp(hp)^.typ = top_ref) and
           (JumpTargetOp(hp)^.typ = top_ref) and
           (JumpTargetOp(hp)^.ref^.symbol is TAsmLabel);
           (JumpTargetOp(hp)^.ref^.symbol is TAsmLabel);
       end;
       end;
@@ -1257,6 +1260,14 @@ Unit AoptObj;
                     exit;
                     exit;
                   if not GetFinalDestination(taicpu(p1),succ(level)) then
                   if not GetFinalDestination(taicpu(p1),succ(level)) then
                     exit;
                     exit;
+{$if defined(aarch64)}
+                  { can't have conditional branches to
+                    global labels on AArch64, because the
+                    offset may become too big }
+                  if not(taicpu(hp).condition in [C_None,C_AL,C_NV]) and
+                     (tasmlabel(JumpTargetOp(taicpu(p1))^.ref^.symbol).bind<>AB_LOCAL) then
+                    exit;
+{$endif aarch64}
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).decrefs;
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).decrefs;
                   JumpTargetOp(hp)^.ref^.symbol:=JumpTargetOp(taicpu(p1))^.ref^.symbol;
                   JumpTargetOp(hp)^.ref^.symbol:=JumpTargetOp(taicpu(p1))^.ref^.symbol;
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).increfs;
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).increfs;
@@ -1395,9 +1406,15 @@ Unit AoptObj;
                                     FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp2) then
                                     FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp2) then
                                   begin
                                   begin
                                     if (taicpu(p).opcode=aopt_condjmp)
                                     if (taicpu(p).opcode=aopt_condjmp)
-  {$ifdef arm}
+  {$if defined(arm) or defined(aarch64)}
                                       and (taicpu(p).condition<>C_None)
                                       and (taicpu(p).condition<>C_None)
-  {$endif arm}
+  {$endif arm or aarch64}
+  {$if defined(aarch64)}
+                                      { can't have conditional branches to
+                                        global labels on AArch64, because the
+                                        offset may become too big }
+                                      and (tasmlabel(JumpTargetOp(taicpu(hp1))^.ref^.symbol).bind=AB_LOCAL)
+  {$endif aarch64}
                                     then
                                     then
                                       begin
                                       begin
                                         taicpu(p).condition:=inverse_cond(taicpu(p).condition);
                                         taicpu(p).condition:=inverse_cond(taicpu(p).condition);

+ 336 - 30
compiler/arm/aasmcpu.pas

@@ -212,7 +212,7 @@ uses
          function is_same_reg_move(regtype: Tregistertype):boolean; override;
          function is_same_reg_move(regtype: Tregistertype):boolean; override;
 
 
          function spilling_get_operation_type(opnr: longint): topertype;override;
          function spilling_get_operation_type(opnr: longint): topertype;override;
-
+         function spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;override;
          { assembler }
          { assembler }
       public
       public
          { the next will reset all instructions that can change in pass 2 }
          { the next will reset all instructions that can change in pass 2 }
@@ -777,6 +777,15 @@ implementation
       end;
       end;
 
 
 
 
+    function taicpu.spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;
+      begin
+        result := operand_read;
+        if (oper[opnr]^.ref^.base = reg) and
+          (oper[opnr]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) then
+           result := operand_readwrite;
+      end;
+
+
     procedure BuildInsTabCache;
     procedure BuildInsTabCache;
       var
       var
         i : longint;
         i : longint;
@@ -889,6 +898,20 @@ implementation
               limit:=254;
               limit:=254;
         end;
         end;
 
 
+      function is_case_dispatch(hp: taicpu): boolean;
+        begin
+          result:=
+            ((taicpu(hp).opcode in [A_ADD,A_LDR]) and
+             not(GenerateThumbCode or GenerateThumb2Code) and
+             (taicpu(hp).oper[0]^.typ=top_reg) and
+             (taicpu(hp).oper[0]^.reg=NR_PC)) or
+             ((taicpu(hp).opcode=A_MOV) and (GenerateThumbCode) and
+              (taicpu(hp).oper[0]^.typ=top_reg) and
+              (taicpu(hp).oper[0]^.reg=NR_PC)) or
+             (taicpu(hp).opcode=A_TBH) or
+             (taicpu(hp).opcode=A_TBB);
+        end;
+
       var
       var
         curinspos,
         curinspos,
         penalty,
         penalty,
@@ -897,7 +920,8 @@ implementation
         currentsize,
         currentsize,
         extradataoffset,
         extradataoffset,
         curop : longint;
         curop : longint;
-        curtai : tai;
+        curtai,
+        inserttai : tai;
         ai_label : tai_label;
         ai_label : tai_label;
         curdatatai,hp,hp2 : tai;
         curdatatai,hp,hp2 : tai;
         curdata : TAsmList;
         curdata : TAsmList;
@@ -1041,16 +1065,13 @@ implementation
               (tai(hp).typ=ait_instruction) then
               (tai(hp).typ=ait_instruction) then
               begin
               begin
                 case taicpu(hp).opcode of
                 case taicpu(hp).opcode of
-                  A_BX,
+                  A_MOV,
                   A_LDR,
                   A_LDR,
-                  A_ADD:
+                  A_ADD,
+                  A_TBH,
+                  A_TBB:
                     { approximation if we hit a case jump table }
                     { approximation if we hit a case jump table }
-                    if ((taicpu(hp).opcode in [A_ADD,A_LDR]) and not(GenerateThumbCode or GenerateThumb2Code) and
-                       (taicpu(hp).oper[0]^.typ=top_reg) and
-                      (taicpu(hp).oper[0]^.reg=NR_PC)) or
-                      ((taicpu(hp).opcode=A_BX) and (GenerateThumbCode) and
-                       (taicpu(hp).oper[0]^.typ=top_reg))
-                       then
+                    if is_case_dispatch(taicpu(hp)) then
                       begin
                       begin
                         penalty:=multiplier;
                         penalty:=multiplier;
                         hp:=tai(hp.next);
                         hp:=tai(hp.next);
@@ -1144,12 +1165,34 @@ implementation
                 else
                 else
                   limit:=1016;
                   limit:=1016;
 
 
+                { if this is an add/tbh/tbb-based jumptable, go back to the
+                  previous instruction, because inserting data between the
+                  dispatch instruction and the table would mess up the
+                  addresses }
+                inserttai:=curtai;
+                if is_case_dispatch(taicpu(inserttai)) and
+                   ((taicpu(inserttai).opcode=A_ADD) or
+                    (taicpu(inserttai).opcode=A_TBH) or
+                    (taicpu(inserttai).opcode=A_TBB)) then
+                  begin
+                    repeat
+                      inserttai:=tai(inserttai.previous);
+                    until inserttai.typ=ait_instruction;
+                    { if it's an add-based jump table, then also skip the
+                      pc-relative load }
+                    if taicpu(curtai).opcode=A_ADD then
+                      repeat
+                        inserttai:=tai(inserttai.previous);
+                      until inserttai.typ=ait_instruction;
+                  end
+                else
+
                 { on arm thumb, insert the data always after all labels etc. following an instruction so it
                 { on arm thumb, insert the data always after all labels etc. following an instruction so it
                   is prevent that a bxx yyy; bl xxx; yyyy: sequence gets separated ( we never insert on arm thumb after
                   is prevent that a bxx yyy; bl xxx; yyyy: sequence gets separated ( we never insert on arm thumb after
                   bxx) and the distance of bxx gets too long }
                   bxx) and the distance of bxx gets too long }
                 if GenerateThumbCode then
                 if GenerateThumbCode then
-                  while assigned(tai(curtai.Next)) and (tai(curtai.Next).typ in SkipInstr+[ait_label]) do
-                    curtai:=tai(curtai.next);
+                  while assigned(tai(inserttai.Next)) and (tai(inserttai.Next).typ in SkipInstr+[ait_label]) do
+                    inserttai:=tai(inserttai.next);
 
 
                 doinsert:=false;
                 doinsert:=false;
                 current_asmdata.getjumplabel(l);
                 current_asmdata.getjumplabel(l);
@@ -1176,7 +1219,7 @@ implementation
                   is then equal curdata.last.previous) we could over see one
                   is then equal curdata.last.previous) we could over see one
                   instruction }
                   instruction }
                 hp:=tai(curdata.Last);
                 hp:=tai(curdata.Last);
-                list.insertlistafter(curtai,curdata);
+                list.insertlistafter(inserttai,curdata);
                 curtai:=hp;
                 curtai:=hp;
               end
               end
             else
             else
@@ -1572,6 +1615,12 @@ implementation
                      s:=s+' am2 ';
                      s:=s+' am2 ';
                  end
                  end
                else
                else
+                 if (ot and OT_SHIFTEROP)=OT_SHIFTEROP then
+                  begin
+                    s:=s+'shifterop';
+                    addsize:=false;
+                  end
+                else
                  s:=s+'???';
                  s:=s+'???';
                { size }
                { size }
                if addsize then
                if addsize then
@@ -1843,8 +1892,10 @@ implementation
                 begin
                 begin
                   ot:=OT_SHIFTEROP;
                   ot:=OT_SHIFTEROP;
                 end;
                 end;
+              top_conditioncode:
+                ot:=OT_CONDITION;
               else
               else
-                internalerror(200402261);
+                internalerror(2004022623);
             end;
             end;
           end;
           end;
       end;
       end;
@@ -1876,7 +1927,6 @@ implementation
         {siz : array[0..3] of longint;}
         {siz : array[0..3] of longint;}
       begin
       begin
         Matches:=100;
         Matches:=100;
-        writeln(getstring,'---');
 
 
         { Check the opcode and operands }
         { Check the opcode and operands }
         if (p^.opcode<>opcode) or (p^.ops<>ops) then
         if (p^.opcode<>opcode) or (p^.ops<>ops) then
@@ -1918,7 +1968,7 @@ implementation
         { update condition flags
         { update condition flags
           or floating point single }
           or floating point single }
       if (oppostfix=PF_S) and
       if (oppostfix=PF_S) and
-        not(p^.code[0] in [#$04]) then
+        not(p^.code[0] in [#$04..#$0B]) then
         begin
         begin
           Matches:=0;
           Matches:=0;
           exit;
           exit;
@@ -2089,61 +2139,317 @@ implementation
 
 
 
 
     procedure taicpu.gencode(objdata:TObjData);
     procedure taicpu.gencode(objdata:TObjData);
+      const
+        CondVal : array[TAsmCond] of byte=(
+         $E, $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $A,
+         $B, $C, $D, $E, 0);
       var
       var
         bytes : dword;
         bytes : dword;
         i_field : byte;
         i_field : byte;
+        currsym : TObjSymbol;
+        offset : longint;
 
 
       procedure setshifterop(op : byte);
       procedure setshifterop(op : byte);
+        var
+          r : byte;
+          imm : dword;
         begin
         begin
           case oper[op]^.typ of
           case oper[op]^.typ of
             top_const:
             top_const:
               begin
               begin
                 i_field:=1;
                 i_field:=1;
-                bytes:=bytes or dword(oper[op]^.val and $fff);
+                if oper[op]^.val and $ff=oper[op]^.val then
+                  bytes:=bytes or dword(oper[op]^.val)
+                else
+                  begin
+                    { calc rotate and adjust imm }
+                    r:=0;
+                    imm:=dword(oper[op]^.val);
+                    repeat
+                      imm:=RolDWord(imm, 2);
+                      inc(r)
+                    until imm and $ff=imm;
+                    bytes:=bytes or (r shl 8) or imm;
+                  end;
               end;
               end;
             top_reg:
             top_reg:
               begin
               begin
                 i_field:=0;
                 i_field:=0;
-                bytes:=bytes or (getsupreg(oper[op]^.reg) shl 16);
+                bytes:=bytes or getsupreg(oper[op]^.reg);
 
 
                 { does a real shifter op follow? }
                 { does a real shifter op follow? }
-                if (op+1<=op) and (oper[op+1]^.typ=top_shifterop) then
-                  begin
-                  end;
+                if (op+1<opercnt) and (oper[op+1]^.typ=top_shifterop) then
+                  with oper[op+1]^.shifterop^ do
+                    begin
+                      bytes:=bytes or (shiftimm shl 7);
+                      if shiftmode<>SM_RRX then
+                        bytes:=bytes or (ord(shiftmode) - ord(SM_LSL)) shl 5
+                      else
+                        bytes:=bytes or (3 shl 5);
+                      if getregtype(rs) <> R_INVALIDREGISTER then
+                        begin
+                          bytes:=bytes or (1 shl 4);
+                          bytes:=bytes or (getsupreg(rs) shl 8);
+                        end
+                    end;
               end;
               end;
           else
           else
             internalerror(2005091103);
             internalerror(2005091103);
           end;
           end;
         end;
         end;
 
 
+      function MakeRegList(reglist: tcpuregisterset): word;
+        var
+          i, w: word;
+        begin
+          result:=0;
+          w:=1;
+          for i:=RS_R0 to RS_R15 do
+            begin
+              if i in reglist then
+                result:=result or w;
+              w:=w shl 1
+            end;
+        end;
+
       begin
       begin
         bytes:=$0;
         bytes:=$0;
         i_field:=0;
         i_field:=0;
         { evaluate and set condition code }
         { evaluate and set condition code }
+        bytes:=bytes or (CondVal[condition] shl 28);
 
 
         { condition code allowed? }
         { condition code allowed? }
 
 
         { setup rest of the instruction }
         { setup rest of the instruction }
         case insentry^.code[0] of
         case insentry^.code[0] of
-          #$08:
+          #$01: // B/BL
             begin
             begin
               { set instruction code }
               { set instruction code }
-              bytes:=bytes or (ord(insentry^.code[1]) shl 26);
-              bytes:=bytes or (ord(insentry^.code[2]) shl 21);
-
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              { set offset }
+              currsym:=objdata.symbolref(oper[0]^.ref^.symbol);
+              if (currsym.bind<>AB_LOCAL) and (currsym.objsection<>objdata.CurrObjSec) then
+                objdata.writereloc(oper[0]^.ref^.offset,0,currsym,RELOC_RELATIVE_24)
+              else
+                bytes:=bytes or (((currsym.offset-insoffset-8) shr 2) and $ffffff);
+            end;
+          #$04..#$07: // SUB
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 16);
+              { set destination }
+              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
+              { set Rn }
+              bytes:=bytes or (getsupreg(oper[1]^.reg) shl 16);
+              { create shifter op }
+              setshifterop(2);
+              { set I field }
+              bytes:=bytes or (i_field shl 25);
+              { set S if necessary }
+              if oppostfix=PF_S then
+                bytes:=bytes or (1 shl 20);
+            end;
+          #$08,#$0A,#$0B: // MOV
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 16);
               { set destination }
               { set destination }
               bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
               bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
-
               { create shifter op }
               { create shifter op }
               setshifterop(1);
               setshifterop(1);
-
-              { set i field }
+              { set I field }
               bytes:=bytes or (i_field shl 25);
               bytes:=bytes or (i_field shl 25);
-
-              { set s if necessary }
+              { set S if necessary }
               if oppostfix=PF_S then
               if oppostfix=PF_S then
                 bytes:=bytes or (1 shl 20);
                 bytes:=bytes or (1 shl 20);
             end;
             end;
+          #$0C,#$0E,#$0F: // CMP
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 16);
+              { set destination }
+              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 16);
+              { create shifter op }
+              setshifterop(1);
+              { set I field }
+              bytes:=bytes or (i_field shl 25);
+              { always set S bit }
+              bytes:=bytes or (1 shl 20);
+            end;
+          #$14: // MUL/MLA r1,r2,r3
+            begin
+              { set instruction code }
+              bytes:=bytes or ord(insentry^.code[1]) shl 24;
+              bytes:=bytes or ord(insentry^.code[2]) shl 16;
+              bytes:=bytes or ord(insentry^.code[3]);
+              { set regs }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 16;
+              bytes:=bytes or getsupreg(oper[1]^.reg);
+              bytes:=bytes or getsupreg(oper[2]^.reg) shl 8;
+            end;
+          #$15: // MUL/MLA r1,r2,r3,r4
+            begin
+              { set instruction code }
+              bytes:=bytes or ord(insentry^.code[1]) shl 24;
+              bytes:=bytes or ord(insentry^.code[2]) shl 16;
+              bytes:=bytes or ord(insentry^.code[3]);
+              { set regs }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 16;
+              bytes:=bytes or getsupreg(oper[1]^.reg);
+              bytes:=bytes or getsupreg(oper[2]^.reg) shl 8;
+              bytes:=bytes or getsupreg(oper[3]^.reg) shl 12;
+            end;
+          #$16: // MULL r1,r2,r3,r4
+            begin
+              { set instruction code }
+              bytes:=bytes or ord(insentry^.code[1]) shl 24;
+              bytes:=bytes or ord(insentry^.code[2]) shl 16;
+              bytes:=bytes or ord(insentry^.code[3]);
+              { set regs }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 12;
+              bytes:=bytes or getsupreg(oper[1]^.reg) shl 16;
+              bytes:=bytes or getsupreg(oper[2]^.reg);
+              bytes:=bytes or getsupreg(oper[3]^.reg) shl 8;
+            end;
+          #$17: // LDR/STR
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 16);
+              { set Rn and Rd }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 12;
+              bytes:=bytes or getsupreg(oper[1]^.ref^.base) shl 16;
+              if getregtype(oper[1]^.ref^.index)=R_INVALIDREGISTER then
+                begin
+                  { set offset }
+                  offset:=0;
+                  currsym:=objdata.symbolref(oper[1]^.ref^.symbol);
+                  if assigned(currsym) then
+                    offset:=currsym.offset-insoffset-8;
+                  offset:=offset+oper[1]^.ref^.offset;
+                  if offset>=0 then
+                    begin
+                      { set U flag }
+                      bytes:=bytes or (1 shl 23);
+                      bytes:=bytes or offset
+                    end
+                  else
+                    begin
+                      offset:=-offset;
+                      bytes:=bytes or offset
+                    end;
+                end
+              else
+                begin
+                  { set U flag }
+                  if oper[1]^.ref^.signindex>0 then
+                    bytes:=bytes or (1 shl 23);
+                  { set I flag }
+                  bytes:=bytes or (1 shl 25);
+                  bytes:=bytes or getsupreg(oper[1]^.ref^.index);
+                  { set shift }
+                  with oper[1]^.ref^ do
+                    if shiftmode<>SM_None then
+                      begin
+                        bytes:=bytes or (shiftimm shl 7);
+                        if shiftmode<>SM_RRX then
+                          bytes:=bytes or (ord(shiftmode) - ord(SM_LSL)) shl 5
+                        else
+                          bytes:=bytes or (3 shl 5);
+                      end
+                end;
+              { set W bit }
+              if oper[1]^.ref^.addressmode=AM_PREINDEXED then
+                bytes:=bytes or (1 shl 21);
+              { set P bit if necessary }
+              if oper[1]^.ref^.addressmode<>AM_POSTINDEXED then
+                bytes:=bytes or (1 shl 24);
+            end;
+          #$22: // LDRH/STRH
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 16);
+              bytes:=bytes or ord(insentry^.code[2]);
+              { src/dest register (Rd) }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 12;
+              { base register (Rn) }
+              bytes:=bytes or getsupreg(oper[1]^.ref^.base) shl 16;
+              if getregtype(oper[1]^.ref^.index)=R_INVALIDREGISTER then
+                begin
+                  bytes:=bytes or (1 shl 22); // with immediate offset
+                  if oper[1]^.ref^.offset < 0 then
+                    begin
+                      bytes:=bytes or ((-oper[1]^.ref^.offset) and $f0 shl 4);
+                      bytes:=bytes or ((-oper[1]^.ref^.offset) and $f);
+                    end
+                  else
+                    begin
+                      { set U bit }
+                      bytes:=bytes or (1 shl 23);
+                      bytes:=bytes or (oper[1]^.ref^.offset and $f0 shl 4);
+                      bytes:=bytes or (oper[1]^.ref^.offset and $f);
+                    end;
+                end
+              else
+                begin
+                  { set U flag }
+                  bytes:=bytes or (1 shl 23);
+                  bytes:=bytes or getsupreg(oper[1]^.ref^.index);
+                end;
+              { set W bit }
+              if oper[1]^.ref^.addressmode=AM_PREINDEXED then
+                bytes:=bytes or (1 shl 21);
+              { set P bit if necessary }
+              if oper[1]^.ref^.addressmode<>AM_POSTINDEXED then
+                bytes:=bytes or (1 shl 24);
+            end;
+          #$26: // LDM/STM
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 20);
+              if oper[0]^.typ=top_ref then
+                begin
+                  { set W bit }
+                  if oper[0]^.ref^.addressmode=AM_PREINDEXED then
+                    bytes:=bytes or (1 shl 21);
+                  { set Rn }
+                  bytes:=bytes or (getsupreg(oper[0]^.ref^.index) shl 16);
+                end
+              else { typ=top_reg }
+                begin
+                  { set Rn }
+                  bytes:=bytes or (getsupreg(oper[0]^.reg) shl 16);
+                end;
+              { reglist }
+              bytes:=bytes or MakeRegList(oper[1]^.regset^);
+              { set P bit }
+              if (opcode=A_LDM) and (oppostfix in [PF_ED,PF_EA,PF_IB,PF_DB])
+              or (opcode=A_STM) and (oppostfix in [PF_FA,PF_FD,PF_IB,PF_DB]) then
+                bytes:=bytes or (1 shl 24);
+              { set U bit }
+              if (opcode=A_LDM) and (oppostfix in [PF_ED,PF_FD,PF_IB,PF_IA])
+              or (opcode=A_STM) and (oppostfix in [PF_FA,PF_EA,PF_IB,PF_IA]) then
+                bytes:=bytes or (1 shl 23);
+            end;
+          #$27: // SWP/SWPB
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 20);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 4);
+              { set regs }
+              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
+              bytes:=bytes or getsupreg(oper[1]^.reg);
+              bytes:=bytes or (getsupreg(oper[2]^.ref^.base) shl 16);
+            end;
+          #$03:  // BX
+            begin
+              writeln(objdata.CurrObjSec.fullname);
+              Comment(v_warning,'BX instruction');
+              // TBD
+            end;
           #$ff:
           #$ff:
             internalerror(2005091101);
             internalerror(2005091101);
           else
           else

+ 1 - 2
compiler/arm/agarmgas.pas

@@ -114,8 +114,7 @@ unit agarmgas;
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
         else if GenerateThumbCode then
         else if GenerateThumbCode then
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
-        // EDSP instructions in RTL require armv5te at least to not generate error
-        else if current_settings.cputype >= cpu_armv5te then
+        else
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' '+result;
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' '+result;
 
 
         if target_info.abi = abi_eabihf then
         if target_info.abi = abi_eabihf then

+ 35 - 7
compiler/arm/aoptcpu.pas

@@ -341,7 +341,14 @@ Implementation
         if Result and
         if Result and
            (Next.typ=ait_instruction) and
            (Next.typ=ait_instruction) and
            (taicpu(Next).opcode in [A_LDR, A_STR]) and
            (taicpu(Next).opcode in [A_LDR, A_STR]) and
-           RefsEqual(taicpu(Next).oper[1]^.ref^,ref) then
+           (
+            ((taicpu(Next).ops = 2) and
+             (taicpu(Next).oper[1]^.typ = top_ref) and
+             RefsEqual(taicpu(Next).oper[1]^.ref^,ref)) or
+            ((taicpu(Next).ops = 3) and { LDRD/STRD }
+             (taicpu(Next).oper[2]^.typ = top_ref) and
+             RefsEqual(taicpu(Next).oper[2]^.ref^,ref))
+           ) then
             {We've found an instruction LDR or STR with the same reference}
             {We've found an instruction LDR or STR with the same reference}
             exit;
             exit;
       until not(Result) or
       until not(Result) or
@@ -507,7 +514,8 @@ Implementation
       hp1 : tai;
       hp1 : tai;
     begin
     begin
       Result:=false;
       Result:=false;
-      if (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
+      if (p.oper[1]^.typ = top_ref) and
+        (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
         (p.oper[1]^.ref^.index=NR_NO) and
         (p.oper[1]^.ref^.index=NR_NO) and
         (p.oper[1]^.ref^.offset=0) and
         (p.oper[1]^.ref^.offset=0) and
         GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
         GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
@@ -563,6 +571,7 @@ Implementation
       TmpUsedRegs: TAllUsedRegs;
       TmpUsedRegs: TAllUsedRegs;
       tempop: tasmop;
       tempop: tasmop;
       oldreg: tregister;
       oldreg: tregister;
+      dealloc: tai_regalloc;
 
 
     function IsPowerOf2(const value: DWord): boolean; inline;
     function IsPowerOf2(const value: DWord): boolean; inline;
       begin
       begin
@@ -632,11 +641,13 @@ Implementation
                       str reg1,ref
                       str reg1,ref
                       mov reg2,reg1
                       mov reg2,reg1
                     }
                     }
-                    if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
+                    if (taicpu(p).oper[1]^.typ = top_ref) and
+                       (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
                        (taicpu(p).oppostfix=PF_None) and
                        (taicpu(p).oppostfix=PF_None) and
                        (taicpu(p).condition=C_None) and
                        (taicpu(p).condition=C_None) and
                        GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
                        GetNextInstructionUsingRef(p,hp1,taicpu(p).oper[1]^.ref^) and
                        MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
                        MatchInstruction(hp1, A_LDR, [taicpu(p).condition], [PF_None]) and
+                       (taicpu(hp1).oper[1]^.typ=top_ref) and
                        (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
                        (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
                        not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
                        not(RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
                        ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
                        ((taicpu(hp1).oper[1]^.ref^.index=NR_NO) or not (RegModifiedBetween(taicpu(hp1).oper[1]^.ref^.index, p, hp1))) and
@@ -661,7 +672,7 @@ Implementation
                       str reg1,ref
                       str reg1,ref
                       str reg2,ref
                       str reg2,ref
                       into
                       into
-                      strd reg1,ref
+                      strd reg1,reg2,ref
                     }
                     }
                     else if (GenerateARMCode or GenerateThumb2Code) and
                     else if (GenerateARMCode or GenerateThumb2Code) and
                        (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
                        (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
@@ -682,6 +693,9 @@ Implementation
                       begin
                       begin
                         DebugMsg('Peephole StrStr2Strd done', p);
                         DebugMsg('Peephole StrStr2Strd done', p);
                         taicpu(p).oppostfix:=PF_D;
                         taicpu(p).oppostfix:=PF_D;
+                        taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
+                        taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
+                        taicpu(p).ops:=3;
                         asml.remove(hp1);
                         asml.remove(hp1);
                         hp1.free;
                         hp1.free;
                         result:=true;
                         result:=true;
@@ -695,7 +709,8 @@ Implementation
                       ldr reg2,ref
                       ldr reg2,ref
                       into ...
                       into ...
                     }
                     }
-                    if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
+                    if (taicpu(p).oper[1]^.typ = top_ref) and
+                       (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
                        GetNextInstruction(p,hp1) and
                        GetNextInstruction(p,hp1) and
                        { ldrd is not allowed here }
                        { ldrd is not allowed here }
                        MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
                        MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
@@ -728,7 +743,7 @@ Implementation
                           end
                           end
                         {
                         {
                            ...
                            ...
-                           ldrd reg1,ref
+                           ldrd reg1,reg1+1,ref
                         }
                         }
                         else if (GenerateARMCode or GenerateThumb2Code) and
                         else if (GenerateARMCode or GenerateThumb2Code) and
                           (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
                           (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
@@ -746,6 +761,9 @@ Implementation
                           AlignedToQWord(taicpu(p).oper[1]^.ref^) then
                           AlignedToQWord(taicpu(p).oper[1]^.ref^) then
                           begin
                           begin
                             DebugMsg('Peephole LdrLdr2Ldrd done', p);
                             DebugMsg('Peephole LdrLdr2Ldrd done', p);
+                            taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
+                            taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
+                            taicpu(p).ops:=3;
                             taicpu(p).oppostfix:=PF_D;
                             taicpu(p).oppostfix:=PF_D;
                             asml.remove(hp1);
                             asml.remove(hp1);
                             hp1.free;
                             hp1.free;
@@ -1228,6 +1246,7 @@ Implementation
                        (taicpu(p).oppostfix = PF_NONE) and
                        (taicpu(p).oppostfix = PF_NONE) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                        MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
                        MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
+                       (taicpu(hp1).oper[1]^.typ = top_ref) and
                        { We can change the base register only when the instruction uses AM_OFFSET }
                        { We can change the base register only when the instruction uses AM_OFFSET }
                        ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
                        ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
                          ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
                          ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
@@ -1250,6 +1269,13 @@ Implementation
                         if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                         if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                           taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
                           taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
 
 
+                        dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, taicpu(p.Next));
+                        if Assigned(dealloc) then
+                          begin
+                            asml.remove(dealloc);
+                            asml.InsertAfter(dealloc,hp1);
+                          end;
+
                         GetNextInstruction(p, hp1);
                         GetNextInstruction(p, hp1);
                         asml.remove(p);
                         asml.remove(p);
                         p.free;
                         p.free;
@@ -1645,6 +1671,7 @@ Implementation
                         while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
                         while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
                           { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
                           { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
                           MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
                           MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
+                          (taicpu(hp1).oper[1]^.typ = top_ref) and
                           (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                           (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                           { don't optimize if the register is stored/overwritten }
                           { don't optimize if the register is stored/overwritten }
                           (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
                           (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
@@ -2418,7 +2445,7 @@ Implementation
     begin
     begin
       result:=true;
       result:=true;
 
 
-      list:=TAsmList.create_without_marker;
+      list:=TAsmList.create;
       p:=BlockStart;
       p:=BlockStart;
       while p<>BlockEnd Do
       while p<>BlockEnd Do
         begin
         begin
@@ -2439,6 +2466,7 @@ Implementation
                ) or
                ) or
                { try to prove that the memory accesses don't overlapp }
                { try to prove that the memory accesses don't overlapp }
                ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
                ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
+                (taicpu(p).oper[1]^.typ = top_ref) and
                 (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
                 (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
                 (taicpu(p).oppostfix=PF_None) and
                 (taicpu(p).oppostfix=PF_None) and
                 (taicpu(hp1).oppostfix=PF_None) and
                 (taicpu(hp1).oppostfix=PF_None) and

+ 8 - 5
compiler/arm/aoptcpub.pas

@@ -124,11 +124,14 @@ Implementation
     begin
     begin
       result:=false;
       result:=false;
       for i:=0 to taicpu(p1).ops-1 do
       for i:=0 to taicpu(p1).ops-1 do
-        if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
-          begin
-            result:=true;
-            exit;
-          end;
+        case taicpu(p1).oper[i]^.typ of
+          top_reg:
+            if (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
+              exit(true);
+          top_ref:
+            if (taicpu(p1).spilling_get_operation_type_ref(i,Reg)<>operand_read) then
+              exit(true);
+        end;
     end;
     end;
 
 
 End.
 End.

+ 73 - 67
compiler/arm/armins.dat

@@ -97,33 +97,34 @@ void                  void                            none
 reg32,reg32,reg32        \4\x0\xA0                     ARM7
 reg32,reg32,reg32        \4\x0\xA0                     ARM7
 reg32,reg32,reg32,reg32  \5\x0\xA0                     ARM7
 reg32,reg32,reg32,reg32  \5\x0\xA0                     ARM7
 reg32,reg32,reg32,imm    \6\x0\xA0                     ARM7
 reg32,reg32,reg32,imm    \6\x0\xA0                     ARM7
-reg32,reg32,imm          \7\x2\xA0                     ARM7
+reg32,reg32,immshifter   \7\x2\xA0                     ARM7
 
 
 [ADDcc]
 [ADDcc]
-reg32,reg32,reg32        \4\x0\x80                     ARM7
-reg32,reg32,reg32,reg32  \5\x0\x80                     ARM7
-reg32,reg32,reg32,imm    \6\x0\x80                     ARM7
-reg32,reg32,imm          \7\x2\x80                     ARM7
+reg32,reg32,reg32           \4\x0\x80                  ARM7
+reg32,reg32,reg32,reg32     \5\x0\x80                  ARM7
+reg32,reg32,reg32,shifterop \6\x0\x80                  ARM7
+reg32,reg32,immshifter      \7\x2\x80                  ARM7
 
 
 [ADFcc]
 [ADFcc]
 
 
 [ADRcc]
 [ADRcc]
 
 
 [ANDcc]
 [ANDcc]
-reg32,reg32,reg32        \4\x0\x00                     ARM7
-reg32,reg32,reg32,reg32  \5\x0\x00                     ARM7
-reg32,reg32,reg32,imm    \6\x0\x00                     ARM7
-reg32,reg32,imm          \7\x2\x00                     ARM7
+reg32,reg32,reg32           \4\x0\x00                  ARM7
+;reg32,reg32,reg32,reg32    \5\x0\x00                  ARM7
+;reg32,reg32,reg32,imm      \6\x0\x00                  ARM7
+reg32,reg32,reg32,shifterop \6\x0\x00                  ARM7
+reg32,reg32,immshifter      \7\x2\x00                  ARM7
 
 
 [Bcc]
 [Bcc]
 mem32                    \1\x0A                        ARM7
 mem32                    \1\x0A                        ARM7
 imm24                    \1\x0A                        ARM7
 imm24                    \1\x0A                        ARM7
 
 
 [BICcc]
 [BICcc]
-reg32,reg32,reg32        \4\x1\xC0                     ARM7
-reg32,reg32,reg32,reg32  \5\x1\xC0                     ARM7
-reg32,reg32,reg32,imm    \6\x1\xC0                     ARM7
-reg32,reg32,imm          \7\x3\xC0                     ARM7
+;reg32,reg32,reg32        \4\x1\xC0                     ARM7
+;reg32,reg32,reg32,reg32  \5\x1\xC0                     ARM7
+;reg32,reg32,reg32,imm    \6\x1\xC0                     ARM7
+reg32,reg32,immshifter    \7\x3\xC0                     ARM7
 
 
 [BLcc]
 [BLcc]
 mem32                    \1\x0B                        ARM7
 mem32                    \1\x0B                        ARM7
@@ -149,13 +150,13 @@ reg8,reg8           \300\1\x10\101                ARM7
 reg32,reg32              \xC\x1\x60                     ARM7
 reg32,reg32              \xC\x1\x60                     ARM7
 reg32,reg32,reg32        \xD\x1\x60                     ARM7
 reg32,reg32,reg32        \xD\x1\x60                     ARM7
 reg32,reg32,imm          \xE\x1\x60                     ARM7
 reg32,reg32,imm          \xE\x1\x60                     ARM7
-reg32,imm                \xF\x3\x60                     ARM7
+reg32,immshifter         \xF\x1\x60                     ARM7
 
 
 [CMPcc]
 [CMPcc]
 reg32,reg32              \xC\x1\x40                     ARM7
 reg32,reg32              \xC\x1\x40                     ARM7
 reg32,reg32,reg32        \xD\x1\x40                     ARM7
 reg32,reg32,reg32        \xD\x1\x40                     ARM7
-reg32,reg32,imm          \xE\x1\x40                     ARM7
-reg32,imm                \xF\x3\x40                     ARM7
+reg32,reg32,shifterop    \xE\x1\x40                     ARM7
+reg32,immshifter         \xF\x3\x40                     ARM7
 
 
 [CLZcc]
 [CLZcc]
 reg32,reg32              \x27\x01\x01                   ARM7
 reg32,reg32              \x27\x01\x01                   ARM7
@@ -171,10 +172,11 @@ reg32,reg32              \x27\x01\x01                   ARM7
 [DVFcc]
 [DVFcc]
 
 
 [EORcc]
 [EORcc]
-reg32,reg32,reg32        \4\x0\x20                     ARM7
-reg32,reg32,reg32,reg32  \5\x0\x20                     ARM7
-reg32,reg32,reg32,imm    \6\x0\x20                     ARM7
-reg32,reg32,imm          \7\x2\x20                     ARM7
+reg32,reg32,reg32           \4\x0\x20                     ARM7
+;reg32,reg32,reg32,reg32     \5\x0\x20                     ARM7
+;reg32,reg32,reg32,imm       \6\x0\x20                     ARM7
+reg32,reg32,reg32,shifterop \6\x0\x20                     ARM7
+reg32,reg32,immshifter      \7\x2\x20                     ARM7
 
 
 [EXPcc]
 [EXPcc]
 
 
@@ -193,14 +195,15 @@ reg32,reg32         \321\300\1\x11\101            ARM7
 
 
 [LDMcc]
 [LDMcc]
 memam4,reglist		   \x26\x81			ARM7
 memam4,reglist		   \x26\x81			ARM7
+reg32,reglist		   \x26\x81			ARM7
 
 
 [LDRBTcc]
 [LDRBTcc]
 
 
 [LDRBcc]
 [LDRBcc]
-reg32,memam2              \x17\x07\x10                            ARM7
+reg32,memam2              \x17\x04\x50                            ARM7
 
 
 [LDRcc]
 [LDRcc]
-reg32,memam2              \x17\x05\x10                   ARM7
+reg32,memam2              \x17\x04\x10                   ARM7
 ; reg32,imm32              \x17\x05\x10                   ARM7
 ; reg32,imm32              \x17\x05\x10                   ARM7
 ; reg32,reg32              \x18\x04\x10                   ARM7
 ; reg32,reg32              \x18\x04\x10                   ARM7
 ; reg32,reg32,imm32        \x19\x04\x10                   ARM7
 ; reg32,reg32,imm32        \x19\x04\x10                   ARM7
@@ -208,22 +211,24 @@ reg32,memam2              \x17\x05\x10                   ARM7
 ; reg32,reg32,reg32,imm32  \x21\x06\x10                   ARM7
 ; reg32,reg32,reg32,imm32  \x21\x06\x10                   ARM7
 
 
 [LDRHcc]
 [LDRHcc]
-reg32,imm32              \x22\x50\xB0               ARM7
-reg32,reg32              \x23\x50\xB0               ARM7
-reg32,reg32,imm32        \x24\x50\xB0                   ARM7
-reg32,reg32,reg32        \x25\x10\xB0                   ARM7
+reg32,memam2              \x22\x10\xB0               ARM7
+;reg32,imm32              \x22\x50\xB0               ARM7
+;reg32,reg32              \x23\x50\xB0               ARM7
+;reg32,reg32,imm32        \x24\x50\xB0                   ARM7
+;reg32,reg32,reg32        \x25\x10\xB0                   ARM7
 
 
 [LDRSBcc]
 [LDRSBcc]
-reg32,imm32              \x22\x50\xD0               ARM7
+reg32,memam2             \x22\x10\xD0               ARM7
 reg32,reg32              \x23\x50\xD0               ARM7
 reg32,reg32              \x23\x50\xD0               ARM7
 reg32,reg32,imm32        \x24\x50\xD0                   ARM7
 reg32,reg32,imm32        \x24\x50\xD0                   ARM7
 reg32,reg32,reg32        \x25\x10\xD0                   ARM7
 reg32,reg32,reg32        \x25\x10\xD0                   ARM7
 
 
 [LDRSHcc]
 [LDRSHcc]
-reg32,imm32              \x22\x50\xF0               ARM7
-reg32,reg32              \x23\x50\xF0               ARM7
-reg32,reg32,imm32        \x24\x50\xF0                   ARM7
-reg32,reg32,reg32        \x25\x10\xF0                   ARM7
+reg32,memam2              \x22\x10\xF0               ARM7
+;reg32,imm32              \x22\x50\xF0               ARM7
+;reg32,reg32              \x23\x50\xF0               ARM7
+;reg32,reg32,imm32        \x24\x50\xF0                   ARM7
+;reg32,reg32,reg32        \x25\x10\xF0                   ARM7
 
 
 [LDRTcc]
 [LDRTcc]
 
 
@@ -243,11 +248,10 @@ reg32,imm8,fpureg        \xF0\x02\x01                   FPA
 reg32,reg32,reg32,reg32  \x15\x00\x20\x90               ARM7
 reg32,reg32,reg32,reg32  \x15\x00\x20\x90               ARM7
 
 
 [MOVcc]
 [MOVcc]
-; reg32,shifterop              \x8\x0\0xd                   ARM7
-; reg32,immshifter             \x8\x0\0xd                  ARM7
-; reg32,reg32,reg32        \x9\x1\xA0                     ARM7
-; reg32,reg32,imm          \xA\x1\xA0                     ARM7
-; reg32,imm                \xB\x3\xA0                     ARM7
+reg32,shifterop        \x8\x1\xA0                       ARM7
+; reg32,reg32,reg32    \x9\x1\xA0                       ARM7
+reg32,reg32,shifterop  \xA\x1\xA0                       ARM7
+reg32,immshifter       \xB\x1\xA0                       ARM7
 
 
 [MRC]
 [MRC]
 ; reg32,reg32         \321\301\1\x13\110                  ARM7
 ; reg32,reg32         \321\301\1\x13\110                  ARM7
@@ -272,18 +276,18 @@ fpureg,fpureg              \xF2                      FPA
 fpureg,immfpu              \xF2                      FPA
 fpureg,immfpu              \xF2                      FPA
 
 
 [MVNcc]
 [MVNcc]
-; reg32,reg32         \x8\x0\0xf                     ARM7
-; reg32,reg32,reg32   \x9\x1\xE0                     ARM7
-; reg32,reg32,imm     \xA\x1\xE0                     ARM7
-; reg32,imm           \xB\x3\xE0                     ARM7
+reg32,reg32            \x8\x1\xE0                       ARM7
+; reg32,reg32,reg32    \x9\x1\xE0                       ARM7
+reg32,reg32,shifterop  \xA\x1\xE0                       ARM7
+reg32,immshifter       \xB\x1\xE0                       ARM7
 
 
 [NOP]
 [NOP]
 
 
 [ORRcc]
 [ORRcc]
-reg32,reg32,reg32        \4\x1\x80                     ARM7
-reg32,reg32,reg32,reg32  \5\x1\x80                     ARM7
-reg32,reg32,reg32,imm    \6\x1\x80                     ARM7
-reg32,reg32,imm          \7\x3\x80                     ARM7
+reg32,reg32,reg32            \4\x1\x80               ARM7
+reg32,reg32,reg32,reg32      \5\x1\x80               ARM7
+reg32,reg32,reg32,shifterop  \6\x1\x80               ARM7
+reg32,reg32,immshifter       \7\x3\x80               ARM7
 
 
 [RDFcc]
 [RDFcc]
 
 
@@ -296,16 +300,16 @@ reg32,reg32,imm          \7\x3\x80                     ARM7
 [RPWcc]
 [RPWcc]
 
 
 [RSBcc]
 [RSBcc]
-reg32,reg32,reg32        \4\x0\x60                     ARM7
-reg32,reg32,reg32,reg32  \5\x0\x60                     ARM7
-reg32,reg32,reg32,imm    \6\x0\x60                     ARM7
-reg32,reg32,imm          \7\x2\x60                     ARM7
+;reg32,reg32,reg32            \4\x0\x60                    ARM7
+;reg32,reg32,reg32,reg32      \5\x0\x60                    ARM7
+reg32,reg32,reg32,shifterop  \6\x0\x60                     ARM7
+reg32,reg32,immshifter       \7\x0\x60                     ARM7
 
 
 [RSCcc]
 [RSCcc]
 reg32,reg32,reg32        \4\x0\xE0                     ARM7
 reg32,reg32,reg32        \4\x0\xE0                     ARM7
 reg32,reg32,reg32,reg32  \5\x0\xE0                     ARM7
 reg32,reg32,reg32,reg32  \5\x0\xE0                     ARM7
 reg32,reg32,reg32,imm    \6\x0\xE0                     ARM7
 reg32,reg32,reg32,imm    \6\x0\xE0                     ARM7
-reg32,reg32,imm          \7\x2\xE0                     ARM7
+reg32,reg32,immshifter   \7\x2\xE0                     ARM7
 
 
 [RSFcc]
 [RSFcc]
 
 
@@ -317,7 +321,7 @@ reg32,reg32,imm          \7\x2\xE0                     ARM7
 reg32,reg32,reg32        \4\x0\xC0                     ARM7
 reg32,reg32,reg32        \4\x0\xC0                     ARM7
 reg32,reg32,reg32,reg32  \5\x0\xC0                     ARM7
 reg32,reg32,reg32,reg32  \5\x0\xC0                     ARM7
 reg32,reg32,reg32,imm    \6\x0\xC0                     ARM7
 reg32,reg32,reg32,imm    \6\x0\xC0                     ARM7
-reg32,reg32,imm          \7\x2\xC0                     ARM7
+reg32,reg32,immshifter   \7\x2\xC0                     ARM7
 
 
 [SFMcc]
 [SFMcc]
 reg32,imm8,fpureg        \xF0\x02\x00                   FPA
 reg32,imm8,fpureg        \xF0\x02\x00                   FPA
@@ -338,6 +342,7 @@ reg32,reg32,reg32,reg32  \x16\x00\xC0\x90		 ARM7
 
 
 [STMcc]
 [STMcc]
 memam4,reglist		   \x26\x80			ARM7
 memam4,reglist		   \x26\x80			ARM7
+reg32,reglist		   \x26\x80			ARM7
 
 
 [STRcc]
 [STRcc]
 reg32,memam2              \x17\x04\x00                   ARM7
 reg32,memam2              \x17\x04\x00                   ARM7
@@ -348,35 +353,36 @@ reg32,memam2              \x17\x04\x00                   ARM7
 ; reg32,reg32,reg32,imm32  \x21\x06\x00                   ARM7
 ; reg32,reg32,reg32,imm32  \x21\x06\x00                   ARM7
 
 
 [STRBcc]
 [STRBcc]
-reg32,memam2              \x17\x06\x00                           ARM7
+reg32,memam2              \x17\x04\x40                           ARM7
 
 
 [STRBTcc]
 [STRBTcc]
 
 
 ; A dummy since it is parsed as STR{cond}H
 ; A dummy since it is parsed as STR{cond}H
 [STRHcc]
 [STRHcc]
-reg32,imm32              \x22\x40\xB0              ARM7
-reg32,reg32              \x23\x40\xB0               ARM7
-reg32,reg32,imm32        \x24\x40\xB0                   ARM7
-reg32,reg32,reg32        \x25\x00\xB0                   ARM7
+reg32,memam2              \x22\x00\xB0              ARM7
+;reg32,imm32              \x22\x40\xB0              ARM7
+;reg32,reg32              \x23\x40\xB0               ARM7
+;reg32,reg32,imm32        \x24\x40\xB0                   ARM7
+;reg32,reg32,reg32        \x25\x00\xB0                   ARM7
 
 
 [STRTcc]
 [STRTcc]
 
 
 [SUBcc]
 [SUBcc]
-reg32,reg32,shifterop     \4\x0\x40                     ARM7
-reg32,reg32,immshifter    \4\x0\x40                     ARM7
-reg32,reg32,reg32        \4\x0\x40                     ARM7
-; reg32,reg32,reg32,reg32  \5\x0\x40                     ARM7
-; reg32,reg32,reg32,imm    \6\x0\x40                     ARM7
-; reg32,reg32,imm          \7\x2\x40                     ARM7
+reg32,reg32,shifterop       \4\x0\x40                     ARM7
+reg32,reg32,immshifter      \4\x0\x40                     ARM7
+reg32,reg32,reg32           \4\x0\x40                     ARM7
+; reg32,reg32,reg32,reg32     \5\x0\x40                     ARM7
+reg32,reg32,reg32,shifterop \6\x0\x40                     ARM7
+; reg32,reg32,imm           \7\x2\x40                     ARM7
 
 
 [SWIcc]
 [SWIcc]
 imm                 \2\x0F                        ARM7
 imm                 \2\x0F                        ARM7
 
 
 [SWPcc]
 [SWPcc]
-reg32,reg32,reg32   \x27\x01\x90                   ARM7
+reg32,reg32,memam2   \x27\x10\x09                   ARM7
 
 
 [SWPBcc]
 [SWPBcc]
-reg32,reg32,reg32   \x27\x01\x90                   ARM7
+reg32,reg32,reg32    \x27\x14\x09                   ARM7
 
 
 [TANcc]
 [TANcc]
 
 
@@ -387,10 +393,10 @@ reg32,reg32,imm     \xE\x1\x20                     ARM7
 reg32,imm           \xF\x3\x20                     ARM7
 reg32,imm           \xF\x3\x20                     ARM7
 
 
 [TSTcc]
 [TSTcc]
-reg32,reg32         \xC\x1\x00                     ARM7
-reg32,reg32,reg32   \xD\x1\x00                     ARM7
-reg32,reg32,imm     \xE\x1\x00                     ARM7
-reg32,imm           \xF\x3\x00                     ARM7
+reg32,reg32           \xC\x1\x00                     ARM7
+reg32,reg32,reg32     \xD\x1\x00                     ARM7
+reg32,reg32,shifterop \xE\x1\x00                     ARM7
+reg32,immshifter      \xF\x3\x00                     ARM7
 
 
 [UMLALcc]
 [UMLALcc]
 reg32,reg32,reg32,reg32  \x16\x00\xA0\x90		 ARM7
 reg32,reg32,reg32,reg32  \x16\x00\xA0\x90		 ARM7

+ 1 - 1
compiler/arm/armnop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from armins.dat }
 { don't edit, this file is generated from armins.dat }
-105;
+98;

+ 97 - 146
compiler/arm/armtab.inc

@@ -31,7 +31,7 @@
   (
   (
     opcode  : A_ADC;
     opcode  : A_ADC;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#160;
     code    : #7#2#160;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
@@ -52,14 +52,14 @@
   (
   (
     opcode  : A_ADD;
     opcode  : A_ADD;
     ops     : 4;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#0#128;
     code    : #6#0#128;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_ADD;
     opcode  : A_ADD;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#128;
     code    : #7#2#128;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
@@ -73,21 +73,14 @@
   (
   (
     opcode  : A_AND;
     opcode  : A_AND;
     ops     : 4;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_reg32);
-    code    : #5#0#0;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_AND;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#0#0;
     code    : #6#0#0;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_AND;
     opcode  : A_AND;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#0;
     code    : #7#2#0;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
@@ -108,28 +101,7 @@
   (
   (
     opcode  : A_BIC;
     opcode  : A_BIC;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #4#1#192;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_BIC;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_reg32);
-    code    : #5#1#192;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_BIC;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
-    code    : #6#1#192;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_BIC;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#3#192;
     code    : #7#3#192;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
@@ -199,8 +171,8 @@
   (
   (
     opcode  : A_CMN;
     opcode  : A_CMN;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate,ot_none,ot_none);
-    code    : #15#3#96;
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
+    code    : #15#1#96;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
@@ -220,14 +192,14 @@
   (
   (
     opcode  : A_CMP;
     opcode  : A_CMP;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_shifterop,ot_none);
     code    : #14#1#64;
     code    : #14#1#64;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_CMP;
     opcode  : A_CMP;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate,ot_none,ot_none);
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
     code    : #15#3#64;
     code    : #15#3#64;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
@@ -248,21 +220,14 @@
   (
   (
     opcode  : A_EOR;
     opcode  : A_EOR;
     ops     : 4;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_reg32);
-    code    : #5#0#32;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_EOR;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#0#32;
     code    : #6#0#32;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_EOR;
     opcode  : A_EOR;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#32;
     code    : #7#2#32;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
@@ -281,52 +246,38 @@
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
-    opcode  : A_LDRB;
+    opcode  : A_LDM;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
-    code    : #23#7#16;
+    optypes : (ot_reg32,ot_reglist,ot_none,ot_none);
+    code    : #38#129;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
-    opcode  : A_LDR;
+    opcode  : A_LDRB;
     ops     : 2;
     ops     : 2;
     optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
     optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
-    code    : #23#5#16;
+    code    : #23#4#80;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
-    opcode  : A_LDRH;
+    opcode  : A_LDR;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate or ot_bits32,ot_none,ot_none);
-    code    : #34#80#176;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #23#4#16;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_LDRH;
     opcode  : A_LDRH;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_reg32,ot_none,ot_none);
-    code    : #35#80#176;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate or ot_bits32,ot_none);
-    code    : #36#80#176;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #37#16#176;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #34#16#176;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_LDRSB;
     opcode  : A_LDRSB;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate or ot_bits32,ot_none,ot_none);
-    code    : #34#80#208;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #34#16#208;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
@@ -353,29 +304,8 @@
   (
   (
     opcode  : A_LDRSH;
     opcode  : A_LDRSH;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate or ot_bits32,ot_none,ot_none);
-    code    : #34#80#240;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRSH;
-    ops     : 2;
-    optypes : (ot_reg32,ot_reg32,ot_none,ot_none);
-    code    : #35#80#240;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRSH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate or ot_bits32,ot_none);
-    code    : #36#80#240;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRSH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #37#16#240;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #34#16#240;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
@@ -392,6 +322,27 @@
     code    : #21#0#32#144;
     code    : #21#0#32#144;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
+  (
+    opcode  : A_MOV;
+    ops     : 2;
+    optypes : (ot_reg32,ot_shifterop,ot_none,ot_none);
+    code    : #8#1#160;
+    flags   : if_arm7
+  ),
+  (
+    opcode  : A_MOV;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_shifterop,ot_none);
+    code    : #10#1#160;
+    flags   : if_arm7
+  ),
+  (
+    opcode  : A_MOV;
+    ops     : 2;
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
+    code    : #11#1#160;
+    flags   : if_arm7
+  ),
   (
   (
     opcode  : A_MRS;
     opcode  : A_MRS;
     ops     : 2;
     ops     : 2;
@@ -441,6 +392,27 @@
     code    : #242;
     code    : #242;
     flags   : if_fpa
     flags   : if_fpa
   ),
   ),
+  (
+    opcode  : A_MVN;
+    ops     : 2;
+    optypes : (ot_reg32,ot_reg32,ot_none,ot_none);
+    code    : #8#1#224;
+    flags   : if_arm7
+  ),
+  (
+    opcode  : A_MVN;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_shifterop,ot_none);
+    code    : #10#1#224;
+    flags   : if_arm7
+  ),
+  (
+    opcode  : A_MVN;
+    ops     : 2;
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
+    code    : #11#1#224;
+    flags   : if_arm7
+  ),
   (
   (
     opcode  : A_ORR;
     opcode  : A_ORR;
     ops     : 3;
     ops     : 3;
@@ -458,43 +430,29 @@
   (
   (
     opcode  : A_ORR;
     opcode  : A_ORR;
     ops     : 4;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#1#128;
     code    : #6#1#128;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_ORR;
     opcode  : A_ORR;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#3#128;
     code    : #7#3#128;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
-  (
-    opcode  : A_RSB;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #4#0#96;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_RSB;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_reg32);
-    code    : #5#0#96;
-    flags   : if_arm7
-  ),
   (
   (
     opcode  : A_RSB;
     opcode  : A_RSB;
     ops     : 4;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#0#96;
     code    : #6#0#96;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_RSB;
     opcode  : A_RSB;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
-    code    : #7#2#96;
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
+    code    : #7#0#96;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
@@ -521,7 +479,7 @@
   (
   (
     opcode  : A_RSC;
     opcode  : A_RSC;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#224;
     code    : #7#2#224;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
@@ -549,7 +507,7 @@
   (
   (
     opcode  : A_SBC;
     opcode  : A_SBC;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#192;
     code    : #7#2#192;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
@@ -582,45 +540,31 @@
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
-    opcode  : A_STR;
+    opcode  : A_STM;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
-    code    : #23#4#0;
+    optypes : (ot_reg32,ot_reglist,ot_none,ot_none);
+    code    : #38#128;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
-    opcode  : A_STRB;
+    opcode  : A_STR;
     ops     : 2;
     ops     : 2;
     optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
     optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
-    code    : #23#6#0;
+    code    : #23#4#0;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
-    opcode  : A_STRH;
+    opcode  : A_STRB;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate or ot_bits32,ot_none,ot_none);
-    code    : #34#64#176;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #23#4#64;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_STRH;
     opcode  : A_STRH;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_reg32,ot_none,ot_none);
-    code    : #35#64#176;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_STRH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate or ot_bits32,ot_none);
-    code    : #36#64#176;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_STRH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #37#0#176;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #34#0#176;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
@@ -644,6 +588,13 @@
     code    : #4#0#64;
     code    : #4#0#64;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
+  (
+    opcode  : A_SUB;
+    ops     : 4;
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
+    code    : #6#0#64;
+    flags   : if_arm7
+  ),
   (
   (
     opcode  : A_SWI;
     opcode  : A_SWI;
     ops     : 1;
     ops     : 1;
@@ -654,15 +605,15 @@
   (
   (
     opcode  : A_SWP;
     opcode  : A_SWP;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #39#1#144;
+    optypes : (ot_reg32,ot_reg32,ot_memoryam2,ot_none);
+    code    : #39#16#9;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_SWPB;
     opcode  : A_SWPB;
     ops     : 3;
     ops     : 3;
     optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
     optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #39#1#144;
+    code    : #39#20#9;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
@@ -710,14 +661,14 @@
   (
   (
     opcode  : A_TST;
     opcode  : A_TST;
     ops     : 3;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_shifterop,ot_none);
     code    : #14#1#0;
     code    : #14#1#0;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),
   (
   (
     opcode  : A_TST;
     opcode  : A_TST;
     ops     : 2;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate,ot_none,ot_none);
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
     code    : #15#3#0;
     code    : #15#3#0;
     flags   : if_arm7
     flags   : if_arm7
   ),
   ),

+ 47 - 15
compiler/arm/cgcpu.pas

@@ -71,6 +71,8 @@ unit cgcpu;
 
 
         procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
         procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
 
 
+        procedure g_profilecode(list : TAsmList); override;
+
         procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
         procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
         procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
         procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
         procedure g_maybe_got_init(list : TAsmList); override;
         procedure g_maybe_got_init(list : TAsmList); override;
@@ -100,7 +102,7 @@ unit cgcpu;
 
 
         procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
         procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
         { Transform unsupported methods into Internal errors }
         { Transform unsupported methods into Internal errors }
-        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
 
 
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
@@ -634,7 +636,9 @@ unit cgcpu;
         sym : TAsmSymbol;
         sym : TAsmSymbol;
       begin
       begin
         { check not really correct: should only be used for non-Thumb cpus }
         { check not really correct: should only be used for non-Thumb cpus }
-        if CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype] then
+        if (CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype]) and
+          { WinCE GNU AS (not sure if this applies in general) does not support BLX imm }
+          (target_info.system<>system_arm_wince) then
           branchopcode:=A_BLX
           branchopcode:=A_BLX
         else
         else
           branchopcode:=A_BL;
           branchopcode:=A_BL;
@@ -1698,7 +1702,7 @@ unit cgcpu;
       end;
       end;
 
 
 
 
-    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
+    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
       begin
       begin
         if reverse then
         if reverse then
           begin
           begin
@@ -1792,6 +1796,16 @@ unit cgcpu;
         list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f))));
         list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f))));
       end;
       end;
 
 
+    procedure tbasecgarm.g_profilecode(list : TAsmList);
+      begin
+        if target_info.system = system_arm_linux then
+          begin
+            list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
+            a_call_name(list,'__gnu_mcount_nc',false);
+          end
+        else
+          internalerror(2014091201);
+      end;
 
 
     procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
     procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
       var
       var
@@ -3885,18 +3899,36 @@ unit cgcpu;
         tmpreg : TRegister;
         tmpreg : TRegister;
       begin
       begin
         href:=ref;
         href:=ref;
-        if (op in [A_STR,A_STRB,A_STRH]) and
-           (abs(ref.offset)>124) then
-          begin
-            tmpreg:=getintregister(list,OS_ADDR);
-            a_loadaddr_ref_reg(list,ref,tmpreg);
-
-            reference_reset_base(href,tmpreg,0,ref.alignment);
-          end
-        else if (op=A_LDR) and
-           (oppostfix in [PF_None]) and
-           (ref.base<>NR_STACK_POINTER_REG)  and
-           (abs(ref.offset)>124) then
+        if { LDR/STR limitations }
+           (
+            (((op=A_LDR) and (oppostfix=PF_None)) or
+             ((op=A_STR) and (oppostfix=PF_None))) and
+            (ref.base<>NR_STACK_POINTER_REG) and
+            (abs(ref.offset)>124)
+           ) or
+           { LDRB/STRB limitations }
+           (
+           (((op=A_LDR) and (oppostfix=PF_B)) or
+            ((op=A_LDRB) and (oppostfix=PF_None)) or
+            ((op=A_STR) and (oppostfix=PF_B)) or
+            ((op=A_STRB) and (oppostfix=PF_None))) and
+            ((ref.base=NR_STACK_POINTER_REG) or
+             (ref.index=NR_STACK_POINTER_REG) or
+             (abs(ref.offset)>31)
+            )
+           ) or
+           { LDRH/STRH limitations }
+           (
+            (((op=A_LDR) and (oppostfix=PF_H)) or
+             ((op=A_LDRH) and (oppostfix=PF_None)) or
+             ((op=A_STR) and (oppostfix=PF_H)) or
+             ((op=A_STRH) and (oppostfix=PF_None))) and
+            ((ref.base=NR_STACK_POINTER_REG) or
+             (ref.index=NR_STACK_POINTER_REG) or
+             (abs(ref.offset)>62) or
+             ((abs(ref.offset) mod 2)<>0)
+            )
+           ) then
           begin
           begin
             tmpreg:=getintregister(list,OS_ADDR);
             tmpreg:=getintregister(list,OS_ADDR);
             a_loadaddr_ref_reg(list,ref,tmpreg);
             a_loadaddr_ref_reg(list,ref,tmpreg);

+ 43 - 1
compiler/arm/cpuelf.pas

@@ -561,6 +561,7 @@ implementation
     rotation:longint;
     rotation:longint;
     residual,g_n:longword;
     residual,g_n:longword;
     curloc: aword;
     curloc: aword;
+    bit_S,bit_I1,bit_I2: aint;
   begin
   begin
     data:=objsec.data;
     data:=objsec.data;
     for i:=0 to objsec.ObjRelocations.Count-1 do
     for i:=0 to objsec.ObjRelocations.Count-1 do
@@ -660,7 +661,16 @@ implementation
                   2) when target is unresolved weak symbol, CALL must be changed to NOP,
                   2) when target is unresolved weak symbol, CALL must be changed to NOP,
                   while JUMP24 behavior is unspecified. }
                   while JUMP24 behavior is unspecified. }
                 tmp:=sarlongint((address and $00FFFFFF) shl 8,6);
                 tmp:=sarlongint((address and $00FFFFFF) shl 8,6);
-                tmp:=tmp+relocval-curloc;
+                tmp:=tmp+relocval;
+                if odd(tmp) then    { dest is Thumb? }
+                  begin
+                    if (reltyp=R_ARM_CALL) then
+                      { change BL to BLX, dest bit 1 goes to instruction bit 24 }
+                      address:=(address and $FE000000) or (((tmp-curloc) and 2) shl 23) or $10000000
+                    else
+                      InternalError(2014092001);
+                  end;
+                tmp:=tmp-curloc;
                 // TODO: check overflow
                 // TODO: check overflow
                 address:=(address and $FF000000) or ((tmp and $3FFFFFE) shr 2);
                 address:=(address and $FF000000) or ((tmp and $3FFFFFE) shr 2);
               end;
               end;
@@ -829,6 +839,38 @@ implementation
                   address:=address or (1 shl 23);
                   address:=address or (1 shl 23);
               end;
               end;
 
 
+            R_ARM_THM_CALL:
+              begin
+                if (not ElfTarget.relocs_use_addend) then
+                  begin
+                    address:=((address and $ffff) shl 16) or word(address shr 16);
+                    bit_S:=(address shr 26) and 1;
+                    bit_I1:=(bit_S xor ((address shr 13) and 1)) xor 1;
+                    bit_I2:=(bit_S xor ((address shr 11) and 1)) xor 1;
+                    tmp:=((-bit_S) shl 24) or (bit_I1 shl 23) or (bit_I2 shl 22) or (((address shr 16) and $3ff) shl 12) or ((address and $7ff) shl 1);
+                  end
+                else  { TODO: must read the instruction anyway }
+                  tmp:=address;
+                tmp:=tmp+relocval;       { dest address }
+                if odd(tmp) then         { if it's Thumb code, change possible BLX to BL }
+                  address:=address or $1800;
+                tmp:=tmp-curloc;         { now take PC-relative }
+                { TODO: overflow check, different limit for Thumb and Thumb-2 }
+
+                { now encode this mess back }
+                if (address and $5000)=$4000 then
+                  tmp:=(tmp+2) and (not 3);
+
+                bit_S:=(tmp shr 31) and 1;
+                address:=(address and $F800D000) or
+                  (bit_S shl 26) or
+                  (((tmp shr 12) and $3ff) shl 16) or
+                  ((tmp shr 1) and $7FF) or
+                  ((((tmp shr 23) and 1) xor 1 xor bit_S) shl 13) or
+                  ((((tmp shr 22) and 1) xor 1 xor bit_S) shl 11);
+                address:=((address and $ffff) shl 16) or word(address shr 16);
+              end;
+
             R_ARM_TLS_IE32:
             R_ARM_TLS_IE32:
               begin
               begin
                 relocval:=relocval-tlsseg.mempos+align_aword(TCB_SIZE,tlsseg.align);
                 relocval:=relocval-tlsseg.mempos+align_aword(TCB_SIZE,tlsseg.align);

+ 6 - 1
compiler/arm/cpuinfo.pas

@@ -21,6 +21,9 @@ Interface
 
 
 Type
 Type
    bestreal = double;
    bestreal = double;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TDoubleRec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts32real = single;
    ts64real = double;
    ts64real = double;
    ts80real = type extended;
    ts80real = type extended;
@@ -338,8 +341,10 @@ Type
       ct_thumb2bare
       ct_thumb2bare
      );
      );
 
 
-
 Const
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = true;
    {# Size of native extended floating point type }
    {# Size of native extended floating point type }
    extended_size = 12;
    extended_size = 12;
    {# Size of a multimedia register               }
    {# Size of a multimedia register               }

+ 58 - 10
compiler/arm/hlcgcpu.pas

@@ -34,8 +34,15 @@ interface
     hlcg2ll;
     hlcg2ll;
 
 
   type
   type
-    thlcgcpu = class(thlcg2ll)
-     procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
+    tbasehlcgarm = class(thlcg2ll)
+      procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
+    end;
+
+    tarmhlcgcpu = class(tbasehlcgarm)
+    end;
+
+    tthumbhlcgcpu = class(tbasehlcgarm)
+      procedure g_external_wrapper(list : TAsmList; procdef : tprocdef; const externalname : string); override;
     end;
     end;
 
 
   procedure create_hlcodegen;
   procedure create_hlcodegen;
@@ -43,14 +50,14 @@ interface
 implementation
 implementation
 
 
   uses
   uses
-    globtype,verbose,
+    globals,globtype,verbose,
     procinfo,fmodule,
     procinfo,fmodule,
     symconst,
     symconst,
-    aasmbase,aasmtai,aasmcpu,
+    aasmbase,aasmtai,aasmcpu, cpuinfo,
     hlcgobj,
     hlcgobj,
     cgbase, cgutils, cpubase, cgobj, cgcpu;
     cgbase, cgutils, cpubase, cgobj, cgcpu;
 
 
-  procedure thlcgcpu.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
+  procedure tbasehlcgarm.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
 
 
     procedure loadvmttor12;
     procedure loadvmttor12;
       var
       var
@@ -93,6 +100,7 @@ implementation
           cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R12);
           cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R12);
       end;
       end;
 
 
+
     procedure op_onr12methodaddr;
     procedure op_onr12methodaddr;
       var
       var
         tmpref,
         tmpref,
@@ -107,6 +115,7 @@ implementation
             if (href.offset in [0..124]) and ((href.offset mod 4)=0) then
             if (href.offset in [0..124]) and ((href.offset mod 4)=0) then
               begin
               begin
                 list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
                 list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
+                list.concat(taicpu.op_reg_reg(A_MOV,NR_R0,NR_R12));
                 cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R0);
                 cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R0);
                 list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
                 list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
                 list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
                 list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
@@ -124,20 +133,24 @@ implementation
                 tmpref.symbol:=l;
                 tmpref.symbol:=l;
                 tmpref.base:=NR_PC;
                 tmpref.base:=NR_PC;
                 list.concat(taicpu.op_reg_ref(A_LDR,NR_R1,tmpref));
                 list.concat(taicpu.op_reg_ref(A_LDR,NR_R1,tmpref));
+                list.concat(taicpu.op_reg_reg(A_MOV,NR_R0,NR_R12));
                 href.offset:=0;
                 href.offset:=0;
+                href.base:=NR_R0;
                 href.index:=NR_R1;
                 href.index:=NR_R1;
                 cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R0);
                 cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R0);
                 list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
                 list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
                 list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0,RS_R1]));
                 list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0,RS_R1]));
               end;
               end;
-            list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R12));
           end
           end
         else
         else
           begin
           begin
             reference_reset_base(href,voidpointertype,NR_R12,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
             reference_reset_base(href,voidpointertype,NR_R12,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
             cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R12);
             cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R12);
-            list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R12));
           end;
           end;
+        if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
+          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R12))
+        else
+          list.concat(taicpu.op_reg(A_BX,NR_R12));
       end;
       end;
 
 
     var
     var
@@ -154,6 +167,9 @@ implementation
       if procdef.owner.symtabletype<>ObjectSymtable then
       if procdef.owner.symtabletype<>ObjectSymtable then
         Internalerror(200109191);
         Internalerror(200109191);
 
 
+        if GenerateThumbCode or GenerateThumb2Code then
+          list.concat(tai_thumb_func.create);
+
       make_global:=false;
       make_global:=false;
       if (not current_module.is_unit) or
       if (not current_module.is_unit) or
          create_smartlink or
          create_smartlink or
@@ -199,7 +215,7 @@ implementation
           cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,tmpref,NR_R0);
           cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,tmpref,NR_R0);
           list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
           list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
           list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
           list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
-          list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R12));
+          list.concat(taicpu.op_reg(A_BX,NR_R12));
         end
         end
       else
       else
         list.concat(taicpu.op_sym(A_B,current_asmdata.RefAsmSymbol(procdef.mangledname)));
         list.concat(taicpu.op_sym(A_B,current_asmdata.RefAsmSymbol(procdef.mangledname)));
@@ -212,12 +228,44 @@ implementation
     end;
     end;
 
 
 
 
+  { tthumbhlcgcpu }
+
+  procedure tthumbhlcgcpu.g_external_wrapper(list: TAsmList; procdef: tprocdef; const externalname: string);
+    var
+      tmpref : treference;
+      l : tasmlabel;
+    begin
+      { there is no branch instruction on thumb which allows big distances and which leaves LR as it is
+        and which allows to switch the instruction set }
+
+      { create const entry }
+      reference_reset(tmpref,4);
+      current_asmdata.getjumplabel(l);
+      tmpref.symbol:=l;
+      tmpref.base:=NR_PC;
+      list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
+      list.concat(taicpu.op_reg_ref(A_LDR,NR_R0,tmpref));
+      list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
+      list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
+      list.concat(taicpu.op_reg(A_BX,NR_R12));
+
+      { append const entry }
+      list.Concat(tai_align.Create(4));
+      list.Concat(tai_label.create(l));
+      list.concat(tai_const.Create_sym(current_asmdata.RefAsmSymbol(externalname)));
+    end;
+
+
+
   procedure create_hlcodegen;
   procedure create_hlcodegen;
     begin
     begin
-      hlcg:=thlcgcpu.create;
+      if GenerateThumbCode then
+        hlcg:=tthumbhlcgcpu.create
+      else
+        hlcg:=tarmhlcgcpu.create;
       create_codegen;
       create_codegen;
     end;
     end;
 
 
 begin
 begin
-  chlcgobj:=thlcgcpu;
+  chlcgobj:=tbasehlcgarm;
 end.
 end.

+ 12 - 2
compiler/arm/narmadd.pas

@@ -85,6 +85,8 @@ interface
                       GetResFlags:=F_LT;
                       GetResFlags:=F_LT;
                     gten:
                     gten:
                       GetResFlags:=F_LE;
                       GetResFlags:=F_LE;
+                    else
+                      internalerror(201408203);
                   end
                   end
                 else
                 else
                   case NodeType of
                   case NodeType of
@@ -96,6 +98,8 @@ interface
                       GetResFlags:=F_GT;
                       GetResFlags:=F_GT;
                     gten:
                     gten:
                       GetResFlags:=F_GE;
                       GetResFlags:=F_GE;
+                    else
+                      internalerror(201408204);
                   end;
                   end;
               end
               end
             else
             else
@@ -110,6 +114,8 @@ interface
                       GetResFlags:=F_CC;
                       GetResFlags:=F_CC;
                     gten:
                     gten:
                       GetResFlags:=F_LS;
                       GetResFlags:=F_LS;
+                    else
+                      internalerror(201408205);
                   end
                   end
                 else
                 else
                   case NodeType of
                   case NodeType of
@@ -121,6 +127,8 @@ interface
                       GetResFlags:=F_HI;
                       GetResFlags:=F_HI;
                     gten:
                     gten:
                       GetResFlags:=F_CS;
                       GetResFlags:=F_CS;
+                    else
+                      internalerror(201408206);
                   end;
                   end;
               end;
               end;
         end;
         end;
@@ -144,6 +152,8 @@ interface
             result:=F_GT;
             result:=F_GT;
           gten:
           gten:
             result:=F_GE;
             result:=F_GE;
+          else
+            internalerror(201408207);
         end;
         end;
       end;
       end;
 
 
@@ -379,13 +389,13 @@ interface
               tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
               tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
               if right.location.loc = LOC_CONSTANT then
               if right.location.loc = LOC_CONSTANT then
                 begin
                 begin
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_AND,tmpreg,left.location.register,right.location.value));
+                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,right.location.value,left.location.register,tmpreg);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,right.location.value));
                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,right.location.value));
                 end
                 end
               else
               else
                 begin
                 begin
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,tmpreg,left.location.register,right.location.register));
+                  cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,left.location.register,right.location.register,tmpreg);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,tmpreg,right.location.register));
                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,tmpreg,right.location.register));
                 end;
                 end;

+ 11 - 4
compiler/arm/narmset.pas

@@ -141,6 +141,7 @@ implementation
     procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : aint);
     procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : aint);
       var
       var
         last : TConstExprInt;
         last : TConstExprInt;
+        tmpreg,
         basereg,
         basereg,
         indexreg : tregister;
         indexreg : tregister;
         href : treference;
         href : treference;
@@ -222,7 +223,7 @@ implementation
           begin
           begin
             if cs_create_pic in current_settings.moduleswitches then
             if cs_create_pic in current_settings.moduleswitches then
               internalerror(2013082102);
               internalerror(2013082102);
-            cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SUB,OS_ADDR,min_+1,indexreg,indexreg);
+            cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SUB,OS_ADDR,min_,indexreg,indexreg);
             current_asmdata.getaddrlabel(tablelabel);
             current_asmdata.getaddrlabel(tablelabel);
 
 
             cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,2,indexreg);
             cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,2,indexreg);
@@ -231,9 +232,15 @@ implementation
             reference_reset_symbol(href,tablelabel,0,4);
             reference_reset_symbol(href,tablelabel,0,4);
             cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList, href, basereg);
             cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList, href, basereg);
 
 
-            cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_ADDr, indexreg, basereg);
-
-            current_asmdata.CurrAsmList.Concat(taicpu.op_reg(A_BX, basereg));
+            reference_reset(href,0);
+            href.base:=basereg;
+            href.index:=indexreg;
+            
+            tmpreg:=cg.getintregister(current_asmdata.CurrAsmList, OS_ADDR);
+            cg.a_load_ref_reg(current_asmdata.CurrAsmList, OS_ADDR, OS_ADDR, href, tmpreg);
+            
+            { do not use BX here to avoid switching into arm mode }
+            current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_MOV, NR_PC, tmpreg));
 
 
             cg.a_label(current_asmdata.CurrAsmList,tablelabel);
             cg.a_label(current_asmdata.CurrAsmList,tablelabel);
             { generate jump table }
             { generate jump table }

+ 2 - 1
compiler/arm/raarmgas.pas

@@ -1043,6 +1043,7 @@ Unit raarmgas;
         hreg : tregister;
         hreg : tregister;
         flags : tspecialregflags;
         flags : tspecialregflags;
       begin
       begin
+        hreg:=NR_NO;
         case actasmtoken of
         case actasmtoken of
           AS_REGISTER:
           AS_REGISTER:
             begin
             begin
@@ -1219,7 +1220,7 @@ Unit raarmgas;
                   end;
                   end;
               end;
               end;
           end;
           end;
-        maxlen:=max(length(hs),5);
+        maxlen:=min(length(hs),5);
         actopcode:=A_NONE;
         actopcode:=A_NONE;
         for j:=maxlen downto 1 do
         for j:=maxlen downto 1 do
           begin
           begin

+ 10 - 0
compiler/arm/rgcpu.pas

@@ -308,6 +308,11 @@ unit rgcpu;
                    (get_alias(getsupreg(oper[0]^.reg))=orgreg) and
                    (get_alias(getsupreg(oper[0]^.reg))=orgreg) and
                    (get_alias(getsupreg(oper[1]^.reg))<>orgreg) then
                    (get_alias(getsupreg(oper[1]^.reg))<>orgreg) then
                   begin
                   begin
+                    { do not replace if we're on Thumb, ldr/str cannot be used with rX>r7 }
+                    if GenerateThumbCode and
+                       (getsupreg(oper[1]^.reg)>RS_R7) then
+                       exit;
+
                     { str expects the register in oper[0] }
                     { str expects the register in oper[0] }
                     instr.loadreg(0,oper[1]^.reg);
                     instr.loadreg(0,oper[1]^.reg);
                     instr.loadref(1,spilltemp);
                     instr.loadref(1,spilltemp);
@@ -318,6 +323,11 @@ unit rgcpu;
                    (get_alias(getsupreg(oper[1]^.reg))=orgreg) and
                    (get_alias(getsupreg(oper[1]^.reg))=orgreg) and
                    (get_alias(getsupreg(oper[0]^.reg))<>orgreg) then
                    (get_alias(getsupreg(oper[0]^.reg))<>orgreg) then
                   begin
                   begin
+                    { do not replace if we're on Thumb, ldr/str cannot be used with rX>r7 }
+                    if GenerateThumbCode and
+                       (getsupreg(oper[0]^.reg)>RS_R7) then
+                       exit;
+
                     instr.loadref(1,spilltemp);
                     instr.loadref(1,spilltemp);
                     opcode:=A_LDR;
                     opcode:=A_LDR;
                     result:=true;
                     result:=true;

+ 4 - 2
compiler/assemble.pas

@@ -87,6 +87,7 @@ interface
         function single2str(d : single) : string; virtual;
         function single2str(d : single) : string; virtual;
         function double2str(d : double) : string; virtual;
         function double2str(d : double) : string; virtual;
         function extended2str(e : extended) : string; virtual;
         function extended2str(e : extended) : string; virtual;
+        Function DoPipe:boolean;
       public
       public
         {# Returns the complete path and executable name of the assembler
         {# Returns the complete path and executable name of the assembler
            program.
            program.
@@ -309,7 +310,7 @@ Implementation
       end;
       end;
 
 
 
 
-    Function DoPipe:boolean;
+    Function TExternalAssembler.DoPipe:boolean;
       begin
       begin
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and
@@ -1854,7 +1855,8 @@ Implementation
         if not(tf_section_threadvars in target_info.flags) then
         if not(tf_section_threadvars in target_info.flags) then
           exclude(to_do,al_threadvars);
           exclude(to_do,al_threadvars);
         for i:=low(TasmlistType) to high(TasmlistType) do
         for i:=low(TasmlistType) to high(TasmlistType) do
-          if (i in to_do) and (current_asmdata.asmlists[i]<>nil) then
+          if (i in to_do) and (current_asmdata.asmlists[i]<>nil) and
+             (not current_asmdata.asmlists[i].empty) then
             addlist(current_asmdata.asmlists[i]);
             addlist(current_asmdata.asmlists[i]);
 
 
         if SmartAsm then
         if SmartAsm then

+ 5 - 5
compiler/avr/aasmcpu.pas

@@ -246,11 +246,11 @@ implementation
       begin
       begin
         result:=operand_read;
         result:=operand_read;
         case opcode of
         case opcode of
-          A_CLR,
-          A_MOV, A_MOVW:
-           if opnr=0 then
-             result:=operand_write;
-          A_CP,A_CPC,A_CPI,A_PUSH :
+          A_CLR,A_LDD,A_LD,A_LDI,A_LDS,
+          A_MOV,A_MOVW:
+            if opnr=0 then
+              result:=operand_write;
+          A_CP,A_CPC,A_CPI,A_PUSH,A_ST,A_STD,A_STS:
             ;
             ;
           else
           else
             begin
             begin

+ 4 - 6
compiler/avr/agavrgas.pas

@@ -81,6 +81,7 @@ unit agavrgas;
         var
         var
           s : string;
           s : string;
         begin
         begin
+           s:='';
            with ref do
            with ref do
             begin
             begin
   {$ifdef extdebug}
   {$ifdef extdebug}
@@ -95,9 +96,8 @@ unit agavrgas;
               else if base<>NR_NO then
               else if base<>NR_NO then
                 begin
                 begin
                   if addressmode=AM_PREDRECEMENT then
                   if addressmode=AM_PREDRECEMENT then
-                    s:='-'
-                  else
-                    s:='';
+                    s:='-';
+
                   case base of
                   case base of
                     NR_R26:
                     NR_R26:
                       s:=s+'X';
                       s:=s+'X';
@@ -119,9 +119,7 @@ unit agavrgas;
               else if assigned(symbol) or (offset<>0) then
               else if assigned(symbol) or (offset<>0) then
                 begin
                 begin
                   if assigned(symbol) then
                   if assigned(symbol) then
-                    s:=ReplaceForbiddenAsmSymbolChars(symbol.name)
-                  else
-                     s:='';
+                    s:=ReplaceForbiddenAsmSymbolChars(symbol.name);
 
 
                   if offset<0 then
                   if offset<0 then
                     s:=s+tostr(offset)
                     s:=s+tostr(offset)

+ 6 - 0
compiler/avr/cpuinfo.pas

@@ -21,6 +21,9 @@ Interface
 
 
 Type
 Type
    bestreal = double;
    bestreal = double;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TDoubleRec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts32real = single;
    ts64real = double;
    ts64real = double;
    ts80real = type extended;
    ts80real = type extended;
@@ -61,6 +64,9 @@ Type
      );
      );
 
 
 Const
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = true;
    {# Size of native extended floating point type }
    {# Size of native extended floating point type }
    extended_size = 12;
    extended_size = 12;
    {# Size of a multimedia register               }
    {# Size of a multimedia register               }

+ 4 - 4
compiler/avr/cpupara.pas

@@ -201,13 +201,13 @@ unit cpupara;
         begin
         begin
           { In case of po_delphi_nested_cc, the parent frame pointer
           { In case of po_delphi_nested_cc, the parent frame pointer
             is always passed on the stack. }
             is always passed on the stack. }
-           if (nextintreg>RS_R8) and
+           if (nextintreg>RS_R7) and
               (not(vo_is_parentfp in hp.varoptions) or
               (not(vo_is_parentfp in hp.varoptions) or
                not(po_delphi_nested_cc in p.procoptions)) then
                not(po_delphi_nested_cc in p.procoptions)) then
              begin
              begin
                paraloc^.loc:=LOC_REGISTER;
                paraloc^.loc:=LOC_REGISTER;
                paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
                paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-               inc(nextintreg);
+               dec(nextintreg);
              end
              end
            else
            else
              begin
              begin
@@ -251,8 +251,8 @@ unit cpupara;
               begin
               begin
                 paradef:=getpointerdef(paradef);
                 paradef:=getpointerdef(paradef);
                 loc:=LOC_REGISTER;
                 loc:=LOC_REGISTER;
-                paracgsize := OS_ADDR;
-                paralen := tcgsize2size[OS_ADDR];
+                paracgsize:=OS_ADDR;
+                paralen:=tcgsize2size[OS_ADDR];
               end
               end
             else
             else
               begin
               begin

+ 8 - 0
compiler/avr/navradd.pas

@@ -77,6 +77,8 @@ interface
                       GetResFlags:=F_LT;
                       GetResFlags:=F_LT;
                     gten:
                     gten:
                       GetResFlags:=F_NotPossible;
                       GetResFlags:=F_NotPossible;
+                    else
+                      internalerror(2014082020);
                   end
                   end
                 else
                 else
                   case NodeType of
                   case NodeType of
@@ -88,6 +90,8 @@ interface
                       GetResFlags:=F_NotPossible;
                       GetResFlags:=F_NotPossible;
                     gten:
                     gten:
                       GetResFlags:=F_GE;
                       GetResFlags:=F_GE;
+                    else
+                      internalerror(2014082021);
                   end;
                   end;
               end
               end
             else
             else
@@ -102,6 +106,8 @@ interface
                       GetResFlags:=F_CC;
                       GetResFlags:=F_CC;
                     gten:
                     gten:
                       GetResFlags:=F_NotPossible;
                       GetResFlags:=F_NotPossible;
+                    else
+                      internalerror(2014082022);
                   end
                   end
                 else
                 else
                   case NodeType of
                   case NodeType of
@@ -113,6 +119,8 @@ interface
                       GetResFlags:=F_NotPossible;
                       GetResFlags:=F_NotPossible;
                     gten:
                     gten:
                       GetResFlags:=F_CS;
                       GetResFlags:=F_CS;
+                    else
+                      internalerror(2014082023);
                   end;
                   end;
               end;
               end;
         end;
         end;

+ 385 - 0
compiler/blockutl.pas

@@ -0,0 +1,385 @@
+{
+    Copyright (c) 2014 by Jonas Maebe, Member of the Free Pascal
+    development team.
+
+    This unit implements helper routines for "blocks" support
+    (http://en.wikipedia.org/wiki/Blocks_(C_language_extension) )
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit blockutl;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    node,nld,ncnv,
+    symtype,symdef;
+
+  { accepts a loadnode for a procdef
+
+    returns a node representing the converted code to implement this
+    conversion (this node is valid both for typed constant declarations and
+    in function bodies). The node is not reused }
+  function generate_block_for_procaddr(procloadnode: tloadnode): tnode;
+
+  { for a procdef, return a recorddef representing a block literal for this
+    procdef
+
+    for a procvardef, return a basic recorddef representing a block literal
+    with enough info to call this procvardef }
+  function get_block_literal_type_for_proc(pd: tabstractprocdef): trecorddef;
+
+implementation
+
+  uses
+    verbose,globtype,globals,cutils,constexp,
+    pass_1,pparautl,fmodule,
+    aasmdata,
+    nbas,ncon,nmem,nutils,
+    symbase,symconst,symtable,symsym,symcreat,objcutil,objcdef,defutil,
+    paramgr;
+
+
+  function get_block_literal_type_for_proc(pd: tabstractprocdef): trecorddef;
+    begin
+      if pd.typ=procvardef then
+        result:=trecorddef(search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_LITERAL_BASE',true).typedef)
+      else if pd.is_addressonly then
+        result:=trecorddef(search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_LITERAL_STATIC',true).typedef)
+      { todo: nested functions and Objective-C methods }
+      else if not is_nested_pd(pd) and
+              not is_objcclass(tdef(pd.owner.defowner)) then
+        result:=trecorddef(search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_LITERAL_COMPLEX_PROCVAR',true).typedef)
+      else
+        internalerror(2014071304);
+    end;
+
+
+  function get_block_literal_isa(orgpd: tprocdef): tstaticvarsym;
+    var
+      srsym: tsym;
+      srsymtable: tsymtable;
+      name: tidstring;
+    begin
+      if orgpd.is_addressonly then
+        name:='_NSCONCRETEGLOBALBLOCK'
+      else
+        name:='_NSCONCRETESTACKBLOCK';
+      if not searchsym_in_named_module('BLOCKRTL',name,srsym,srsymtable) or
+         (srsym.typ<>staticvarsym) then
+        internalerror(2014071501);
+      result:=tstaticvarsym(srsym);
+    end;
+
+
+  function get_block_literal_flags(orgpd, invokepd: tprocdef): longint;
+    { BlockLiteralFlags }
+    const
+      BLOCK_HAS_COPY_DISPOSE    = 1 shl 25;
+      BLOCK_HAS_CXX_OBJ         = 1 shl 26;
+      BLOCK_IS_GLOBAL           = 1 shl 28;
+      BLOCK_USE_STRET           = 1 shl 29;
+      BLOCK_HAS_SIGNATURE       = 1 shl 30;
+      BLOCK_HAS_EXTENDED_LAYOUT = 1 shl 31;
+    begin
+      result:=0;
+      { BLOCK_HAS_COPY_DISPOSE :
+          copy/dispose will be necessary once we support nested procedures, in
+          case they capture reference counted types, Objective-C class instances
+          or block-type variables
+      }
+
+      { BLOCK_HAS_CXX_OBJ:
+          we don't support C++ (stack-based) class instances yet
+      }
+
+      { BLOCK_IS_GLOBAL:
+          set in case the block does not capture any local state; used for
+          global functions and in theory also possible for nested functions that
+          do not access any variables from their parentfp context
+      }
+      if orgpd.is_addressonly then
+        result:=result or BLOCK_IS_GLOBAL;
+
+      { BLOCK_USE_STRET:
+          set in case the invoke function returns its result via a hidden
+          parameter
+      }
+      if paramanager.ret_in_param(invokepd.returndef,orgpd) then
+        result:=result or BLOCK_USE_STRET;
+      { BLOCK_HAS_SIGNATURE:
+          only if this bit is set, the above bit will actually be taken into
+          account (for backward compatibility). We need it so that our invoke
+          function isn't called as a variadic function, but on the downside this
+          requires Mac OS X 10.7 or later
+      }
+      result:=result or BLOCK_HAS_SIGNATURE;
+      { BLOCK_HAS_EXTENDED_LAYOUT:
+          no documentation about what this means or what it's good for (clang
+          adds it for Objective-C 1 platforms in case garbage collection is
+          switched off, but then you also have to actually generate this layout)
+      }
+    end;
+
+
+  function get_block_literal_descriptor(invokepd: tprocdef; block_literal_size: tcgint): tstaticvarsym;
+    var
+      descriptordef: tdef;
+      descriptor: tstaticvarsym;
+      name: tsymstr;
+      srsym: tsym;
+      srsymtable: tsymtable;
+    begin
+      (*
+        FPC_Block_descriptor_simple = record
+          reserved: culong;
+          Block_size: culong;
+          { signatures are only for the "ABI.2010.3.16" version, but that's all
+            we support because otherwise the callback has to be a C-style
+            variadic function, which we cannot (yet?) generate }
+          signature: pchar;
+        end;
+      *)
+
+      { must be a valid Pascal identifier, because we will reference it when
+        constructing the block initialiser }
+      { we don't have to include the moduleid in this mangledname, because
+        the invokepd is a local procedure in the current unit -> defid by
+        itself is unique }
+      name:='__FPC_BLOCK_DESCRIPTOR_SIMPLE_'+tostr(invokepd.defid);
+      { already exists -> return }
+      if searchsym(name,srsym,srsymtable) then
+        begin
+          if srsym.typ<>staticvarsym then
+            internalerror(2014071402);
+          result:=tstaticvarsym(srsym);
+          exit;
+        end;
+      { find the type of the descriptor structure }
+      descriptordef:=search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_DESCRIPTOR_SIMPLE',true).typedef;
+      { create new static variable }
+      descriptor:=cstaticvarsym.create(name,vs_value,descriptordef,[]);
+      symtablestack.top.insert(descriptor);
+      include(descriptor.symoptions,sp_internal);
+      { create typed constant for the descriptor }
+      str_parse_typedconst(current_asmdata.AsmLists[al_const],
+        '(reserved: 0; Block_size: '+tostr(block_literal_size)+
+        '; signature: '''+objcencodemethod(invokepd)+''');',descriptor);
+      result:=descriptor;
+    end;
+
+
+  { creates a wrapper function for pd with the C calling convention and an
+    extra first parameter pointing to the block "self" pointer. This wrapper is
+    what will be assigned to the "invoke" field of the block }
+  function get_invoke_wrapper(orgpd: tprocdef; orgpv: tprocvardef): tprocdef;
+    var
+      wrappername: TIDString;
+      srsym: tsym;
+      srsymtable: tsymtable;
+    begin
+      { the copy() is to ensure we don't overflow the maximum identifier length;
+        the combination of owner.moduleid and defid will make the name unique }
+      wrappername:='__FPC_BLOCK_INVOKE_'+upper(copy(orgpd.procsym.realname,1,60))+'_'+tostr(orgpd.owner.moduleid)+'_'+tostr(orgpd.defid);
+      { already an invoke wrapper for this procsym -> reuse }
+      if searchsym(wrappername,srsym,srsymtable) then
+        begin
+          if (srsym.typ<>procsym) or
+             (tprocsym(srsym).procdeflist.count<>1) then
+            internalerror(2014071503);
+          result:=tprocdef(tprocsym(srsym).procdeflist[0]);
+          exit;
+        end;
+      { bare copy, so that self etc are not inserted }
+      result:=tprocdef(orgpd.getcopyas(procdef,pc_bareproc));
+      { will be called accoding to the ABI conventions }
+      result.proccalloption:=pocall_cdecl;
+      { add po_is_block so that a block "self" pointer gets added (of the type
+        returned by get_block_literal_type_for_proc()) }
+      include(result.procoptions,po_is_block);
+      { now insert self/vmt/funcret according to the newly set calling
+        convention }
+      insert_self_and_vmt_para(result);
+      insert_funcret_para(result);
+      finish_copied_procdef(result,wrappername,current_module.localsymtable,nil);
+      if orgpd.is_addressonly then
+        begin
+          result.synthetickind:=tsk_callthrough;
+          result.skpara:=orgpd;
+        end
+      else
+        begin
+          { alias for the type to invoke the procvar, used in the symcreat
+            handling of tsk_block_invoke_procvar }
+          result.localst.insert(ctypesym.create('__FPC_BLOCK_INVOKE_PV_TYPE',orgpv));
+          result.synthetickind:=tsk_block_invoke_procvar;
+        end;
+    end;
+
+
+  { compose a block literal for a static block (one without context) }
+  function get_global_proc_literal_sym(blockliteraldef: tdef; blockisasym: tstaticvarsym; blockflags: longint; invokepd: tprocdef; descriptor: tstaticvarsym): tstaticvarsym;
+    var
+      literalname: TIDString;
+      srsym: tsym;
+      srsymtable: tsymtable;
+    begin
+      literalname:='block_literal_for_'+invokepd.procsym.realname;
+      { already exists -> return }
+      if searchsym(literalname,srsym,srsymtable) then
+        begin
+          if srsym.typ<>staticvarsym then
+            internalerror(2014071506);
+          result:=tstaticvarsym(srsym);
+          exit;
+        end;
+      { create new block literal symbol }
+      result:=cstaticvarsym.create(
+        '$'+literalname,
+        vs_value,
+        blockliteraldef,[]);
+      include(result.symoptions,sp_internal);
+      symtablestack.top.insert(result);
+      { initialise it }
+      str_parse_typedconst(current_asmdata.AsmLists[al_const],
+        '(base: (isa        : @'+blockisasym.realname+
+              '; flags     : '+tostr(blockflags)+
+              '; reserved  : 0'+
+              '; invoke    : @'+invokepd.procsym.realname+
+              '; descriptor: @'+descriptor.realname+
+              '));',
+        result);
+    end;
+
+
+  { compose an on-stack block literal for a "procedure of object" }
+  function get_pascal_method_literal(blockliteraldef: tdef; blockisasym: tstaticvarsym; blockflags: longint; procvarnode: tnode; invokepd: tprocdef; orgpv: tprocvardef; descriptor: tstaticvarsym): tnode;
+    var
+      statement: tstatementnode;
+      literaltemp: ttempcreatenode;
+    begin
+      result:=internalstatements(statement);
+      { create new block literal structure }
+      literaltemp:=ctempcreatenode.create(blockliteraldef,blockliteraldef.size,tt_persistent,false);
+      addstatement(statement,literaltemp);
+      { temp.base.isa:=@blockisasym }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'ISA'),
+        caddrnode.create(cloadnode.create(blockisasym,blockisasym.owner))));
+      { temp.base.flags:=blockflags }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'FLAGS'),
+        genintconstnode(blockflags)));
+      { temp.base.reserved:=0 }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'RESERVED'),
+        genintconstnode(0)));
+      { temp.base.invoke:=tmethod(@invokepd) }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'INVOKE'),
+        ctypeconvnode.create_proc_to_procvar(
+          cloadnode.create_procvar(invokepd.procsym,invokepd,invokepd.owner))));
+      { temp.base.descriptor:=@descriptor }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'DESCRIPTOR'),
+        caddrnode.create(cloadnode.create(descriptor,descriptor.owner))));
+      { temp.pv:=tmethod(@orgpd) }
+      addstatement(statement,cassignmentnode.create(
+        ctypeconvnode.create_explicit(genloadfield(ctemprefnode.create(literaltemp),'PV'),orgpv),
+          procvarnode.getcopy));
+      { and return the address of the temp }
+      addstatement(statement,caddrnode.create(ctemprefnode.create(literaltemp)));
+      { typecheck this now, because the current source may be written in TP/
+        Delphi/MacPas mode and the above node tree has been constructed for
+        ObjFPC mode, which has been set by replace_scanner (in Delphi, the
+        assignment to invoke would be without the proc_to_procvar conversion) }
+      typecheckpass(result);
+    end;
+
+
+  function generate_block_for_procaddr(procloadnode: tloadnode): tnode;
+    var
+      procvarnode: tnode;
+      { procvardef representing the original function we want to invoke }
+      orgpv: tprocvardef;
+      { procdef of the original function we want to invoke }
+      orgpd,
+      { procdef for the invoke-wrapper that we generated to call the original
+        function via a procvar }
+      invokepd: tprocdef;
+      blockliteraldef: tdef;
+      descriptor,
+      blockisasym,
+      blockliteralsym: tstaticvarsym;
+      blockflags: longint;
+      old_symtablestack: tsymtablestack;
+      sstate: tscannerstate;
+    begin
+      result:=nil;
+      { supported? (should be caught earlier) }
+      if (procloadnode.resultdef.typ<>procdef) or
+         is_nested_pd(tprocdef(procloadnode.resultdef)) or
+         is_objcclass(tdef(procloadnode.resultdef.owner.defowner)) then
+        internalerror(2014071401);
+
+      { add every symbol that we create here to the unit-level symbol table }
+      old_symtablestack:=symtablestack;
+      symtablestack:=old_symtablestack.getcopyuntil(current_module.localsymtable);
+      { save scanner state }
+      replace_scanner('block literal creation',sstate);
+
+      { def representing the original function }
+      orgpd:=tprocdef(procloadnode.resultdef);
+      { def representing the corresponding procvar type }
+      procvarnode:=ctypeconvnode.create_proc_to_procvar(procloadnode.getcopy);
+      typecheckpass(procvarnode);
+      orgpv:=tprocvardef(procvarnode.resultdef);
+      { get blockdef for this kind of procdef }
+      blockliteraldef:=get_block_literal_type_for_proc(orgpd);
+      { get the invoke wrapper }
+      invokepd:=get_invoke_wrapper(orgpd,orgpv);
+      { get the descriptor }
+      descriptor:=get_block_literal_descriptor(invokepd,blockliteraldef.size);
+      { get the ISA pointer for the literal }
+      blockisasym:=get_block_literal_isa(orgpd);
+      { get the flags for the block }
+      blockflags:=get_block_literal_flags(orgpd,invokepd);
+      { global/simple procedure -> block literal is a typed constant }
+      if orgpd.is_addressonly then
+        begin
+          blockliteralsym:=get_global_proc_literal_sym(blockliteraldef,blockisasym,blockflags,invokepd,descriptor);
+          { result: address of the block literal }
+          result:=caddrnode.create(cloadnode.create(blockliteralsym,blockliteralsym.owner));
+        end
+      else
+        begin
+          result:=get_pascal_method_literal(blockliteraldef,blockisasym,blockflags,procvarnode,invokepd,orgpv,descriptor)
+        end;
+
+      procvarnode.free;
+
+      { restore scanner }
+      restore_scanner(sstate);
+      { restore symtable stack }
+      symtablestack.free;
+      symtablestack:=old_symtablestack;
+    end;
+
+end.
+

+ 3 - 0
compiler/cfileutl.pas

@@ -23,7 +23,9 @@ unit cfileutl;
 
 
 {$i fpcdefs.inc}
 {$i fpcdefs.inc}
 
 
+{$ifndef DragonFly}
 {$define usedircache}
 {$define usedircache}
+{$endif DragonFly}
 
 
 interface
 interface
 
 
@@ -124,6 +126,7 @@ interface
     function  GetShortName(const n:TCmdStr):TCmdStr;
     function  GetShortName(const n:TCmdStr):TCmdStr;
     function maybequoted(const s:string):string;
     function maybequoted(const s:string):string;
     function maybequoted(const s:ansistring):ansistring;
     function maybequoted(const s:ansistring):ansistring;
+    function maybequoted_for_script(const s:ansistring; quote_script: tscripttype):ansistring;
 
 
     procedure InitFileUtils;
     procedure InitFileUtils;
     procedure DoneFileUtils;
     procedure DoneFileUtils;

+ 13 - 0
compiler/cgbase.pas

@@ -101,6 +101,12 @@ interface
          ,addr_dgroup      // the data segment group
          ,addr_dgroup      // the data segment group
          ,addr_seg         // used for getting the segment of an object, e.g. 'mov ax, SEG symbol'
          ,addr_seg         // used for getting the segment of an object, e.g. 'mov ax, SEG symbol'
          {$ENDIF}
          {$ENDIF}
+         {$IFDEF AARCH64}
+         ,addr_page
+         ,addr_pageoffset
+         ,addr_gotpage
+         ,addr_gotpageoffset
+         {$ENDIF AARCH64}
          );
          );
 
 
 
 
@@ -331,6 +337,13 @@ interface
           OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
           OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
           OS_M64,OS_M128,OS_M256);
           OS_M64,OS_M128,OS_M256);
 
 
+       tcgsize2signed : array[tcgsize] of tcgsize = (OS_NO,
+          OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,
+          OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,
+          OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
+          OS_M64,OS_M128,OS_M256);
+
+
        tcgloc2str : array[TCGLoc] of string[12] = (
        tcgloc2str : array[TCGLoc] of string[12] = (
             'LOC_INVALID',
             'LOC_INVALID',
             'LOC_VOID',
             'LOC_VOID',

+ 2 - 2
compiler/cghlcpu.pas

@@ -65,14 +65,14 @@ uses
       procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;
       procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;
       procedure a_call_reg(list: TAsmList; reg: tregister); override;
       procedure a_call_reg(list: TAsmList; reg: tregister); override;
       procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
       procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
-      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); override;
+      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
     end;
     end;
 
 
 implementation
 implementation
 
 
    { thlbasecgcpu }
    { thlbasecgcpu }
 
 
-    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
       begin
         internalerror(2012042801);
         internalerror(2012042801);
       end;
       end;

+ 29 - 10
compiler/cgobj.pas

@@ -248,7 +248,7 @@ unit cgobj;
           procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);virtual; abstract;
           procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);virtual; abstract;
 
 
           { bit scan instructions }
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); virtual;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); virtual;
 
 
           { Multiplication with doubling result size.
           { Multiplication with doubling result size.
             dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. }
             dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. }
@@ -785,8 +785,15 @@ implementation
 
 
 
 
     procedure tcg.translate_register(var reg : tregister);
     procedure tcg.translate_register(var reg : tregister);
+      var
+        rt: tregistertype;
       begin
       begin
-        rg[getregtype(reg)].translate_register(reg);
+        { Getting here without assigned rg is possible for an "assembler nostackframe"
+          function returning x87 float, compiler tries to translate NR_ST which is used for
+          result.  }
+        rt:=getregtype(reg);
+        if assigned(rg[rt]) then
+          rg[rt].translate_register(reg);
       end;
       end;
 
 
 
 
@@ -798,7 +805,8 @@ implementation
 
 
     procedure tcg.a_reg_dealloc(list : TAsmList;r : tregister);
     procedure tcg.a_reg_dealloc(list : TAsmList;r : tregister);
       begin
       begin
-         list.concat(tai_regalloc.dealloc(r,nil));
+        if (r<>NR_NO) then
+          list.concat(tai_regalloc.dealloc(r,nil));
       end;
       end;
 
 
 
 
@@ -827,7 +835,14 @@ implementation
          ref : treference;
          ref : treference;
          tmpreg : tregister;
          tmpreg : tregister;
       begin
       begin
-         cgpara.check_simple_location;
+         if assigned(cgpara.location^.next) then
+           begin
+             tg.gethltemp(list,cgpara.def,cgpara.def.size,tt_persistent,ref);
+             a_load_reg_ref(list,size,size,r,ref);
+             a_load_ref_cgpara(list,size,ref,cgpara);
+             tg.ungettemp(list,ref);
+             exit;
+           end;
          paramanager.alloccgpara(list,cgpara);
          paramanager.alloccgpara(list,cgpara);
          if cgpara.location^.shiftval<0 then
          if cgpara.location^.shiftval<0 then
            begin
            begin
@@ -1295,8 +1310,10 @@ implementation
                       dec(tmpref.offset)
                       dec(tmpref.offset)
                     else
                     else
                       inc(tmpref.offset);
                       inc(tmpref.offset);
-                    a_load_ref_reg(list,OS_8,OS_16,tmpref,register);
-                    a_op_reg_reg(list,OP_OR,OS_16,tmpreg,register);
+                    tmpreg2:=makeregsize(list,register,OS_16);
+                    a_load_ref_reg(list,OS_8,OS_16,tmpref,tmpreg2);
+                    a_op_reg_reg(list,OP_OR,OS_16,tmpreg,tmpreg2);
+                    a_load_reg_reg(list,OS_16,tosize,tmpreg2,register);
                   end;
                   end;
               OS_32,OS_S32:
               OS_32,OS_S32:
                 if ref.alignment=2 then
                 if ref.alignment=2 then
@@ -1310,8 +1327,10 @@ implementation
                       dec(tmpref.offset,2)
                       dec(tmpref.offset,2)
                     else
                     else
                       inc(tmpref.offset,2);
                       inc(tmpref.offset,2);
-                    a_load_ref_reg(list,OS_16,OS_32,tmpref,register);
-                    a_op_reg_reg(list,OP_OR,OS_32,tmpreg,register);
+                    tmpreg2:=makeregsize(list,register,OS_32);
+                    a_load_ref_reg(list,OS_16,OS_32,tmpref,tmpreg2);
+                    a_op_reg_reg(list,OP_OR,OS_32,tmpreg,tmpreg2);
+                    a_load_reg_reg(list,OS_32,tosize,tmpreg2,register);
                   end
                   end
                 else
                 else
                   begin
                   begin
@@ -1330,7 +1349,7 @@ implementation
                         a_load_ref_reg(list,OS_8,OS_32,tmpref,tmpreg2);
                         a_load_ref_reg(list,OS_8,OS_32,tmpref,tmpreg2);
                         a_op_reg_reg(list,OP_OR,OS_32,tmpreg2,tmpreg);
                         a_op_reg_reg(list,OP_OR,OS_32,tmpreg2,tmpreg);
                       end;
                       end;
-                    a_load_reg_reg(list,OS_32,OS_32,tmpreg,register);
+                    a_load_reg_reg(list,OS_32,tosize,tmpreg,register);
                   end
                   end
               else
               else
                 a_load_ref_reg(list,fromsize,tosize,tmpref,register);
                 a_load_ref_reg(list,fromsize,tosize,tmpref,register);
@@ -2454,7 +2473,7 @@ implementation
       end;
       end;
 
 
 
 
-    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
       begin
         internalerror(2014070601);
         internalerror(2014070601);
       end;
       end;

+ 6 - 0
compiler/cgutils.pas

@@ -63,6 +63,12 @@ unit cgutils;
          addressmode : taddressmode;
          addressmode : taddressmode;
          shiftmode   : tshiftmode;
          shiftmode   : tshiftmode;
 {$endif arm}
 {$endif arm}
+{$ifdef aarch64}
+         symboldata  : tlinkedlistitem;
+         shiftimm    : byte;
+         addressmode : taddressmode;
+         shiftmode   : tshiftmode;
+{$endif aarch64}
 {$ifdef avr}
 {$ifdef avr}
          addressmode : taddressmode;
          addressmode : taddressmode;
 {$endif avr}
 {$endif avr}

+ 1 - 0
compiler/compinnr.inc

@@ -88,6 +88,7 @@ const
    in_unbox_x_y         = 78; { manage platforms: extract from class instance }
    in_unbox_x_y         = 78; { manage platforms: extract from class instance }
    in_popcnt_x          = 79;
    in_popcnt_x          = 79;
    in_aligned_x         = 80;
    in_aligned_x         = 80;
+   in_setstring_x_y_z   = 81;
 
 
 { Internal constant functions }
 { Internal constant functions }
    in_const_sqr        = 100;
    in_const_sqr        = 100;

+ 1 - 1
compiler/comprsrc.pas

@@ -377,7 +377,7 @@ procedure TWinLikeResourceFile.Collect(const fn: ansistring);
 begin
 begin
   if fResScript=nil then
   if fResScript=nil then
     fResScript:=TScript.Create(fScriptName);
     fResScript:=TScript.Create(fScriptName);
-  fResScript.Add(MaybeQuoted(fn));
+  fResScript.Add(maybequoted_for_script(fn,script_fpcres));
   inc(fCollectCount);
   inc(fCollectCount);
 end;
 end;
 
 

+ 77 - 43
compiler/dbgdwarf.pas

@@ -375,7 +375,7 @@ interface
         procedure appendsym_property(list:TAsmList;sym:tpropertysym);override;
         procedure appendsym_property(list:TAsmList;sym:tpropertysym);override;
 
 
         function symdebugname(sym:tsym): String; virtual;
         function symdebugname(sym:tsym): String; virtual;
-        function symname(sym:tsym): String; virtual;
+        function symname(sym: tsym; manglename: boolean): String; virtual;
         procedure append_visibility(vis: tvisibility);
         procedure append_visibility(vis: tvisibility);
 
 
         procedure enum_membersyms_callback(p:TObject;arg:pointer);
         procedure enum_membersyms_callback(p:TObject;arg:pointer);
@@ -931,10 +931,10 @@ implementation
                   begin
                   begin
                     if not assigned(def.typesym) then
                     if not assigned(def.typesym) then
                       internalerror(200610011);
                       internalerror(200610011);
-                    def.dwarf_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBG',def.owner,symname(def.typesym)),AT_DATA);
-                    def.dwarf_ref_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBGREF',def.owner,symname(def.typesym)),AT_DATA);
+                    def.dwarf_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBG',def.owner,symname(def.typesym, true)),AT_DATA);
+                    def.dwarf_ref_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBGREF',def.owner,symname(def.typesym, true)),AT_DATA);
                     if is_class_or_interface_or_dispinterface(def) or is_objectpascal_helper(def) then
                     if is_class_or_interface_or_dispinterface(def) or is_objectpascal_helper(def) then
-                      tobjectdef(def).dwarf_struct_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBG2',def.owner,symname(def.typesym)),AT_DATA);
+                      tobjectdef(def).dwarf_struct_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBG2',def.owner,symname(def.typesym, true)),AT_DATA);
                     def.dbg_state:=dbg_state_written;
                     def.dbg_state:=dbg_state_written;
                   end
                   end
                 else
                 else
@@ -945,10 +945,10 @@ implementation
                        (def.owner.symtabletype=globalsymtable) and
                        (def.owner.symtabletype=globalsymtable) and
                        (def.owner.iscurrentunit) then
                        (def.owner.iscurrentunit) then
                       begin
                       begin
-                        def.dwarf_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBG',def.owner,symname(def.typesym)),AB_GLOBAL,AT_DATA);
-                        def.dwarf_ref_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBGREF',def.owner,symname(def.typesym)),AB_GLOBAL,AT_DATA);
+                        def.dwarf_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBG',def.owner,symname(def.typesym, true)),AB_GLOBAL,AT_DATA);
+                        def.dwarf_ref_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBGREF',def.owner,symname(def.typesym, true)),AB_GLOBAL,AT_DATA);
                         if is_class_or_interface_or_dispinterface(def) or is_objectpascal_helper(def) then
                         if is_class_or_interface_or_dispinterface(def) or is_objectpascal_helper(def) then
-                          tobjectdef(def).dwarf_struct_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBG2',def.owner,symname(def.typesym)),AB_GLOBAL,AT_DATA);
+                          tobjectdef(def).dwarf_struct_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBG2',def.owner,symname(def.typesym, true)),AB_GLOBAL,AT_DATA);
                         include(def.defstates,ds_dwarf_dbg_info_written);
                         include(def.defstates,ds_dwarf_dbg_info_written);
                       end
                       end
                     else
                     else
@@ -1371,7 +1371,7 @@ implementation
                 { base type such as byte/shortint/word/... }
                 { base type such as byte/shortint/word/... }
                 if assigned(def.typesym) then
                 if assigned(def.typesym) then
                   append_entry(DW_TAG_base_type,false,[
                   append_entry(DW_TAG_base_type,false,[
-                    DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                    DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                     DW_AT_encoding,DW_FORM_data1,sign,
                     DW_AT_encoding,DW_FORM_data1,sign,
                     DW_AT_byte_size,DW_FORM_data1,fullbytesize])
                     DW_AT_byte_size,DW_FORM_data1,fullbytesize])
                 else
                 else
@@ -1385,7 +1385,7 @@ implementation
                   {       to be always clipped to s32bit for some reason }
                   {       to be always clipped to s32bit for some reason }
                   if assigned(def.typesym) then
                   if assigned(def.typesym) then
                     append_entry(DW_TAG_subrange_type,false,[
                     append_entry(DW_TAG_subrange_type,false,[
-                      DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                      DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                       DW_AT_lower_bound,signform,int64(def.low),
                       DW_AT_lower_bound,signform,int64(def.low),
                       DW_AT_upper_bound,signform,int64(def.high)
                       DW_AT_upper_bound,signform,int64(def.high)
                       ])
                       ])
@@ -1544,7 +1544,7 @@ implementation
             if assigned(def.typesym) then
             if assigned(def.typesym) then
               begin
               begin
                 append_entry(DW_TAG_base_type,false,[
                 append_entry(DW_TAG_base_type,false,[
-                  DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                  DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                   DW_AT_encoding,DW_FORM_data1,DW_ATE_float,
                   DW_AT_encoding,DW_FORM_data1,DW_ATE_float,
                   DW_AT_byte_size,DW_FORM_data1,def.size
                   DW_AT_byte_size,DW_FORM_data1,def.size
                   ]);
                   ]);
@@ -1569,7 +1569,7 @@ implementation
             { we should use DW_ATE_signed_fixed, however it isn't supported yet by GDB (FK) }
             { we should use DW_ATE_signed_fixed, however it isn't supported yet by GDB (FK) }
             if assigned(def.typesym) then
             if assigned(def.typesym) then
               append_entry(DW_TAG_base_type,false,[
               append_entry(DW_TAG_base_type,false,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_encoding,DW_FORM_data1,DW_ATE_signed,
                 DW_AT_encoding,DW_FORM_data1,DW_ATE_signed,
                 DW_AT_byte_size,DW_FORM_data1,8
                 DW_AT_byte_size,DW_FORM_data1,8
                 ])
                 ])
@@ -1581,7 +1581,7 @@ implementation
           s64comp:
           s64comp:
             if assigned(def.typesym) then
             if assigned(def.typesym) then
               append_entry(DW_TAG_base_type,false,[
               append_entry(DW_TAG_base_type,false,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_encoding,DW_FORM_data1,DW_ATE_signed,
                 DW_AT_encoding,DW_FORM_data1,DW_ATE_signed,
                 DW_AT_byte_size,DW_FORM_data1,8
                 DW_AT_byte_size,DW_FORM_data1,8
                 ])
                 ])
@@ -1604,7 +1604,7 @@ implementation
       begin
       begin
         if assigned(def.typesym) then
         if assigned(def.typesym) then
           append_entry(DW_TAG_enumeration_type,true,[
           append_entry(DW_TAG_enumeration_type,true,[
-            DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+            DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
             DW_AT_byte_size,DW_FORM_data1,def.size
             DW_AT_byte_size,DW_FORM_data1,def.size
             ])
             ])
         else
         else
@@ -1625,7 +1625,7 @@ implementation
             if hp.value>def.maxval then
             if hp.value>def.maxval then
               break;
               break;
             append_entry(DW_TAG_enumerator,false,[
             append_entry(DW_TAG_enumerator,false,[
-              DW_AT_name,DW_FORM_string,symname(hp)+#0,
+              DW_AT_name,DW_FORM_string,symname(hp, false)+#0,
               DW_AT_const_value,DW_FORM_data4,hp.value
               DW_AT_const_value,DW_FORM_data4,hp.value
             ]);
             ]);
             finish_entry;
             finish_entry;
@@ -1668,7 +1668,7 @@ implementation
             { no known size, no known upper bound }
             { no known size, no known upper bound }
             if assigned(def.typesym) then
             if assigned(def.typesym) then
               append_entry(DW_TAG_array_type,true,[
               append_entry(DW_TAG_array_type,true,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0
                 ])
                 ])
             else
             else
               append_entry(DW_TAG_array_type,true,[]);
               append_entry(DW_TAG_array_type,true,[]);
@@ -1685,7 +1685,7 @@ implementation
             size:=def.size;
             size:=def.size;
             if assigned(def.typesym) then
             if assigned(def.typesym) then
               append_entry(DW_TAG_array_type,true,[
               append_entry(DW_TAG_array_type,true,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_byte_size,DW_FORM_udata,size
                 DW_AT_byte_size,DW_FORM_udata,size
                 ])
                 ])
             else
             else
@@ -1867,7 +1867,7 @@ implementation
         begin
         begin
           if assigned(def.typesym) then
           if assigned(def.typesym) then
             append_entry(DW_TAG_subroutine_type,true,[
             append_entry(DW_TAG_subroutine_type,true,[
-              DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+              DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
               DW_AT_prototyped,DW_FORM_flag,true
               DW_AT_prototyped,DW_FORM_flag,true
             ])
             ])
           else
           else
@@ -1882,7 +1882,7 @@ implementation
           for i:=0 to def.paras.count-1 do
           for i:=0 to def.paras.count-1 do
             begin
             begin
               append_entry(DW_TAG_formal_parameter,false,[
               append_entry(DW_TAG_formal_parameter,false,[
-                DW_AT_name,DW_FORM_string,symname(tsym(def.paras[i]))+#0
+                DW_AT_name,DW_FORM_string,symname(tsym(def.paras[i]), false)+#0
               ]);
               ]);
               append_labelentry_ref(DW_AT_type,def_dwarf_lab(tparavarsym(def.paras[i]).vardef));
               append_labelentry_ref(DW_AT_type,def_dwarf_lab(tparavarsym(def.paras[i]).vardef));
               finish_entry;
               finish_entry;
@@ -2017,7 +2017,7 @@ implementation
           begin
           begin
             current_asmdata.getaddrlabel(TAsmLabel(pointer(labsym)));
             current_asmdata.getaddrlabel(TAsmLabel(pointer(labsym)));
             append_entry(DW_TAG_typedef,false,[
             append_entry(DW_TAG_typedef,false,[
-              DW_AT_name,DW_FORM_string,symname(def.typesym)+#0
+              DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0
             ]);
             ]);
             append_labelentry_ref(DW_AT_type,labsym);
             append_labelentry_ref(DW_AT_type,labsym);
             finish_entry;
             finish_entry;
@@ -2114,7 +2114,7 @@ implementation
         current_asmdata.asmlists[al_dwarf_info].concat(tai_comment.Create(strpnew('Procdef '+def.fullprocname(true))));
         current_asmdata.asmlists[al_dwarf_info].concat(tai_comment.Create(strpnew('Procdef '+def.fullprocname(true))));
         if not is_objc_class_or_protocol(def.struct) then
         if not is_objc_class_or_protocol(def.struct) then
           append_entry(DW_TAG_subprogram,true,
           append_entry(DW_TAG_subprogram,true,
-            [DW_AT_name,DW_FORM_string,symname(def.procsym)+#0
+            [DW_AT_name,DW_FORM_string,symname(def.procsym, false)+#0
             { data continues below }
             { data continues below }
             { problem: base reg isn't known here
             { problem: base reg isn't known here
               DW_AT_frame_base,DW_FORM_block1,1
               DW_AT_frame_base,DW_FORM_block1,1
@@ -2318,7 +2318,7 @@ implementation
 
 
     procedure TDebugInfoDwarf.appendsym_var(list:TAsmList;sym:tabstractnormalvarsym);
     procedure TDebugInfoDwarf.appendsym_var(list:TAsmList;sym:tabstractnormalvarsym);
       begin
       begin
-        appendsym_var_with_name_type_offset(list,sym,symname(sym),sym.vardef,0,[]);
+        appendsym_var_with_name_type_offset(list,sym,symname(sym, false),sym.vardef,0,[]);
       end;
       end;
 
 
 
 
@@ -2404,7 +2404,12 @@ implementation
                     if (vo_is_thread_var in sym.varoptions) then
                     if (vo_is_thread_var in sym.varoptions) then
                       begin
                       begin
 { TODO: !!! FIXME: dwarf for thread vars !!!}
 { TODO: !!! FIXME: dwarf for thread vars !!!}
-                        blocksize:=0;
+{ This is only a minimal change to at least be able to get a value
+  in only one thread is present PM 2014-11-21, like for stabs format }
+                        templist.concat(tai_const.create_8bit(ord(DW_OP_addr)));
+                        templist.concat(tai_const.Create_type_name(aitconst_ptr,sym.mangledname,
+                          offset+sizeof(pint)));
+                        blocksize:=1+sizeof(puint);
                       end
                       end
                     else
                     else
                       begin
                       begin
@@ -2554,7 +2559,7 @@ implementation
 
 
     procedure TDebugInfoDwarf.appendsym_fieldvar(list:TAsmList;sym: tfieldvarsym);
     procedure TDebugInfoDwarf.appendsym_fieldvar(list:TAsmList;sym: tfieldvarsym);
       begin
       begin
-        appendsym_fieldvar_with_name_offset(list,sym,symname(sym),sym.vardef,0);
+        appendsym_fieldvar_with_name_offset(list,sym,symname(sym, false),sym.vardef,0);
       end;
       end;
 
 
 
 
@@ -2601,7 +2606,7 @@ implementation
             if (target_info.endian=endian_little) then
             if (target_info.endian=endian_little) then
               bitoffset:=(fieldnatsize*8)-bitoffset-sym.vardef.packedbitsize;
               bitoffset:=(fieldnatsize*8)-bitoffset-sym.vardef.packedbitsize;
             append_entry(DW_TAG_member,false,[
             append_entry(DW_TAG_member,false,[
-              DW_AT_name,DW_FORM_string,symname(sym)+#0,
+              DW_AT_name,DW_FORM_string,symname(sym, false)+#0,
               { gcc also generates both a bit and byte size attribute }
               { gcc also generates both a bit and byte size attribute }
               { we don't support ordinals >= 256 bits }
               { we don't support ordinals >= 256 bits }
               DW_AT_byte_size,DW_FORM_data1,fieldnatsize,
               DW_AT_byte_size,DW_FORM_data1,fieldnatsize,
@@ -2642,7 +2647,7 @@ implementation
 
 
         if ismember then
         if ismember then
           append_entry(DW_TAG_member,false,[
           append_entry(DW_TAG_member,false,[
-            DW_AT_name,DW_FORM_string,symname(sym)+#0,
+            DW_AT_name,DW_FORM_string,symname(sym, false)+#0,
           { The DW_AT_declaration tag is invalid according to the DWARF specifications.
           { The DW_AT_declaration tag is invalid according to the DWARF specifications.
             But gcc adds this to static const members and gdb checks
             But gcc adds this to static const members and gdb checks
             for this flag. So we have to set it also.
             for this flag. So we have to set it also.
@@ -2652,7 +2657,7 @@ implementation
             ])
             ])
         else
         else
           append_entry(DW_TAG_variable,false,[
           append_entry(DW_TAG_variable,false,[
-            DW_AT_name,DW_FORM_string,symname(sym)+#0
+            DW_AT_name,DW_FORM_string,symname(sym, false)+#0
             ]);
             ]);
         { for string constants, constdef isn't set because they have no real type }
         { for string constants, constdef isn't set because they have no real type }
         case sym.consttyp of
         case sym.consttyp of
@@ -2821,10 +2826,10 @@ implementation
           begin
           begin
             if (tosym.typ=fieldvarsym) then
             if (tosym.typ=fieldvarsym) then
               internalerror(2009031404);
               internalerror(2009031404);
-            appendsym_var_with_name_type_offset(list,tabstractnormalvarsym(tosym),symname(sym),sym.propdef,offset,[])
+            appendsym_var_with_name_type_offset(list,tabstractnormalvarsym(tosym),symname(sym, false),sym.propdef,offset,[])
           end
           end
         else
         else
-          appendsym_fieldvar_with_name_offset(list,tfieldvarsym(tosym),symname(sym),sym.propdef,offset)
+          appendsym_fieldvar_with_name_offset(list,tfieldvarsym(tosym),symname(sym, false),sym.propdef,offset)
       end;
       end;
 
 
 
 
@@ -2883,7 +2888,7 @@ implementation
                   flags:=[];
                   flags:=[];
                   if (sym.owner.symtabletype=localsymtable) then
                   if (sym.owner.symtabletype=localsymtable) then
                     include(flags,dvf_force_local_var);
                     include(flags,dvf_force_local_var);
-                  appendsym_var_with_name_type_offset(list,tabstractnormalvarsym(tosym),symname(sym),tabstractvarsym(sym).vardef,offset,flags);
+                  appendsym_var_with_name_type_offset(list,tabstractnormalvarsym(tosym),symname(sym, false),tabstractvarsym(sym).vardef,offset,flags);
                 end;
                 end;
               templist.free;
               templist.free;
               exit;
               exit;
@@ -2893,7 +2898,7 @@ implementation
         end;
         end;
 
 
         append_entry(DW_TAG_variable,false,[
         append_entry(DW_TAG_variable,false,[
-          DW_AT_name,DW_FORM_string,symname(sym)+#0,
+          DW_AT_name,DW_FORM_string,symname(sym, false)+#0,
           {
           {
           DW_AT_decl_file,DW_FORM_data1,0,
           DW_AT_decl_file,DW_FORM_data1,0,
           DW_AT_decl_line,DW_FORM_data1,
           DW_AT_decl_line,DW_FORM_data1,
@@ -2914,7 +2919,7 @@ implementation
 
 
     procedure TDebugInfoDwarf.beforeappendsym(list:TAsmList;sym:tsym);
     procedure TDebugInfoDwarf.beforeappendsym(list:TAsmList;sym:tsym);
       begin
       begin
-        current_asmdata.asmlists[al_dwarf_info].concat(tai_comment.Create(strpnew('Symbol '+symname(sym))));
+        current_asmdata.asmlists[al_dwarf_info].concat(tai_comment.Create(strpnew('Symbol '+symname(sym, true))));
       end;
       end;
 
 
 
 
@@ -3270,7 +3275,7 @@ implementation
       end;
       end;
 
 
 
 
-    function TDebugInfoDwarf.symname(sym: tsym): String;
+    function TDebugInfoDwarf.symname(sym: tsym; manglename: boolean): String;
       begin
       begin
         if (sym.typ=paravarsym) and
         if (sym.typ=paravarsym) and
            (vo_is_self in tparavarsym(sym).varoptions) then
            (vo_is_self in tparavarsym(sym).varoptions) then
@@ -3290,9 +3295,20 @@ implementation
         else if (ds_dwarf_method_class_prefix in current_settings.debugswitches) and
         else if (ds_dwarf_method_class_prefix in current_settings.debugswitches) and
                 (sym.typ=procsym) and
                 (sym.typ=procsym) and
                 (tprocsym(sym).owner.symtabletype in [objectsymtable,recordsymtable]) then
                 (tprocsym(sym).owner.symtabletype in [objectsymtable,recordsymtable]) then
-          result:=tprocsym(sym).owner.name^+'__'+symdebugname(sym)
+          begin
+            result:=tprocsym(sym).owner.name^+'__';
+            if manglename then
+              result := result + sym.name
+            else
+              result := result + symdebugname(sym);
+          end
         else
         else
-          result:=symdebugname(sym);
+          begin
+            if manglename then
+              result := sym.name
+            else
+              result := symdebugname(sym);
+          end;
       end;
       end;
 
 
 
 
@@ -3316,6 +3332,7 @@ implementation
         currfileinfo,
         currfileinfo,
         lastfileinfo : tfileposinfo;
         lastfileinfo : tfileposinfo;
         currfuncname : pshortstring;
         currfuncname : pshortstring;
+        currstatement: boolean;
         currsectype  : TAsmSectiontype;
         currsectype  : TAsmSectiontype;
         hp, hpend : tai;
         hp, hpend : tai;
         infile : tinputfile;
         infile : tinputfile;
@@ -3334,6 +3351,7 @@ implementation
         currfuncname:=nil;
         currfuncname:=nil;
         currsectype:=sec_code;
         currsectype:=sec_code;
         hp:=Tai(list.first);
         hp:=Tai(list.first);
+        currstatement:=true;
         prevcolumn := 0;
         prevcolumn := 0;
         prevline := 1;
         prevline := 1;
         prevfileidx := 1;
         prevfileidx := 1;
@@ -3365,8 +3383,7 @@ implementation
             end;
             end;
 
 
             if (currsectype=sec_code) and
             if (currsectype=sec_code) and
-               (hp.typ=ait_instruction) and
-               (nolineinfolevel=0) then
+               (hp.typ=ait_instruction) then
               begin
               begin
                 currfileinfo:=tailineinfo(hp).fileinfo;
                 currfileinfo:=tailineinfo(hp).fileinfo;
                 { file changed ? (must be before line info) }
                 { file changed ? (must be before line info) }
@@ -3397,8 +3414,12 @@ implementation
                       end;
                       end;
                   end;
                   end;
 
 
+                { Set the line-nr to 0 if the code does not corresponds to a particular line  }
+                if nolineinfolevel>0 then
+                  currfileinfo.line := 0;
+
                 { line changed ? }
                 { line changed ? }
-                if (lastfileinfo.line<>currfileinfo.line) and (currfileinfo.line<>0) then
+                if (lastfileinfo.line<>currfileinfo.line) and ((currfileinfo.line<>0) or (nolineinfolevel>0)) then
                   begin
                   begin
                     { set address }
                     { set address }
                     current_asmdata.getlabel(currlabel, alt_dbgline);
                     current_asmdata.getlabel(currlabel, alt_dbgline);
@@ -3431,6 +3452,19 @@ implementation
                         prevcolumn := currfileinfo.column;
                         prevcolumn := currfileinfo.column;
                       end;
                       end;
 
 
+                    { set statement }
+                    if (currfileinfo.line=0) and currstatement then
+                      begin
+                        currstatement := false;
+                        asmline.concat(tai_const.create_8bit(DW_LNS_negate_stmt));
+                      end;
+
+                    if not currstatement and (currfileinfo.line>0) then
+                      begin
+                        currstatement := true;
+                        asmline.concat(tai_const.create_8bit(DW_LNS_negate_stmt));
+                      end;
+
                     { set line }
                     { set line }
                     diffline := currfileinfo.line - prevline;
                     diffline := currfileinfo.line - prevline;
                     if (diffline >= LINE_BASE) and (OPCODE_BASE + diffline - LINE_BASE <= 255) then
                     if (diffline >= LINE_BASE) and (OPCODE_BASE + diffline - LINE_BASE <= 255) then
@@ -3517,7 +3551,7 @@ implementation
           file recs. are less than 1k so using data2 is enough }
           file recs. are less than 1k so using data2 is enough }
         if assigned(def.typesym) then
         if assigned(def.typesym) then
           append_entry(DW_TAG_structure_type,false,[
           append_entry(DW_TAG_structure_type,false,[
-           DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+           DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
            DW_AT_byte_size,DW_FORM_udata,def.size
            DW_AT_byte_size,DW_FORM_udata,def.size
           ])
           ])
         else
         else
@@ -3660,7 +3694,7 @@ implementation
 
 
             if assigned(def.typesym) then
             if assigned(def.typesym) then
               append_entry(DW_TAG_set_type,false,[
               append_entry(DW_TAG_set_type,false,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_byte_size,DW_FORM_data2,def.size
                 DW_AT_byte_size,DW_FORM_data2,def.size
                 ])
                 ])
             else
             else
@@ -3696,7 +3730,7 @@ implementation
             { info of modules that contain set tags                          }
             { info of modules that contain set tags                          }
             if assigned(def.typesym) then
             if assigned(def.typesym) then
               append_entry(DW_TAG_base_type,false,[
               append_entry(DW_TAG_base_type,false,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_encoding,DW_FORM_data1,DW_ATE_unsigned,
                 DW_AT_encoding,DW_FORM_data1,DW_ATE_unsigned,
                 DW_AT_byte_size,DW_FORM_data2,def.size
                 DW_AT_byte_size,DW_FORM_data2,def.size
                 ])
                 ])
@@ -3764,7 +3798,7 @@ implementation
 
 
         if assigned(def.typesym) then
         if assigned(def.typesym) then
           append_entry(DW_TAG_array_type,true,[
           append_entry(DW_TAG_array_type,true,[
-            DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+            DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
             DW_AT_data_location,DW_FORM_block1,2
             DW_AT_data_location,DW_FORM_block1,2
             ])
             ])
         else
         else
@@ -3935,7 +3969,7 @@ implementation
       begin
       begin
         if assigned(def.typesym) then
         if assigned(def.typesym) then
           append_entry(DW_TAG_file_type,false,[
           append_entry(DW_TAG_file_type,false,[
-            DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+            DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
             DW_AT_byte_size,DW_FORM_data2,def.size
             DW_AT_byte_size,DW_FORM_data2,def.size
             ])
             ])
         else
         else
@@ -4073,7 +4107,7 @@ implementation
         { ??? can a undefined def have a typename ? }
         { ??? can a undefined def have a typename ? }
         if assigned(def.typesym) then
         if assigned(def.typesym) then
           append_entry(DW_TAG_unspecified_type,false,[
           append_entry(DW_TAG_unspecified_type,false,[
-            DW_AT_name,DW_FORM_string,symname(def.typesym)+#0
+            DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0
             ])
             ])
         else
         else
           append_entry(DW_TAG_unspecified_type,false,[
           append_entry(DW_TAG_unspecified_type,false,[

+ 6 - 10
compiler/dbgstabs.pas

@@ -1408,20 +1408,16 @@ implementation
             if target_info.system in systems_dotted_function_names then
             if target_info.system in systems_dotted_function_names then
               mangledname:='.'+mangledname;
               mangledname:='.'+mangledname;
             // LBRAC
             // LBRAC
-            ss:=tostr(STABS_N_LBRAC)+',0,0,'+mangledname;
-            if not(af_stabs_use_function_absolute_addresses in target_asm.flags) then
-              begin
-                ss:=ss+'-';
-                ss:=ss+mangledname;
-              end;
+            if af_stabs_use_function_absolute_addresses in target_asm.flags then
+              ss:=tostr(STABS_N_LBRAC)+',0,0,'+mangledname
+            else
+              ss:=tostr(STABS_N_LBRAC)+',0,0,0';
             result.concat(Tai_stab.Create_ansistr(stab_stabn,ss));
             result.concat(Tai_stab.Create_ansistr(stab_stabn,ss));
+
             // RBRAC
             // RBRAC
             ss:=tostr(STABS_N_RBRAC)+',0,0,'+stabsendlabel.name;
             ss:=tostr(STABS_N_RBRAC)+',0,0,'+stabsendlabel.name;
             if not(af_stabs_use_function_absolute_addresses in target_asm.flags) then
             if not(af_stabs_use_function_absolute_addresses in target_asm.flags) then
-              begin
-                ss:=ss+'-';
-                ss:=ss+mangledname;
-              end;
+              ss:=ss+'-'+mangledname;
             result.concat(Tai_stab.Create_ansistr(stab_stabn,ss));
             result.concat(Tai_stab.Create_ansistr(stab_stabn,ss));
 
 
             { the stabsendlabel must come after all other stabs for this }
             { the stabsendlabel must come after all other stabs for this }

+ 27 - 9
compiler/defcmp.pas

@@ -1533,6 +1533,10 @@ implementation
                    begin
                    begin
                      { procvar -> procvar }
                      { procvar -> procvar }
                      eq:=proc_to_procvar_equal(tprocvardef(def_from),tprocvardef(def_to),cdo_warn_incompatible_univ in cdoptions);
                      eq:=proc_to_procvar_equal(tprocvardef(def_from),tprocvardef(def_to),cdo_warn_incompatible_univ in cdoptions);
+                     if eq<te_equal then
+                       doconv:=tc_proc_2_procvar
+                     else
+                       doconv:=tc_equal;
                    end;
                    end;
                  pointerdef :
                  pointerdef :
                    begin
                    begin
@@ -2176,30 +2180,38 @@ implementation
          if not(assigned(def1)) or not(assigned(def2)) then
          if not(assigned(def1)) or not(assigned(def2)) then
            exit;
            exit;
          { check for method pointer and local procedure pointer:
          { check for method pointer and local procedure pointer:
-             a) if one is a procedure of object, the other also has to be one
-             b) if one is a pure address, the other also has to be one
+             a) anything but procvars can be assigned to blocks
+             b) if one is a procedure of object, the other also has to be one
+                (except for block)
+             c) if one is a pure address, the other also has to be one
                 except if def1 is a global proc and def2 is a nested procdef
                 except if def1 is a global proc and def2 is a nested procdef
                 (global procedures can be converted into nested procvars)
                 (global procedures can be converted into nested procvars)
-             c) if def1 is a nested procedure, then def2 has to be a nested
+             d) if def1 is a nested procedure, then def2 has to be a nested
                 procvar and def1 has to have the po_delphi_nested_cc option
                 procvar and def1 has to have the po_delphi_nested_cc option
-             d) if def1 is a procvar, def1 and def2 both have to be nested or
+             e) if def1 is a procvar, def1 and def2 both have to be nested or
                 non-nested (we don't allow assignments from non-nested to
                 non-nested (we don't allow assignments from non-nested to
                 nested procvars to make sure that we can still implement
                 nested procvars to make sure that we can still implement
                 nested procvars using trampolines -- e.g., this would be
                 nested procvars using trampolines -- e.g., this would be
                 necessary for LLVM or CIL as long as they do not have support
                 necessary for LLVM or CIL as long as they do not have support
                 for Delphi-style frame pointer parameter passing) }
                 for Delphi-style frame pointer parameter passing) }
-         if (def1.is_methodpointer<>def2.is_methodpointer) or  { a) }
-            ((def1.is_addressonly<>def2.is_addressonly) and    { b) }
+         if is_block(def2) then                                     { a) }
+           { can't explicitly check against procvars here, because
+             def1 may already be a procvar due to a proc_to_procvar;
+             this is checked in the type conversion node itself -> ok }
+         else if (def1.is_methodpointer<>def2.is_methodpointer) or  { b) }
+            ((def1.is_addressonly<>def2.is_addressonly) and         { c) }
              (is_nested_pd(def1) or
              (is_nested_pd(def1) or
               not is_nested_pd(def2))) or
               not is_nested_pd(def2))) or
-            ((def1.typ=procdef) and                            { c) }
+            ((def1.typ=procdef) and                                 { d) }
              is_nested_pd(def1) and
              is_nested_pd(def1) and
              (not(po_delphi_nested_cc in def1.procoptions) or
              (not(po_delphi_nested_cc in def1.procoptions) or
               not is_nested_pd(def2))) or
               not is_nested_pd(def2))) or
-            ((def1.typ=procvardef) and                         { d) }
+            ((def1.typ=procvardef) and                              { e) }
              (is_nested_pd(def1)<>is_nested_pd(def2))) then
              (is_nested_pd(def1)<>is_nested_pd(def2))) then
            exit;
            exit;
          pa_comp:=[cpo_ignoreframepointer];
          pa_comp:=[cpo_ignoreframepointer];
+         if is_block(def2) then
+           include(pa_comp,cpo_ignorehidden);
          if checkincompatibleuniv then
          if checkincompatibleuniv then
            include(pa_comp,cpo_warn_incompatible_univ);
            include(pa_comp,cpo_warn_incompatible_univ);
          { check return value and options, methodpointer is already checked }
          { check return value and options, methodpointer is already checked }
@@ -2209,7 +2221,10 @@ implementation
            include(po_comp,po_staticmethod);
            include(po_comp,po_staticmethod);
          if (m_delphi in current_settings.modeswitches) then
          if (m_delphi in current_settings.modeswitches) then
            exclude(po_comp,po_varargs);
            exclude(po_comp,po_varargs);
-         if (def1.proccalloption=def2.proccalloption) and
+         { for blocks, the calling convention doesn't matter because we have to
+           generate a wrapper anyway }
+         if ((po_is_block in def2.procoptions) or
+             (def1.proccalloption=def2.proccalloption)) and
             ((po_comp * def1.procoptions)= (po_comp * def2.procoptions)) and
             ((po_comp * def1.procoptions)= (po_comp * def2.procoptions)) and
             equal_defs(def1.returndef,def2.returndef) then
             equal_defs(def1.returndef,def2.returndef) then
           begin
           begin
@@ -2224,6 +2239,9 @@ implementation
                 { prefer non-nested to non-nested over non-nested to nested }
                 { prefer non-nested to non-nested over non-nested to nested }
                 if (is_nested_pd(def1)<>is_nested_pd(def2)) then
                 if (is_nested_pd(def1)<>is_nested_pd(def2)) then
                   eq:=te_convert_l1;
                   eq:=te_convert_l1;
+                { in case of non-block to block, we need a type conversion }
+                if (po_is_block in def1.procoptions) <> (po_is_block in def2.procoptions) then
+                  eq:=te_convert_l1;
               end;
               end;
             proc_to_procvar_equal:=eq;
             proc_to_procvar_equal:=eq;
           end;
           end;

+ 9 - 0
compiler/defutil.pas

@@ -331,6 +331,9 @@ interface
     { returns true of def is a methodpointer }
     { returns true of def is a methodpointer }
     function is_methodpointer(def : tdef) : boolean;
     function is_methodpointer(def : tdef) : boolean;
 
 
+    { returns true if def is a C "block" }
+    function is_block(def: tdef): boolean;
+
 implementation
 implementation
 
 
     uses
     uses
@@ -1425,4 +1428,10 @@ implementation
         result:=(def.typ=procvardef) and (po_methodpointer in tprocvardef(def).procoptions);
         result:=(def.typ=procvardef) and (po_methodpointer in tprocvardef(def).procoptions);
       end;
       end;
 
 
+
+    function is_block(def: tdef): boolean;
+      begin
+        result:=(def.typ=procvardef) and (po_is_block in tprocvardef(def).procoptions)
+      end;
+
 end.
 end.

+ 1 - 1
compiler/expunix.pas

@@ -154,7 +154,7 @@ begin
            current_asmdata.asmlists[al_procedures].concat(Tai_symbol.Createname_global(hp2.name^,AT_FUNCTION,0));
            current_asmdata.asmlists[al_procedures].concat(Tai_symbol.Createname_global(hp2.name^,AT_FUNCTION,0));
            if (cs_create_pic in current_settings.moduleswitches) and
            if (cs_create_pic in current_settings.moduleswitches) and
              { other targets need to be checked how it works }
              { other targets need to be checked how it works }
-             (target_info.system in [system_i386_freebsd,system_x86_64_freebsd,system_x86_64_linux,system_i386_linux,system_x86_64_solaris,system_i386_solaris,system_i386_android]) then
+             (target_info.system in [system_i386_freebsd,system_x86_64_freebsd,system_x86_64_linux,system_i386_linux,system_x86_64_solaris,system_i386_solaris,system_i386_android,system_x86_64_dragonfly]) then
              begin
              begin
 {$ifdef x86}
 {$ifdef x86}
                sym:=current_asmdata.RefAsmSymbol(pd.mangledname);
                sym:=current_asmdata.RefAsmSymbol(pd.mangledname);

+ 1 - 1
compiler/finput.pas

@@ -454,7 +454,7 @@ uses
         fileopen:=false;
         fileopen:=false;
         try
         try
           f:=CFileStreamClass.Create(filename,fmOpenRead);
           f:=CFileStreamClass.Create(filename,fmOpenRead);
-          fileopen:=true;
+          fileopen:=CStreamError=0;
         except
         except
         end;
         end;
       end;
       end;

+ 1 - 0
compiler/fpcdefs.inc

@@ -242,6 +242,7 @@
   {$define cpurox}
   {$define cpurox}
   {$define cputargethasfixedstack}
   {$define cputargethasfixedstack}
   {$define cpurefshaveindexreg}
   {$define cpurefshaveindexreg}
+  {$define SUPPORT_GET_FRAME}
 {$endif aarch64}
 {$endif aarch64}
 
 
 {$IFDEF MACOS}
 {$IFDEF MACOS}

+ 10 - 2
compiler/fppu.pas

@@ -23,6 +23,8 @@ unit fppu;
 
 
 {$i fpcdefs.inc}
 {$i fpcdefs.inc}
 
 
+{ $define DEBUG_UNIT_CRC_CHANGES}
+
 { close ppufiles on system that are
 { close ppufiles on system that are
   short on file handles like DOS system PM }
   short on file handles like DOS system PM }
 {$ifdef GO32V2}
 {$ifdef GO32V2}
@@ -836,7 +838,13 @@ var
                 end;
                 end;
              end
              end
            else
            else
-             temp:=' not available';
+             begin
+               { still register the source module for proper error messages
+                 since source_avail for the module is still false, this should not hurt }
+               sourcefiles.register_file(tdosinputfile.create(hs));
+
+               temp:=' not available';
+             end;
            if is_main then
            if is_main then
              begin
              begin
                mainsource:=hs;
                mainsource:=hs;
@@ -1471,7 +1479,7 @@ var
         { we can now derefence all pointers to the implementation parts }
         { we can now derefence all pointers to the implementation parts }
         tstoredsymtable(globalsymtable).derefimpl;
         tstoredsymtable(globalsymtable).derefimpl;
         if assigned(localsymtable) then
         if assigned(localsymtable) then
-          tstoredsymtable(localsymtable).derefimpl;
+            tstoredsymtable(localsymtable).derefimpl;
 
 
          { read whole program optimisation-related information }
          { read whole program optimisation-related information }
          wpoinfo:=tunitwpoinfo.ppuload(ppufile);
          wpoinfo:=tunitwpoinfo.ppuload(ppufile);

+ 24 - 0
compiler/generic/cpuinfo.pas

@@ -22,6 +22,13 @@ Interface
 
 
 Type
 Type
    bestreal = extended;
    bestreal = extended;
+{$if FPC_FULLVERSION>20700}
+{$ifdef FPC_HAS_TYPE_EXTENDED}
+   bestrealrec = TExtended80Rec;
+{$else}
+   bestrealrec = TDoubleRec;
+{$endif}
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts32real = single;
    ts64real = double;
    ts64real = double;
    ts80real = type extended;
    ts80real = type extended;
@@ -42,7 +49,24 @@ Type
       fpu_soft
       fpu_soft
      );
      );
 
 
+   tcontrollertype =
+     (ct_none
+     );
+
 Const
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = false;
+
+   { We know that there are fields after sramsize
+     but we don't care about this warning }
+   {$PUSH}
+    {$WARN 3177 OFF}
+   embedded_controllers : array [tcontrollertype] of tcontrollerdatatype =
+   (
+      (controllertypestr:''; controllerunitstr:''; flashbase:0; flashsize:0; srambase:0; sramsize:0));
+   {$POP}
+
    cputypestr : array[tcputype] of string[8] = ('none');
    cputypestr : array[tcputype] of string[8] = ('none');
    fputypestr : array[tfputype] of string[6] = ('none','soft');
    fputypestr : array[tfputype] of string[6] = ('none','soft');
 
 

+ 51 - 37
compiler/globals.pas

@@ -41,7 +41,6 @@ interface
 {$ELSE}
 {$ELSE}
       fksysutl,
       fksysutl,
 {$ENDIF}
 {$ENDIF}
-
       { comphook pulls in sysutils anyways }
       { comphook pulls in sysutils anyways }
       cutils,cclasses,cfileutl,
       cutils,cclasses,cfileutl,
       cpuinfo,
       cpuinfo,
@@ -52,7 +51,7 @@ interface
          [m_delphi,m_class,m_objpas,m_result,m_string_pchar,
          [m_delphi,m_class,m_objpas,m_result,m_string_pchar,
           m_pointer_2_procedure,m_autoderef,m_tp_procvar,m_initfinal,m_default_ansistring,
           m_pointer_2_procedure,m_autoderef,m_tp_procvar,m_initfinal,m_default_ansistring,
           m_out,m_default_para,m_duplicate_names,m_hintdirective,
           m_out,m_default_para,m_duplicate_names,m_hintdirective,
-          m_property,m_default_inline,m_except,m_advanced_records];
+          m_property,m_default_inline,m_except,m_advanced_records,m_type_helpers];
        delphiunicodemodeswitches = delphimodeswitches + [m_systemcodepage,m_default_unicodestring];
        delphiunicodemodeswitches = delphimodeswitches + [m_systemcodepage,m_default_unicodestring];
        fpcmodeswitches =
        fpcmodeswitches =
          [m_fpc,m_string_pchar,m_nested_comment,m_repeat_forward,
          [m_fpc,m_string_pchar,m_nested_comment,m_repeat_forward,
@@ -84,23 +83,23 @@ interface
        treelogfilename = 'tree.log';
        treelogfilename = 'tree.log';
 
 
 {$if defined(CPUARM) and defined(FPUFPA)}
 {$if defined(CPUARM) and defined(FPUFPA)}
-       MathQNaN : tdoublerec = (bytes : (0,0,252,255,0,0,0,0));
-       MathInf : tdoublerec = (bytes : (0,0,240,127,0,0,0,0));
-       MathNegInf : tdoublerec = (bytes : (0,0,240,255,0,0,0,0));
-       MathPi : tdoublerec =  (bytes : (251,33,9,64,24,45,68,84));
+       MathQNaN : tcompdoublerec = (bytes : (0,0,252,255,0,0,0,0));
+       MathInf : tcompdoublerec = (bytes : (0,0,240,127,0,0,0,0));
+       MathNegInf : tcompdoublerec = (bytes : (0,0,240,255,0,0,0,0));
+       MathPi : tcompdoublerec =  (bytes : (251,33,9,64,24,45,68,84));
 {$else}
 {$else}
 {$ifdef FPC_LITTLE_ENDIAN}
 {$ifdef FPC_LITTLE_ENDIAN}
-       MathQNaN : tdoublerec = (bytes : (0,0,0,0,0,0,252,255));
-       MathInf : tdoublerec = (bytes : (0,0,0,0,0,0,240,127));
-       MathNegInf : tdoublerec = (bytes : (0,0,0,0,0,0,240,255));
-       MathPi : tdoublerec = (bytes : (24,45,68,84,251,33,9,64));
-       MathPiExtended : textendedrec = (bytes : (53,194,104,33,162,218,15,201,0,64));
+       MathQNaN : tcompdoublerec = (bytes : (0,0,0,0,0,0,252,255));
+       MathInf : tcompdoublerec = (bytes : (0,0,0,0,0,0,240,127));
+       MathNegInf : tcompdoublerec = (bytes : (0,0,0,0,0,0,240,255));
+       MathPi : tcompdoublerec = (bytes : (24,45,68,84,251,33,9,64));
+       MathPiExtended : tcompextendedrec = (bytes : (53,194,104,33,162,218,15,201,0,64));
 {$else FPC_LITTLE_ENDIAN}
 {$else FPC_LITTLE_ENDIAN}
-       MathQNaN : tdoublerec = (bytes : (255,252,0,0,0,0,0,0));
-       MathInf : tdoublerec = (bytes : (127,240,0,0,0,0,0,0));
-       MathNegInf : tdoublerec = (bytes : (255,240,0,0,0,0,0,0));
-       MathPi : tdoublerec =  (bytes : (64,9,33,251,84,68,45,24));
-       MathPiExtended : textendedrec = (bytes : (64,0,201,15,218,162,33,104,194,53));
+       MathQNaN : tcompdoublerec = (bytes : (255,252,0,0,0,0,0,0));
+       MathInf : tcompdoublerec = (bytes : (127,240,0,0,0,0,0,0));
+       MathNegInf : tcompdoublerec = (bytes : (255,240,0,0,0,0,0,0));
+       MathPi : tcompdoublerec =  (bytes : (64,9,33,251,84,68,45,24));
+       MathPiExtended : tcompextendedrec = (bytes : (64,0,201,15,218,162,33,104,194,53));
 {$endif FPC_LITTLE_ENDIAN}
 {$endif FPC_LITTLE_ENDIAN}
 {$endif}
 {$endif}
 
 
@@ -163,9 +162,8 @@ interface
 {$endif defined(ARM)}
 {$endif defined(ARM)}
 
 
         { CPU targets with microcontroller support can add a controller specific unit }
         { CPU targets with microcontroller support can add a controller specific unit }
-{$if defined(ARM) or defined(AVR) or defined(MIPSEL)}
          controllertype   : tcontrollertype;
          controllertype   : tcontrollertype;
-{$endif defined(ARM) or defined(AVR) or defined(MIPSEL)}
+
          { WARNING: this pointer cannot be written as such in record token }
          { WARNING: this pointer cannot be written as such in record token }
          pmessage : pmessagestaterecord;
          pmessage : pmessagestaterecord;
        end;
        end;
@@ -238,7 +236,9 @@ interface
        paralinkoptions   : TCmdStr;
        paralinkoptions   : TCmdStr;
        paradynamiclinker : string;
        paradynamiclinker : string;
        paraprintnodetree : byte;
        paraprintnodetree : byte;
+{$ifdef PREPROCWRITE}
        parapreprocess    : boolean;
        parapreprocess    : boolean;
+{$endif PREPROCWRITE}
        printnodefile     : text;
        printnodefile     : text;
 
 
        {  typical cross compiling params}
        {  typical cross compiling params}
@@ -410,7 +410,7 @@ interface
 {$endif i8086}
 {$endif i8086}
         maxfpuregisters : 0;
         maxfpuregisters : 0;
 
 
-{ Note: GENERIC_CPU is sued together with generic subdirectory to
+{ Note: GENERIC_CPU is used together with generic subdirectory to
   be able to compile some of the units without any real CPU.
   be able to compile some of the units without any real CPU.
   This is used to generate a CPU independant PPUDUMP utility. PM }
   This is used to generate a CPU independant PPUDUMP utility. PM }
 {$ifdef GENERIC_CPU}
 {$ifdef GENERIC_CPU}
@@ -444,8 +444,8 @@ interface
         fputype : fpu_hard;
         fputype : fpu_hard;
   {$endif sparc}
   {$endif sparc}
   {$ifdef arm}
   {$ifdef arm}
-        cputype : cpu_armv3;
-        optimizecputype : cpu_armv3;
+        cputype : cpu_armv4;
+        optimizecputype : cpu_armv4;
         fputype : fpu_fpa;
         fputype : fpu_fpa;
   {$endif arm}
   {$endif arm}
   {$ifdef x86_64}
   {$ifdef x86_64}
@@ -501,9 +501,7 @@ interface
 {$if defined(ARM)}
 {$if defined(ARM)}
         instructionset : is_arm;
         instructionset : is_arm;
 {$endif defined(ARM)}
 {$endif defined(ARM)}
-{$if defined(ARM) or defined(AVR) or defined(MIPSEL)}
         controllertype : ct_none;
         controllertype : ct_none;
-{$endif defined(ARM) or defined(AVR) or defined(MIPSEL)}
         pmessage : nil;
         pmessage : nil;
       );
       );
 
 
@@ -535,9 +533,7 @@ interface
     function Setoptimizecputype(const s:string;var a:tcputype):boolean;
     function Setoptimizecputype(const s:string;var a:tcputype):boolean;
     function Setcputype(const s:string;var a:tsettings):boolean;
     function Setcputype(const s:string;var a:tsettings):boolean;
     function SetFpuType(const s:string;var a:tfputype):boolean;
     function SetFpuType(const s:string;var a:tfputype):boolean;
-{$if defined(arm) or defined(avr) or defined(mipsel)}
     function SetControllerType(const s:string;var a:tcontrollertype):boolean;
     function SetControllerType(const s:string;var a:tcontrollertype):boolean;
-{$endif defined(arm) or defined(avr) or defined(mipsel)}
     function IncludeFeature(const s : string) : boolean;
     function IncludeFeature(const s : string) : boolean;
     function SetMinFPConstPrec(const s: string; var a: tfloattype) : boolean;
     function SetMinFPConstPrec(const s: string; var a: tfloattype) : boolean;
 
 
@@ -837,6 +833,13 @@ implementation
            Replace(s,'$FPCTARGET',target_os_string)
            Replace(s,'$FPCTARGET',target_os_string)
          else
          else
            Replace(s,'$FPCTARGET',target_full_string);
            Replace(s,'$FPCTARGET',target_full_string);
+         Replace(s,'$FPCSUBARCH',lower(cputypestr[init_settings.cputype]));
+         Replace(s,'$FPCABI',lower(abiinfo[target_info.abi].name));
+{$ifdef i8086}
+         Replace(s,'$FPCMEMORYMODEL',lower(x86memorymodelstr[init_settings.x86memorymodel]));
+{$else i8086}
+         Replace(s,'$FPCMEMORYMODEL','flat');
+{$endif i8086}
 {$ifdef mswindows}
 {$ifdef mswindows}
          ReplaceSpecialFolder('$LOCAL_APPDATA',CSIDL_LOCAL_APPDATA);
          ReplaceSpecialFolder('$LOCAL_APPDATA',CSIDL_LOCAL_APPDATA);
          ReplaceSpecialFolder('$APPDATA',CSIDL_APPDATA);
          ReplaceSpecialFolder('$APPDATA',CSIDL_APPDATA);
@@ -967,7 +970,7 @@ implementation
           result := -1;
           result := -1;
       end;
       end;
 
 
-    function convertdoublerec(d : tdoublerec) : tdoublerec;{$ifdef USEINLINE}inline;{$endif}
+    function convertdoublerec(d : tcompdoublerec) : tcompdoublerec;{$ifdef USEINLINE}inline;{$endif}
 {$ifdef CPUARM}
 {$ifdef CPUARM}
       var
       var
         i : longint;
         i : longint;
@@ -1176,23 +1179,34 @@ implementation
       end;
       end;
 
 
 
 
-{$if defined(arm) or defined(avr) or defined(mipsel)}
     function SetControllerType(const s:string;var a:tcontrollertype):boolean;
     function SetControllerType(const s:string;var a:tcontrollertype):boolean;
       var
       var
         t  : tcontrollertype;
         t  : tcontrollertype;
         hs : string;
         hs : string;
       begin
       begin
-        result:=false;
-        hs:=Upper(s);
-        for t:=low(tcontrollertype) to high(tcontrollertype) do
-          if embedded_controllers[t].controllertypestr=hs then
-            begin
-              a:=t;
-              result:=true;
-              break;
-            end;
+{ The following check allows to reduce amount of code for platforms  }
+{ not supporting microcontrollers due to evaluation at compile time. }
+{$PUSH}
+ {$WARN 6018 OFF} (* Unreachable code due to compile time evaluation *)
+        if ControllerSupport then
+         begin
+          result:=false;
+          hs:=Upper(s);
+          for t:=low(tcontrollertype) to high(tcontrollertype) do
+            if embedded_controllers[t].controllertypestr=hs then
+              begin
+                a:=t;
+                result:=true;
+                break;
+              end;
+         end
+        else
+         begin
+          a := ct_none;
+          Result := true;
+         end;
+{$POP}
       end;
       end;
-{$endif defined(arm) or defined(avr) or defined(mipsel)}
 
 
 
 
     function IncludeFeature(const s : string) : boolean;
     function IncludeFeature(const s : string) : boolean;

+ 20 - 8
compiler/globtype.pas

@@ -110,12 +110,12 @@ interface
 {$endif i8086}
 {$endif i8086}
 
 
        { Use a variant record to be sure that the array if aligned correctly }
        { Use a variant record to be sure that the array if aligned correctly }
-       tdoublerec=record
+       tcompdoublerec=record
          case byte of
          case byte of
            0 : (bytes:array[0..7] of byte);
            0 : (bytes:array[0..7] of byte);
            1 : (value:double);
            1 : (value:double);
        end;
        end;
-       textendedrec=record
+       tcompextendedrec=record
          case byte of
          case byte of
            0 : (bytes:array[0..9] of byte);
            0 : (bytes:array[0..9] of byte);
            1 : (value:extended);
            1 : (value:extended);
@@ -267,7 +267,7 @@ interface
      type
      type
        { optimizer }
        { optimizer }
        toptimizerswitch = (cs_opt_none,
        toptimizerswitch = (cs_opt_none,
-         cs_opt_level1,cs_opt_level2,cs_opt_level3,
+         cs_opt_level1,cs_opt_level2,cs_opt_level3,cs_opt_level4,
          cs_opt_regvar,cs_opt_uncertain,cs_opt_size,cs_opt_stackframe,
          cs_opt_regvar,cs_opt_uncertain,cs_opt_size,cs_opt_stackframe,
          cs_opt_peephole,cs_opt_asmcse,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_nodecse,
          cs_opt_peephole,cs_opt_asmcse,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_nodecse,
          cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler,cs_opt_autoinline,cs_useebp,cs_userbp,
          cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler,cs_opt_autoinline,cs_useebp,cs_userbp,
@@ -313,7 +313,7 @@ interface
 
 
     const
     const
        OptimizerSwitchStr : array[toptimizerswitch] of string[17] = ('',
        OptimizerSwitchStr : array[toptimizerswitch] of string[17] = ('',
-         'LEVEL1','LEVEL2','LEVEL3',
+         'LEVEL1','LEVEL2','LEVEL3','LEVEL4',
          'REGVAR','UNCERTAIN','SIZE','STACKFRAME',
          'REGVAR','UNCERTAIN','SIZE','STACKFRAME',
          'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE',
          'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE',
          'DFA','STRENGTH','SCHEDULE','AUTOINLINE','USEEBP','USERBP',
          'DFA','STRENGTH','SCHEDULE','AUTOINLINE','USEEBP','USERBP',
@@ -345,7 +345,7 @@ interface
        genericlevel1optimizerswitches = [cs_opt_level1,cs_opt_peephole];
        genericlevel1optimizerswitches = [cs_opt_level1,cs_opt_peephole];
        genericlevel2optimizerswitches = [cs_opt_level2,cs_opt_remove_emtpy_proc];
        genericlevel2optimizerswitches = [cs_opt_level2,cs_opt_remove_emtpy_proc];
        genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate,cs_opt_nodedfa];
        genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate,cs_opt_nodedfa];
-       genericlevel4optimizerswitches = [cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
+       genericlevel4optimizerswitches = [cs_opt_level4,cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
 
 
        { whole program optimizations whose information generation requires
        { whole program optimizations whose information generation requires
          information from all loaded units
          information from all loaded units
@@ -400,8 +400,9 @@ interface
                                   fields in Java) }
                                   fields in Java) }
          m_default_unicodestring, { makes the default string type in $h+ mode unicodestring rather than
          m_default_unicodestring, { makes the default string type in $h+ mode unicodestring rather than
                                     ansistring; similarly, char becomes unicodechar rather than ansichar }
                                     ansistring; similarly, char becomes unicodechar rather than ansichar }
-         m_type_helpers         { allows the declaration of "type helper" (non-Delphi) or "record helper"
+         m_type_helpers,        { allows the declaration of "type helper" (non-Delphi) or "record helper"
                                   (Delphi) for primitive types }
                                   (Delphi) for primitive types }
+         m_blocks               { support for http://en.wikipedia.org/wiki/Blocks_(C_language_extension) }
        );
        );
        tmodeswitches = set of tmodeswitch;
        tmodeswitches = set of tmodeswitch;
 
 
@@ -566,7 +567,8 @@ interface
          'SYSTEMCODEPAGE',
          'SYSTEMCODEPAGE',
          'FINALFIELDS',
          'FINALFIELDS',
          'UNICODESTRINGS',
          'UNICODESTRINGS',
-         'TYPEHELPERS');
+         'TYPEHELPERS',
+         'CBLOCKS');
 
 
 
 
      type
      type
@@ -615,7 +617,9 @@ interface
          { allocates memory on stack, so stack is unbalanced on exit }
          { allocates memory on stack, so stack is unbalanced on exit }
          pi_has_stack_allocs,
          pi_has_stack_allocs,
          { set if the stack frame of the procedure is estimated }
          { set if the stack frame of the procedure is estimated }
-         pi_estimatestacksize
+         pi_estimatestacksize,
+         { the routine calls a C-style varargs function }
+         pi_calls_c_varargs
        );
        );
        tprocinfoflags=set of tprocinfoflag;
        tprocinfoflags=set of tprocinfoflag;
 
 
@@ -705,6 +709,14 @@ interface
 
 
     type
     type
       tx86memorymodel = (mm_tiny,mm_small,mm_medium,mm_compact,mm_large,mm_huge);
       tx86memorymodel = (mm_tiny,mm_small,mm_medium,mm_compact,mm_large,mm_huge);
+    const
+      x86memorymodelstr : array[tx86memorymodel] of string[7]=(
+        'TINY',
+        'SMALL',
+        'MEDIUM',
+        'COMPACT',
+        'LARGE',
+        'HUGE');
 
 
   { hide Sysutils.ExecuteProcess in units using this one after SysUtils}
   { hide Sysutils.ExecuteProcess in units using this one after SysUtils}
   const
   const

+ 4 - 3
compiler/hlcg2ll.pas

@@ -171,7 +171,7 @@ unit hlcg2ll;
           procedure a_loadaddr_ref_reg(list : TAsmList;fromsize, tosize : tdef;const ref : treference;r : tregister);override;
           procedure a_loadaddr_ref_reg(list : TAsmList;fromsize, tosize : tdef;const ref : treference;r : tregister);override;
 
 
           { bit scan instructions }
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); override;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override;
 
 
           { fpu move instructions }
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); override;
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); override;
@@ -580,9 +580,9 @@ implementation
       cg.a_loadaddr_ref_reg(list,ref,r);
       cg.a_loadaddr_ref_reg(list,ref,r);
     end;
     end;
 
 
-  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister);
+  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister);
     begin
     begin
-      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(size),src,dst);
+      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(srcsize),def_cgsize(dstsize),src,dst);
     end;
     end;
 
 
   procedure thlcg2ll.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister);
   procedure thlcg2ll.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister);
@@ -650,6 +650,7 @@ implementation
         internalerror(2012071226);
         internalerror(2012071226);
       tocgsize:=getintmmcgsize(reg,def_cgmmsize(tosize));
       tocgsize:=getintmmcgsize(reg,def_cgmmsize(tosize));
       case loc.loc of
       case loc.loc of
+        LOC_CONSTANT,
         LOC_SUBSETREG,LOC_CSUBSETREG,
         LOC_SUBSETREG,LOC_CSUBSETREG,
         LOC_SUBSETREF,LOC_CSUBSETREF:
         LOC_SUBSETREF,LOC_CSUBSETREF:
           begin
           begin

+ 30 - 14
compiler/hlcgobj.pas

@@ -306,7 +306,7 @@ unit hlcgobj;
          public
          public
 
 
           { bit scan instructions (still need transformation to thlcgobj) }
           { bit scan instructions (still need transformation to thlcgobj) }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); virtual; abstract;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); virtual; abstract;
 
 
           { fpu move instructions }
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); virtual; abstract;
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); virtual; abstract;
@@ -591,9 +591,9 @@ unit hlcgobj;
           procedure paravarsym_set_initialloc_to_paraloc(vs: tparavarsym); virtual;
           procedure paravarsym_set_initialloc_to_paraloc(vs: tparavarsym); virtual;
 
 
           procedure gen_load_para_value(list:TAsmList);virtual;
           procedure gen_load_para_value(list:TAsmList);virtual;
-         protected
           { helpers called by gen_load_para_value }
           { helpers called by gen_load_para_value }
           procedure g_copyvalueparas(p:TObject;arg:pointer);virtual;
           procedure g_copyvalueparas(p:TObject;arg:pointer);virtual;
+         protected
           procedure gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation;const cgpara: tcgpara;locintsize: longint);virtual;
           procedure gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation;const cgpara: tcgpara;locintsize: longint);virtual;
           procedure init_paras(p:TObject;arg:pointer);
           procedure init_paras(p:TObject;arg:pointer);
          protected
          protected
@@ -2384,7 +2384,9 @@ implementation
 
 
   function thlcgobj.get_bit_reg_ref_sref(list: TAsmList; bitnumbersize, refsize: tdef; bitnumber: tregister; const ref: treference): tsubsetreference;
   function thlcgobj.get_bit_reg_ref_sref(list: TAsmList; bitnumbersize, refsize: tdef; bitnumber: tregister; const ref: treference): tsubsetreference;
     var
     var
-      tmpreg: tregister;
+      refptrdef: tdef;
+      tmpreg,
+      newbase: tregister;
     begin
     begin
       result.ref:=ref;
       result.ref:=ref;
       result.startbit:=0;
       result.startbit:=0;
@@ -2396,13 +2398,15 @@ implementation
 
 
       { don't assign to ref.base, that one is for pointers and this is an index
       { don't assign to ref.base, that one is for pointers and this is an index
         (important for platforms like LLVM) }
         (important for platforms like LLVM) }
-      if (result.ref.index=NR_NO) then
-        result.ref.index:=tmpreg
-      else
+      if result.ref.index<>NR_NO then
         begin
         begin
-          a_op_reg_reg(list,OP_ADD,ptruinttype,result.ref.index,tmpreg);
-          result.ref.index:=tmpreg;
+          { don't just add to ref.index, as it may be scaled }
+          refptrdef:=getpointerdef(refsize);
+          newbase:=getaddressregister(list,refptrdef);
+          a_loadaddr_ref_reg(list,refsize,refptrdef,ref,newbase);
+          reference_reset_base(result.ref,refptrdef,newbase,0,result.ref.alignment);
         end;
         end;
+      result.ref.index:=tmpreg;
       tmpreg:=getintregister(list,ptruinttype);
       tmpreg:=getintregister(list,ptruinttype);
       a_load_reg_reg(list,bitnumbersize,ptruinttype,bitnumber,tmpreg);
       a_load_reg_reg(list,bitnumbersize,ptruinttype,bitnumber,tmpreg);
       a_op_const_reg(list,OP_AND,ptruinttype,7,tmpreg);
       a_op_const_reg(list,OP_AND,ptruinttype,7,tmpreg);
@@ -4571,7 +4575,7 @@ implementation
     var
     var
       href : treference;
       href : treference;
     begin
     begin
-      if (tsym(p).typ=staticvarsym) then
+      if (tsym(p).typ=staticvarsym) and not(tstaticvarsym(p).noregvarinitneeded) then
        begin
        begin
          { Static variables can have the initialloc only set to LOC_CxREGISTER
          { Static variables can have the initialloc only set to LOC_CxREGISTER
            or LOC_INVALID, for explaination see gen_alloc_symtable (PFV) }
            or LOC_INVALID, for explaination see gen_alloc_symtable (PFV) }
@@ -4703,7 +4707,11 @@ implementation
          if (tparavarsym(p).varspez=vs_value) then
          if (tparavarsym(p).varspez=vs_value) then
           begin
           begin
             include(current_procinfo.flags,pi_needs_implicit_finally);
             include(current_procinfo.flags,pi_needs_implicit_finally);
-            location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).localloc,href,is_open_array(tparavarsym(p).vardef),sizeof(pint));
+            location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).localloc,href,
+              is_open_array(tparavarsym(p).vardef) or
+              ((target_info.system in systems_caller_copy_addr_value_para) and
+               paramanager.push_addr_param(vs_value,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)),
+              sizeof(pint));
             if is_open_array(tparavarsym(p).vardef) then
             if is_open_array(tparavarsym(p).vardef) then
               begin
               begin
                 if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
                 if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
@@ -4723,7 +4731,8 @@ implementation
           end;
           end;
        end;
        end;
       { open arrays can contain elements requiring init/final code, so the else has been removed here }
       { open arrays can contain elements requiring init/final code, so the else has been removed here }
-      if (tparavarsym(p).varspez=vs_value) and
+      if not(target_info.system in systems_caller_copy_addr_value_para) and
+         (tparavarsym(p).varspez=vs_value) and
          (is_open_array(tparavarsym(p).vardef) or
          (is_open_array(tparavarsym(p).vardef) or
           is_array_of_const(tparavarsym(p).vardef)) then
           is_array_of_const(tparavarsym(p).vardef)) then
         begin
         begin
@@ -4761,7 +4770,11 @@ implementation
                  if not((tparavarsym(p).vardef.typ=variantdef) and
                  if not((tparavarsym(p).vardef.typ=variantdef) and
                    paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
                    paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
                    begin
                    begin
-                     location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,is_open_array(tparavarsym(p).vardef),sizeof(pint));
+                     location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,
+                       is_open_array(tparavarsym(p).vardef) or
+                       ((target_info.system in systems_caller_copy_addr_value_para) and
+                        paramanager.push_addr_param(vs_value,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)),
+                       sizeof(pint));
                      if is_open_array(tparavarsym(p).vardef) then
                      if is_open_array(tparavarsym(p).vardef) then
                        begin
                        begin
                          if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
                          if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
@@ -4862,8 +4875,11 @@ implementation
     begin
     begin
       list:=TAsmList(arg);
       list:=TAsmList(arg);
       if (tsym(p).typ=paravarsym) and
       if (tsym(p).typ=paravarsym) and
-         (tparavarsym(p).varspez=vs_value) and
-        (paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
+         ((vo_has_local_copy in tparavarsym(p).varoptions) or
+          (not(target_info.system in systems_caller_copy_addr_value_para) and
+           (is_open_array(tparavarsym(p).vardef) or
+            is_array_of_const(tparavarsym(p).vardef)) and
+           (tparavarsym(p).varspez=vs_value))) then
         begin
         begin
           { we have no idea about the alignment at the caller side }
           { we have no idea about the alignment at the caller side }
           location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,true,1);
           location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,true,1);

+ 63 - 22
compiler/htypechk.pas

@@ -26,7 +26,7 @@ unit htypechk;
 interface
 interface
 
 
     uses
     uses
-      cclasses,tokens,cpuinfo,
+      cclasses,cmsgs,tokens,cpuinfo,
       node,globtype,
       node,globtype,
       symconst,symtype,symdef,symsym,symbase;
       symconst,symtype,symdef,symsym,symbase;
 
 
@@ -178,6 +178,8 @@ interface
       arrays, records and objects are checked recursively }
       arrays, records and objects are checked recursively }
     function is_valid_for_default(def:tdef):boolean;
     function is_valid_for_default(def:tdef):boolean;
 
 
+    procedure UninitializedVariableMessage(pos : tfileposinfo;warning,local,managed : boolean;name : TMsgStr);
+
 implementation
 implementation
 
 
     uses
     uses
@@ -1092,6 +1094,23 @@ implementation
       end;
       end;
 
 
 
 
+    procedure UninitializedVariableMessage(pos : tfileposinfo;warning,local,managed : boolean;name : TMsgStr);
+      const
+        msg : array[false..true,false..true,false..true] of dword = (
+            (
+              (sym_h_uninitialized_variable,sym_h_uninitialized_managed_variable),
+              (sym_h_uninitialized_local_variable,sym_h_uninitialized_managed_local_variable)
+            ),
+            (
+              (sym_w_uninitialized_variable,sym_w_uninitialized_managed_variable),
+              (sym_w_uninitialized_local_variable,sym_w_uninitialized_managed_local_variable)
+            )
+          );
+      begin
+        CGMessagePos1(pos,msg[warning,local,managed],name);
+      end;
+
+
     procedure set_varstate(p:tnode;newstate:tvarstate;varstateflags:tvarstateflags);
     procedure set_varstate(p:tnode;newstate:tvarstate;varstateflags:tvarstateflags);
       const
       const
         vstrans: array[tvarstate,tvarstate] of tvarstate = (
         vstrans: array[tvarstate,tvarstate] of tvarstate = (
@@ -1197,32 +1216,29 @@ implementation
                                  if (vo_is_funcret in hsym.varoptions) then
                                  if (vo_is_funcret in hsym.varoptions) then
                                    begin
                                    begin
                                      if (vsf_use_hints in varstateflags) then
                                      if (vsf_use_hints in varstateflags) then
-                                       CGMessagePos(p.fileinfo,sym_h_function_result_uninitialized)
-                                     else
-                                       CGMessagePos(p.fileinfo,sym_w_function_result_uninitialized)
-                                   end
-                                 else
-                                   begin
-                                     if tloadnode(p).symtable.symtabletype=localsymtable then
                                        begin
                                        begin
-                                         { on the JVM, an uninitialized var-parameter
-                                           is just as fatal as a nil pointer dereference }
-                                         if (vsf_use_hints in varstateflags) and
-                                            not(target_info.system in systems_jvm) then
-                                           CGMessagePos1(p.fileinfo,sym_h_uninitialized_local_variable,hsym.realname)
+                                         if is_managed_type(hsym.vardef) then
+                                           CGMessagePos(p.fileinfo,sym_h_managed_function_result_uninitialized)
                                          else
                                          else
-                                           CGMessagePos1(p.fileinfo,sym_w_uninitialized_local_variable,hsym.realname);
+                                           CGMessagePos(p.fileinfo,sym_h_function_result_uninitialized);
                                        end
                                        end
                                      else
                                      else
                                        begin
                                        begin
-                                         { on the JVM, an uninitialized var-parameter
-                                           is just as fatal as a nil pointer dereference }
-                                         if (vsf_use_hints in varstateflags) and
-                                            not(target_info.system in systems_jvm) then
-                                           CGMessagePos1(p.fileinfo,sym_h_uninitialized_variable,hsym.realname)
+                                         if is_managed_type(hsym.vardef) then
+                                           CGMessagePos(p.fileinfo,sym_w_managed_function_result_uninitialized)
                                          else
                                          else
-                                           CGMessagePos1(p.fileinfo,sym_w_uninitialized_variable,hsym.realname);
+                                          CGMessagePos(p.fileinfo,sym_w_function_result_uninitialized);
                                        end;
                                        end;
+                                   end
+                                 else
+                                   begin
+                                     UninitializedVariableMessage(p.fileinfo,
+                                       { on the JVM, an uninitialized var-parameter
+                                         is just as fatal as a nil pointer dereference }
+                                       not((vsf_use_hints in varstateflags) and not(target_info.system in systems_jvm)),
+                                       tloadnode(p).symtable.symtabletype=localsymtable,
+                                       is_managed_type(tloadnode(p).resultdef),
+                                       hsym.realname);
                                    end;
                                    end;
                                end;
                                end;
                            end
                            end
@@ -2227,7 +2243,7 @@ implementation
                  break;
                  break;
              end;
              end;
            if is_objectpascal_helper(structdef) and
            if is_objectpascal_helper(structdef) and
-              (tobjectdef(structdef).typ in [recorddef,objectdef]) then
+              (tobjectdef(structdef).extendeddef.typ in [recorddef,objectdef]) then
              begin
              begin
                { search methods in the extended type as well }
                { search methods in the extended type as well }
                srsym:=tprocsym(tabstractrecorddef(tobjectdef(structdef).extendeddef).symtable.FindWithHash(hashedid));
                srsym:=tprocsym(tabstractrecorddef(tobjectdef(structdef).extendeddef).symtable.FindWithHash(hashedid));
@@ -2583,7 +2599,8 @@ implementation
         def_to   : tdef;
         def_to   : tdef;
         currpt,
         currpt,
         pt       : tcallparanode;
         pt       : tcallparanode;
-        eq       : tequaltype;
+        eq,
+        mineq    : tequaltype;
         convtype : tconverttype;
         convtype : tconverttype;
         pdtemp,
         pdtemp,
         pdoper   : tprocdef;
         pdoper   : tprocdef;
@@ -2763,6 +2780,30 @@ implementation
                    eq:=compare_defs_ext(n.resultdef,def_to,n.nodetype,convtype,pdoper,cdoptions);
                    eq:=compare_defs_ext(n.resultdef,def_to,n.nodetype,convtype,pdoper,cdoptions);
                    n.free;
                    n.free;
                  end
                  end
+              else if is_open_array(def_to) and
+                      is_class_or_interface_or_dispinterface_or_objc_or_java(tarraydef(def_to).elementdef) and
+                      is_array_constructor(currpt.left.resultdef) and
+                      assigned(tarrayconstructornode(currpt.left).left) then
+                begin
+                  { ensure that [nil] can be converted to "array of tobject",
+                    because if we just try to convert "array of pointer" to
+                    "array of tobject", we get type conversion errors in
+                    non-Delphi modes }
+                  n:=currpt.left;
+                  mineq:=te_exact;
+                  repeat
+                    if tarrayconstructornode(n).left.nodetype=arrayconstructorrangen then
+                      eq:=te_incompatible
+                    else
+                      eq:=compare_defs_ext(tarrayconstructornode(n).left.resultdef,tarraydef(def_to).elementdef,tarrayconstructornode(n).left.nodetype,convtype,pdoper,cdoptions);
+                    if eq<mineq then
+                      mineq:=eq;
+                    if eq=te_incompatible then
+                      break;
+                    n:=tarrayconstructornode(n).right;
+                  until not assigned(n);
+                  eq:=mineq;
+                end
               else
               else
               { generic type comparision }
               { generic type comparision }
                begin
                begin

+ 8 - 1
compiler/i386/cgcpu.pas

@@ -310,6 +310,13 @@ unit cgcpu;
         end;
         end;
 
 
       begin
       begin
+        { Release PIC register }
+        if (cs_create_pic in current_settings.moduleswitches) and
+           (tf_pic_uses_got in target_info.flags) and
+           (pi_needs_got in current_procinfo.flags) and
+           not(target_info.system in systems_darwin) then
+          list.concat(tai_regalloc.dealloc(NR_PIC_OFFSET_REG,nil));
+
         { MMX needs to call EMMS }
         { MMX needs to call EMMS }
         if assigned(rg[R_MMXREGISTER]) and
         if assigned(rg[R_MMXREGISTER]) and
            (rg[R_MMXREGISTER].uses_registers) then
            (rg[R_MMXREGISTER].uses_registers) then
@@ -332,7 +339,7 @@ unit cgcpu;
               begin
               begin
                 if (not paramanager.use_fixed_stack) then
                 if (not paramanager.use_fixed_stack) then
                   internal_restore_regs(list,not (pi_has_stack_allocs in current_procinfo.flags));
                   internal_restore_regs(list,not (pi_has_stack_allocs in current_procinfo.flags));
-                list.concat(Taicpu.op_none(A_LEAVE,S_NO));
+                generate_leave(list);
               end;
               end;
             list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
             list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
           end;
           end;

+ 29 - 8
compiler/i386/cpuinfo.pas

@@ -30,6 +30,9 @@ Interface
 
 
 Type
 Type
    bestreal = extended;
    bestreal = extended;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TExtended80Rec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts32real = single;
    ts64real = double;
    ts64real = double;
    ts80real = extended;
    ts80real = extended;
@@ -66,8 +69,25 @@ Type
       fpu_avx2
       fpu_avx2
      );
      );
 
 
+   tcontrollertype =
+     (ct_none
+     );
+
 
 
 Const
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = false;
+
+   { We know that there are fields after sramsize
+     but we don't care about this warning }
+   {$PUSH}
+    {$WARN 3177 OFF}
+   embedded_controllers : array [tcontrollertype] of tcontrollerdatatype =
+   (
+      (controllertypestr:''; controllerunitstr:''; flashbase:0; flashsize:0; srambase:0; sramsize:0));
+   {$POP}
+
    { calling conventions supported by the code generator }
    { calling conventions supported by the code generator }
    supported_calling_conventions : tproccalloptions = [
    supported_calling_conventions : tproccalloptions = [
      pocall_internproc,
      pocall_internproc,
@@ -131,7 +151,8 @@ Const
 
 
 type
 type
    tcpuflags =
    tcpuflags =
-      (CPUX86_HAS_SSEUNIT,
+      (CPUX86_HAS_CMOV,
+       CPUX86_HAS_SSEUNIT,
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_POPCNT,
        CPUX86_HAS_POPCNT,
@@ -147,13 +168,13 @@ type
      { cpu_none      } [],
      { cpu_none      } [],
      { cpu_386       } [],
      { cpu_386       } [],
      { cpu_Pentium   } [],
      { cpu_Pentium   } [],
-     { cpu_Pentium2  } [],
-     { cpu_Pentium3  } [CPUX86_HAS_SSEUNIT],
-     { cpu_Pentium4  } [CPUX86_HAS_SSEUNIT],
-     { cpu_PentiumM  } [CPUX86_HAS_SSEUNIT],
-     { cpu_core_i    } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
-     { cpu_core_avx  } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
-     { cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
+     { cpu_Pentium2  } [CPUX86_HAS_CMOV],
+     { cpu_Pentium3  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_Pentium4  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_PentiumM  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_core_i    } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
+     { cpu_core_avx  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
+     { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
    );
    );
 
 
 
 

+ 20 - 1
compiler/i386/cpupara.pas

@@ -114,6 +114,23 @@ unit cpupara;
                   end;
                   end;
               end;
               end;
             end;
             end;
+          system_i386_os2,
+          system_i386_emx:
+            begin
+              case def.typ of
+                recorddef :
+                  begin
+                    { EMX port of GCC returns small records in the FUNCTION_RETURN_REG up to 4 bytes in registers. }
+                    if ((pd.proccalloption in [pocall_cdecl,pocall_cppdecl]) and
+                        (def.size>0) and
+                        (def.size<=4)) then
+                     begin
+                       result:=false;
+                       exit;
+                     end;
+                  end;
+              end;
+            end;
           system_i386_freebsd,
           system_i386_freebsd,
           system_i386_openbsd,
           system_i386_openbsd,
           system_i386_darwin,
           system_i386_darwin,
@@ -283,7 +300,9 @@ unit cpupara;
           usedef:=forcetempdef;
           usedef:=forcetempdef;
         { on darwin/i386, if a record has only one field and that field is a
         { on darwin/i386, if a record has only one field and that field is a
           single or double, it has to be returned like a single/double }
           single or double, it has to be returned like a single/double }
-        if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) and
+        if (target_info.system in [system_i386_darwin,system_i386_iphonesim,
+                                   system_i386_freebsd,system_i386_openbsd,
+                                   system_i386_os2,system_i386_emx]) and
            ((usedef.typ=recorddef) or
            ((usedef.typ=recorddef) or
             is_object(usedef)) and
             is_object(usedef)) and
            tabstractrecordsymtable(tabstractrecorddef(usedef).symtable).has_single_field(sym) and
            tabstractrecordsymtable(tabstractrecorddef(usedef).symtable).has_single_field(sym) and

+ 8 - 0
compiler/i386/i386att.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsravd',
 'vpsrlvd',
 'vpsrlvd',
 'vpsrlvq',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd213pd',
 'vfmadd231pd',
 'vfmadd231pd',

+ 8 - 0
compiler/i386/i386atts.inc

@@ -1016,5 +1016,13 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 8 - 0
compiler/i386/i386int.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsravd',
 'vpsrlvd',
 'vpsrlvd',
 'vpsrlvq',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd213pd',
 'vfmadd231pd',
 'vfmadd231pd',

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
-1926;
+1942;

+ 8 - 0
compiler/i386/i386op.inc

@@ -956,6 +956,14 @@ A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRAVD,
 A_VPSRLVD,
 A_VPSRLVD,
 A_VPSRLVQ,
 A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
 A_VFMADD132PD,
 A_VFMADD132PD,
 A_VFMADD213PD,
 A_VFMADD213PD,
 A_VFMADD231PD,
 A_VFMADD231PD,

+ 9 - 1
compiler/i386/i386prop.inc

@@ -910,7 +910,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
@@ -956,6 +956,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),

Some files were not shown because too many files changed in this diff