ソースを参照

merge with trunk

git-svn-id: branches/tg74/avx2@29994 -
tg74 10 年 前
コミット
634338dcd2
100 ファイル変更9983 行追加3069 行削除
  1. 290 48
      .gitattributes
  2. 18 27
      .gitignore
  3. 94 13
      Makefile
  4. 8 5
      Makefile.fpc
  5. 125 12
      compiler/Makefile
  6. 13 7
      compiler/Makefile.fpc
  7. 93 35
      compiler/aarch64/a64att.inc
  8. 58 0
      compiler/aarch64/a64atts.inc
  9. 159 43
      compiler/aarch64/a64ins.dat
  10. 93 35
      compiler/aarch64/a64op.inc
  11. 168 161
      compiler/aarch64/a64reg.dat
  12. 462 1125
      compiler/aarch64/aasmcpu.pas
  13. 288 0
      compiler/aarch64/agcpugas.pas
  14. 25 7
      compiler/aarch64/aoptcpub.pas
  15. 2284 0
      compiler/aarch64/cgcpu.pas
  16. 200 38
      compiler/aarch64/cpubase.pas
  17. 17 2
      compiler/aarch64/cpuinfo.pas
  18. 40 0
      compiler/aarch64/cpunode.pas
  19. 457 521
      compiler/aarch64/cpupara.pas
  20. 68 0
      compiler/aarch64/cpupi.pas
  21. 70 0
      compiler/aarch64/cputarg.pas
  22. 156 0
      compiler/aarch64/hlcgcpu.pas
  23. 398 0
      compiler/aarch64/ncpuadd.pas
  24. 201 0
      compiler/aarch64/ncpucnv.pas
  25. 184 0
      compiler/aarch64/ncpuinl.pas
  26. 187 0
      compiler/aarch64/ncpumat.pas
  27. 142 0
      compiler/aarch64/ncpumem.pas
  28. 175 0
      compiler/aarch64/ncpuset.pas
  29. 5 0
      compiler/aarch64/ra64con.inc
  30. 162 157
      compiler/aarch64/ra64dwa.inc
  31. 1 1
      compiler/aarch64/ra64nor.inc
  32. 6 1
      compiler/aarch64/ra64num.inc
  33. 101 96
      compiler/aarch64/ra64rni.inc
  34. 48 43
      compiler/aarch64/ra64sri.inc
  35. 162 157
      compiler/aarch64/ra64sta.inc
  36. 6 1
      compiler/aarch64/ra64std.inc
  37. 5 0
      compiler/aarch64/ra64sup.inc
  38. 88 0
      compiler/aarch64/racpu.pas
  39. 1053 0
      compiler/aarch64/racpugas.pas
  40. 171 0
      compiler/aarch64/rgcpu.pas
  41. 4 2
      compiler/aasmbase.pas
  42. 15 16
      compiler/aasmdata.pas
  43. 34 9
      compiler/aasmtai.pas
  44. 16 3
      compiler/aggas.pas
  45. 4 0
      compiler/agjasmin.pas
  46. 23 0
      compiler/alpha/cpuinfo.pas
  47. 22 5
      compiler/aoptobj.pas
  48. 294 21
      compiler/arm/aasmcpu.pas
  49. 1 2
      compiler/arm/agarmgas.pas
  50. 26 6
      compiler/arm/aoptcpu.pas
  51. 8 5
      compiler/arm/aoptcpub.pas
  52. 73 67
      compiler/arm/armins.dat
  53. 1 1
      compiler/arm/armnop.inc
  54. 97 146
      compiler/arm/armtab.inc
  55. 86 18
      compiler/arm/cgcpu.pas
  56. 43 1
      compiler/arm/cpuelf.pas
  57. 6 1
      compiler/arm/cpuinfo.pas
  58. 20 4
      compiler/arm/narmadd.pas
  59. 4 3
      compiler/arm/narmmem.pas
  60. 11 4
      compiler/arm/narmset.pas
  61. 2 1
      compiler/arm/raarmgas.pas
  62. 10 0
      compiler/arm/rgcpu.pas
  63. 4 2
      compiler/assemble.pas
  64. 5 5
      compiler/avr/aasmcpu.pas
  65. 4 6
      compiler/avr/agavrgas.pas
  66. 6 0
      compiler/avr/cpuinfo.pas
  67. 4 4
      compiler/avr/cpupara.pas
  68. 10 2
      compiler/avr/navradd.pas
  69. 2 2
      compiler/avr/navrmat.pas
  70. 1 1
      compiler/avr/raavrgas.pas
  71. 385 0
      compiler/blockutl.pas
  72. 7 4
      compiler/cfileutl.pas
  73. 13 0
      compiler/cgbase.pas
  74. 2 2
      compiler/cghlcpu.pas
  75. 29 10
      compiler/cgobj.pas
  76. 6 0
      compiler/cgutils.pas
  77. 3 0
      compiler/compiler.pas
  78. 1 0
      compiler/compinnr.inc
  79. 1 1
      compiler/comprsrc.pas
  80. 77 43
      compiler/dbgdwarf.pas
  81. 6 10
      compiler/dbgstabs.pas
  82. 27 9
      compiler/defcmp.pas
  83. 4 17
      compiler/defutil.pas
  84. 1 1
      compiler/expunix.pas
  85. 1 1
      compiler/finput.pas
  86. 1 0
      compiler/fpcdefs.inc
  87. 10 2
      compiler/fppu.pas
  88. 24 0
      compiler/generic/cpuinfo.pas
  89. 51 37
      compiler/globals.pas
  90. 20 8
      compiler/globtype.pas
  91. 4 3
      compiler/hlcg2ll.pas
  92. 30 14
      compiler/hlcgobj.pas
  93. 63 22
      compiler/htypechk.pas
  94. 8 1
      compiler/i386/cgcpu.pas
  95. 1 1
      compiler/i386/cpuelf.pas
  96. 29 8
      compiler/i386/cpuinfo.pas
  97. 21 1
      compiler/i386/cpupara.pas
  98. 3 0
      compiler/i386/cputarg.pas
  99. 2 0
      compiler/i386/n386add.pas
  100. 36 2
      compiler/i386/n386cal.pas

ファイルの差分が大きいため隠しています
+ 290 - 48
.gitattributes


+ 18 - 27
.gitignore

@@ -1344,6 +1344,24 @@ packages/fcl-net/src/*.o
 packages/fcl-net/src/*.ppu
 packages/fcl-net/src/*.s
 packages/fcl-net/src/Package.fpc
+packages/fcl-net/src/amiga/*.bak
+packages/fcl-net/src/amiga/*.exe
+packages/fcl-net/src/amiga/*.o
+packages/fcl-net/src/amiga/*.ppu
+packages/fcl-net/src/amiga/*.s
+packages/fcl-net/src/amiga/Package.fpc
+packages/fcl-net/src/amiga/build-stamp.*
+packages/fcl-net/src/amiga/fpcmade.*
+packages/fcl-net/src/amiga/units
+packages/fcl-net/src/aros/*.bak
+packages/fcl-net/src/aros/*.exe
+packages/fcl-net/src/aros/*.o
+packages/fcl-net/src/aros/*.ppu
+packages/fcl-net/src/aros/*.s
+packages/fcl-net/src/aros/Package.fpc
+packages/fcl-net/src/aros/build-stamp.*
+packages/fcl-net/src/aros/fpcmade.*
+packages/fcl-net/src/aros/units
 packages/fcl-net/src/fpcmade.*
 packages/fcl-net/src/netware/*.bak
 packages/fcl-net/src/netware/*.exe
@@ -1801,15 +1819,6 @@ packages/gdbint/src/Package.fpc
 packages/gdbint/src/build-stamp.*
 packages/gdbint/src/fpcmade.*
 packages/gdbint/src/units
-packages/gdbint/tests/*.bak
-packages/gdbint/tests/*.exe
-packages/gdbint/tests/*.o
-packages/gdbint/tests/*.ppu
-packages/gdbint/tests/*.s
-packages/gdbint/tests/Package.fpc
-packages/gdbint/tests/build-stamp.*
-packages/gdbint/tests/fpcmade.*
-packages/gdbint/tests/units
 packages/gdbint/units
 packages/gdbm/*.bak
 packages/gdbm/*.exe
@@ -2723,15 +2732,6 @@ packages/imlib/*.ppu
 packages/imlib/*.s
 packages/imlib/Package.fpc
 packages/imlib/build-stamp.*
-packages/imlib/examples/*.bak
-packages/imlib/examples/*.exe
-packages/imlib/examples/*.o
-packages/imlib/examples/*.ppu
-packages/imlib/examples/*.s
-packages/imlib/examples/Package.fpc
-packages/imlib/examples/build-stamp.*
-packages/imlib/examples/fpcmade.*
-packages/imlib/examples/units
 packages/imlib/fpcmade.*
 packages/imlib/src/*.bak
 packages/imlib/src/*.exe
@@ -2742,15 +2742,6 @@ packages/imlib/src/Package.fpc
 packages/imlib/src/build-stamp.*
 packages/imlib/src/fpcmade.*
 packages/imlib/src/units
-packages/imlib/tests/*.bak
-packages/imlib/tests/*.exe
-packages/imlib/tests/*.o
-packages/imlib/tests/*.ppu
-packages/imlib/tests/*.s
-packages/imlib/tests/Package.fpc
-packages/imlib/tests/build-stamp.*
-packages/imlib/tests/fpcmade.*
-packages/imlib/tests/units
 packages/imlib/units
 packages/ldap/*.bak
 packages/ldap/*.exe

+ 94 - 13
Makefile

@@ -1,9 +1,9 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2014-02-06 rev 26692]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-01-04 rev 29399]
 #
 default: help
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-android jvm-java jvm-android i8086-msdos
-BSDs = freebsd netbsd openbsd darwin
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos aarch64-darwin
+BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
 OSNeedsComspecToRunBatch = go32v2 watcom
@@ -184,6 +184,12 @@ $(error When compiling for arm-embedded, a sub-architecture (e.g. SUBARCH=armv4t
 endif
 override FPCOPT+=-Cp$(SUBARCH)
 endif
+ifeq ($(FULL_TARGET),mipsel-embedded)
+ifeq ($(SUBARCH),)
+$(error When compiling for mipsel-embedded, a sub-architecture (e.g. SUBARCH=pic32mx) must be defined)
+endif
+override FPCOPT+=-Cp$(SUBARCH)
+endif
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 TARGETSUFFIX=$(OS_TARGET)
 SOURCESUFFIX=$(OS_SOURCE)
@@ -320,7 +326,7 @@ FPCFPMAKE=$(FPC)
 endif
 endif
 override PACKAGE_NAME=fpc
-override PACKAGE_VERSION=2.7.1
+override PACKAGE_VERSION=3.1.1
 REQUIREDVERSION=2.6.4
 REQUIREDVERSION2=2.6.2
 ifndef inOS2
@@ -373,6 +379,9 @@ endif
 ifeq ($(CPU_TARGET),avr)
 PPSUF=avr
 endif
+ifeq ($(CPU_TARGET),aarch64)
+PPSUF=a64
+endif
 ifdef CROSSCOMPILE
 ifneq ($(CPU_TARGET),jvm)
 PPPRE=ppcross
@@ -461,7 +470,7 @@ endif
 endif
 BuildOnlyBaseCPUs=jvm
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba msdos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1
@@ -537,6 +546,9 @@ endif
 ifeq ($(FULL_TARGET),i386-android)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
@@ -624,6 +636,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
@@ -678,6 +696,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
+ifeq ($(FULL_TARGET),mipsel-embedded)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 ifeq ($(FULL_TARGET),mipsel-android)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
@@ -690,6 +711,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 override INSTALL_FPCPACKAGE=y
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
@@ -933,6 +957,12 @@ EXEEXT=
 HASSHAREDLIB=1
 SHORTSUFFIX=lnx
 endif
+ifeq ($(OS_TARGET),dragonfly)
+BATCHEXT=.sh
+EXEEXT=
+HASSHAREDLIB=1
+SHORTSUFFIX=df
+endif
 ifeq ($(OS_TARGET),freebsd)
 BATCHEXT=.sh
 EXEEXT=
@@ -978,6 +1008,11 @@ EXEEXT=
 SHAREDLIBEXT=.library
 SHORTSUFFIX=amg
 endif
+ifeq ($(OS_TARGET),aros)
+EXEEXT=
+SHAREDLIBEXT=.library
+SHORTSUFFIX=aros
+endif
 ifeq ($(OS_TARGET),morphos)
 EXEEXT=
 SHAREDLIBEXT=.library
@@ -1442,8 +1477,8 @@ endif
 ifdef CREATESHARED
 override FPCOPT+=-Cg
 endif
-ifneq ($(findstring $(OS_TARGET),freebsd openbsd netbsd linux solaris),)
-ifeq ($(CPU_TARGET),x86_64)
+ifneq ($(findstring $(OS_TARGET),dragonfly freebsd openbsd netbsd linux solaris),)
+ifneq ($(findstring $(CPU_TARGET),x86_64 mips mipsel),)
 override FPCOPT+=-Cg
 endif
 endif
@@ -1472,17 +1507,23 @@ ifdef ACROSSCOMPILE
 override FPCOPT+=$(CROSSOPT)
 endif
 override COMPILER:=$(strip $(FPC) $(FPCOPT))
-ifeq (,$(findstring -s ,$(COMPILER)))
+ifneq (,$(findstring -sh ,$(COMPILER)))
+UseEXECPPAS=1
+endif
+ifneq (,$(findstring -s ,$(COMPILER)))
+ifeq ($(FULL_SOURCE),$(FULL_TARGET))
+UseEXECPPAS=1
+endif
+endif
+ifneq ($(UseEXECPPAS),1)
 EXECPPAS=
 else
-ifeq ($(FULL_SOURCE),$(FULL_TARGET))
 ifdef RUNBATCH
 EXECPPAS:=@$(RUNBATCH) $(PPAS)
 else
 EXECPPAS:=@$(PPAS)
 endif
 endif
-endif
 ifdef TARGET_RSTS
 override RSTFILES=$(addsuffix $(RSTEXT),$(TARGET_RSTS))
 override CLEANRSTFILES+=$(RSTFILES)
@@ -2008,6 +2049,14 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2240,6 +2289,22 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2384,6 +2449,14 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),mipsel-embedded)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),mipsel-android)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2416,6 +2489,14 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifdef TARGET_DIRS_COMPILER
 compiler_all:
 	$(MAKE) -C compiler all
@@ -2726,14 +2807,14 @@ BUILDSTAMP=build-stamp.$(FULL_TARGET)
 .PHONY: all clean distclean build buildbase install installbase
 .PHONY: installother zipinstallbase zipinstallotherzipinstall
 .PHONY: singlezipinstall versioncheckstartingcompiler
-versioncheckstartingcompiler: 
+versioncheckstartingcompiler:
 ifndef CROSSCOMPILE
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION))
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION2))
 ifndef OVERRIDEVERSIONCHECK
-	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION). If you are absolutely sure that the current compiler is built from the exact same version/revision, you can try to use OVERRIDEVERSIONCHECK=1 to override )
+	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION).)
 else
-	@$(ECHO) You have overriden the starting compiler versioncheck while using starting compiler version $(FPC_VERSION). This situation is not supported and strange things and errors may happen. Remove OVERRIDEVERSIONCHECK=1 to fix this. 
+	@$(ECHO) You have overriden the starting compiler versioncheck while using starting compiler version $(FPC_VERSION). This situation is not supported and strange things and errors may happen. Remove OVERRIDEVERSIONCHECK=1 to fix this.
 endif
 endif
 endif

+ 8 - 5
Makefile.fpc

@@ -4,7 +4,7 @@
 
 [package]
 name=fpc
-version=2.7.1
+version=3.1.1
 
 [target]
 dirs=compiler rtl utils packages ide installer
@@ -79,6 +79,9 @@ endif
 ifeq ($(CPU_TARGET),avr)
 PPSUF=avr
 endif
+ifeq ($(CPU_TARGET),aarch64)
+PPSUF=a64
+endif
 
 # cross compilers uses full cpu_target, not just ppc-suffix
 # (except if the target cannot run a native compiler)
@@ -204,7 +207,7 @@ endif
 BuildOnlyBaseCPUs=jvm
 
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba msdos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1
@@ -263,14 +266,14 @@ BUILDSTAMP=build-stamp.$(FULL_TARGET)
 .PHONY: installother zipinstallbase zipinstallotherzipinstall
 .PHONY: singlezipinstall versioncheckstartingcompiler
 
-versioncheckstartingcompiler: 
+versioncheckstartingcompiler:
 ifndef CROSSCOMPILE
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION))
 ifneq ($(FPC_VERSION),$(REQUIREDVERSION2))
 ifndef OVERRIDEVERSIONCHECK
-	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION). If you are absolutely sure that the current compiler is built from the exact same version/revision, you can try to use OVERRIDEVERSIONCHECK=1 to override )
+	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION).)
 else
-	@$(ECHO) You have overriden the starting compiler versioncheck while using starting compiler version $(FPC_VERSION). This situation is not supported and strange things and errors may happen. Remove OVERRIDEVERSIONCHECK=1 to fix this. 
+	@$(ECHO) You have overriden the starting compiler versioncheck while using starting compiler version $(FPC_VERSION). This situation is not supported and strange things and errors may happen. Remove OVERRIDEVERSIONCHECK=1 to fix this.
 endif
 endif
 endif

+ 125 - 12
compiler/Makefile

@@ -1,9 +1,9 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2014-04-01 rev 27428]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-01-04 rev 29399]
 #
 default: all
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos
-BSDs = freebsd netbsd openbsd darwin
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos aarch64-darwin
+BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
 OSNeedsComspecToRunBatch = go32v2 watcom
@@ -326,9 +326,9 @@ FPCFPMAKE=$(FPC)
 endif
 endif
 override PACKAGE_NAME=compiler
-override PACKAGE_VERSION=2.7.1
+override PACKAGE_VERSION=3.1.1
 unexport FPC_VERSION FPC_COMPILERINFO
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
 ALLTARGETS=$(CYCLETARGETS)
 ifdef ALPHA
 PPC_TARGET=alpha
@@ -372,6 +372,9 @@ endif
 ifdef I8086
 PPC_TARGET=i8086
 endif
+ifdef AARCH64
+PPC_TARGET=aarch64
+endif
 ifndef PPC_TARGET
 PPC_TARGET=$(CPU_TARGET)
 endif
@@ -469,6 +472,9 @@ endif
 ifeq ($(CPC_TARGET),i8086)
 CPUSUF=8086
 endif
+ifeq ($(CPC_TARGET),aarch64)
+CPUSUF=a64
+endif
 NOCPUDEF=1
 MSGFILE=msg/error$(FPCLANG).msg
 SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
@@ -477,7 +483,7 @@ ifneq ($(REVINC),)
 override LOCALOPT+=-dREVINC
 ifeq ($(REVSTR),)
 ifneq ($(SVNVERSION),)
-REVSTR:=$(shell $(SVNVERSION) -c .)
+REVSTR:=$(subst r,,$(subst r1:,,r$(shell $(SVNVERSION) -c .)))
 export REVSTR
 else
 ifeq ($(REVINC),force)
@@ -525,7 +531,7 @@ override LOCALOPT+=-Fux86
 endif
 OPTWPOCOLLECT=-OWdevirtcalls,optvmts -FW$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
-ifneq ($(findstring $(OS_TARGET),darwin linux freebsd solaris),)
+ifneq ($(findstring $(OS_TARGET),darwin linux dragonfly freebsd solaris),)
 ifdef LINKSMART
 ifdef CREATESMART
 OPTWPOCOLLECT+=-OWsymbolliveness -Xs-
@@ -614,6 +620,9 @@ endif
 ifeq ($(FULL_TARGET),i386-android)
 override TARGET_DIRS+=utils
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 override TARGET_DIRS+=utils
 endif
@@ -701,6 +710,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_DIRS+=utils
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_DIRS+=utils
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 override TARGET_DIRS+=utils
 endif
@@ -770,6 +785,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_DIRS+=utils
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_PROGRAMS+=pp
 endif
@@ -839,6 +857,9 @@ endif
 ifeq ($(FULL_TARGET),i386-android)
 override TARGET_PROGRAMS+=pp
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+override TARGET_PROGRAMS+=pp
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 override TARGET_PROGRAMS+=pp
 endif
@@ -926,6 +947,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_PROGRAMS+=pp
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_PROGRAMS+=pp
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override TARGET_PROGRAMS+=pp
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 override TARGET_PROGRAMS+=pp
 endif
@@ -995,6 +1022,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_PROGRAMS+=pp
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_PROGRAMS+=pp
+endif
 override INSTALL_FPCPACKAGE=y
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
@@ -1065,6 +1095,9 @@ endif
 ifeq ($(FULL_TARGET),i386-android)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
@@ -1152,6 +1185,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
@@ -1221,6 +1260,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
@@ -1290,6 +1332,9 @@ endif
 ifeq ($(FULL_TARGET),i386-android)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
@@ -1377,6 +1422,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
@@ -1446,6 +1497,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_TARGETDIR+=.
 endif
@@ -1515,6 +1569,9 @@ endif
 ifeq ($(FULL_TARGET),i386-android)
 override COMPILER_TARGETDIR+=.
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 override COMPILER_TARGETDIR+=.
 endif
@@ -1602,6 +1659,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_TARGETDIR+=.
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_TARGETDIR+=.
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 override COMPILER_TARGETDIR+=.
 endif
@@ -1671,6 +1734,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_TARGETDIR+=.
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
@@ -1740,6 +1806,9 @@ endif
 ifeq ($(FULL_TARGET),i386-android)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
@@ -1827,6 +1896,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
@@ -1896,6 +1971,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 endif
@@ -2138,6 +2216,12 @@ EXEEXT=
 HASSHAREDLIB=1
 SHORTSUFFIX=lnx
 endif
+ifeq ($(OS_TARGET),dragonfly)
+BATCHEXT=.sh
+EXEEXT=
+HASSHAREDLIB=1
+SHORTSUFFIX=df
+endif
 ifeq ($(OS_TARGET),freebsd)
 BATCHEXT=.sh
 EXEEXT=
@@ -2183,6 +2267,11 @@ EXEEXT=
 SHAREDLIBEXT=.library
 SHORTSUFFIX=amg
 endif
+ifeq ($(OS_TARGET),aros)
+EXEEXT=
+SHAREDLIBEXT=.library
+SHORTSUFFIX=aros
+endif
 ifeq ($(OS_TARGET),morphos)
 EXEEXT=
 SHAREDLIBEXT=.library
@@ -2604,6 +2693,9 @@ endif
 ifeq ($(FULL_TARGET),i386-android)
 REQUIRE_PACKAGES_RTL=1
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 REQUIRE_PACKAGES_RTL=1
 endif
@@ -2691,6 +2783,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 REQUIRE_PACKAGES_RTL=1
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+REQUIRE_PACKAGES_RTL=1
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 REQUIRE_PACKAGES_RTL=1
 endif
@@ -2760,6 +2858,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 REQUIRE_PACKAGES_RTL=1
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifdef REQUIRE_PACKAGES_RTL
 PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
 ifneq ($(PACKAGEDIR_RTL),)
@@ -2911,8 +3012,8 @@ endif
 ifdef CREATESHARED
 override FPCOPT+=-Cg
 endif
-ifneq ($(findstring $(OS_TARGET),freebsd openbsd netbsd linux solaris),)
-ifeq ($(CPU_TARGET),x86_64)
+ifneq ($(findstring $(OS_TARGET),dragonfly freebsd openbsd netbsd linux solaris),)
+ifneq ($(findstring $(CPU_TARGET),x86_64 mips mipsel),)
 override FPCOPT+=-Cg
 endif
 endif
@@ -3461,6 +3562,9 @@ endif
 ifeq ($(FULL_TARGET),i386-android)
 TARGET_DIRS_UTILS=1
 endif
+ifeq ($(FULL_TARGET),i386-aros)
+TARGET_DIRS_UTILS=1
+endif
 ifeq ($(FULL_TARGET),m68k-linux)
 TARGET_DIRS_UTILS=1
 endif
@@ -3548,6 +3652,12 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 TARGET_DIRS_UTILS=1
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+TARGET_DIRS_UTILS=1
+endif
+ifeq ($(FULL_TARGET),x86_64-dragonfly)
+TARGET_DIRS_UTILS=1
+endif
 ifeq ($(FULL_TARGET),arm-linux)
 TARGET_DIRS_UTILS=1
 endif
@@ -3617,6 +3727,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 TARGET_DIRS_UTILS=1
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+TARGET_DIRS_UTILS=1
+endif
 ifdef TARGET_DIRS_UTILS
 utils_all:
 	$(MAKE) -C utils all
@@ -3745,7 +3858,7 @@ INSTALLEXEFILE=$(PPCROSSNAME)
 else
 INSTALLEXEFILE=$(EXENAME)
 endif
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086 aarch64
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
 $(PPC_TARGETS):
@@ -3780,11 +3893,11 @@ ppuclean:
 tempclean:
 	-$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 execlean :
-	-$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+	-$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
 $(addsuffix _clean,$(ALLTARGETS)):
 	-$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
 	-$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-	-$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME))
+	-$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
 	-$(DEL) $(EXENAME)
 clean: tempclean execlean cleanall $(addsuffix _clean,$(CPC_TARGET)) $(addsuffix _clean,$(TARGET_DIRS))

+ 13 - 7
compiler/Makefile.fpc

@@ -4,7 +4,7 @@
 
 [package]
 name=compiler
-version=2.7.1
+version=3.1.1
 
 [target]
 programs=pp
@@ -32,7 +32,7 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
 
 # All supported targets used for clean
 ALLTARGETS=$(CYCLETARGETS)
@@ -80,6 +80,9 @@ endif
 ifdef I8086
 PPC_TARGET=i8086
 endif
+ifdef AARCH64
+PPC_TARGET=aarch64
+endif
 
 # Default is to generate a compiler for the same
 # platform as CPU_TARGET (a native compiler)
@@ -205,6 +208,9 @@ endif
 ifeq ($(CPC_TARGET),i8086)
 CPUSUF=8086
 endif
+ifeq ($(CPC_TARGET),aarch64)
+CPUSUF=a64
+endif
 
 # Do not define the default -d$(CPU_TARGET) because that
 # will conflict with our -d$(CPC_TARGET)
@@ -225,7 +231,7 @@ override LOCALOPT+=-dREVINC
 # svnversion executable is available
 ifeq ($(REVSTR),)
 ifneq ($(SVNVERSION),)
-REVSTR:=$(shell $(SVNVERSION) -c .)
+REVSTR:=$(subst r,,$(subst r1:,,r$(shell $(SVNVERSION) -c .)))
 export REVSTR
 else
 ifeq ($(REVINC),force)
@@ -300,7 +306,7 @@ OPTWPOCOLLECT=-OWdevirtcalls,optvmts -FW$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
 # symbol liveness WPO requires nm, smart linking and no stripping (the latter
 # is forced by the Makefile when necessary)
-ifneq ($(findstring $(OS_TARGET),darwin linux freebsd solaris),)
+ifneq ($(findstring $(OS_TARGET),darwin linux dragonfly freebsd solaris),)
 ifdef LINKSMART
 ifdef CREATESMART
 OPTWPOCOLLECT+=-OWsymbolliveness -Xs-
@@ -397,7 +403,7 @@ endif
 # CPU targets
 #####################################################################
 
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086 aarch64
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
@@ -452,12 +458,12 @@ tempclean:
         -$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 
 execlean :
-        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
 
 $(addsuffix _clean,$(ALLTARGETS)):
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME))
+        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
 
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
         -$(DEL) $(EXENAME)

+ 93 - 35
compiler/aarch64/a64att.inc

@@ -1,14 +1,37 @@
 { don't edit, this file is generated from armins.dat }
 (
+'none',
 'b',
-'cb',
-'tb',
+'cbz',
+'cbnz',
+'tbz',
+'tbnz',
 'bl',
 'blr',
 'br',
 'ret',
+'brk',
+'hlt',
+'hvc',
+'smc',
+'svc',
+'eret',
+'dcps1',
+'dcps2',
+'dcps3',
+'drps',
+'dc',
+'at',
+'tlbi',
+'hint',
+'clrex',
+'dsb',
+'dmb',
+'isb',
 'ldr',
 'str',
+'ldur',
+'stur',
 'ldp',
 'stp',
 'ldnp',
@@ -16,40 +39,66 @@
 'ldtr',
 'sttr',
 'ldxr',
+'ldxp',
 'stxr',
+'stxp',
 'ldar',
 'stlr',
 'ldaxr',
 'stlxr',
+'stlxp',
+'ld1',
+'ld2',
+'ld3',
+'ld4',
+'st1',
+'st2',
+'st3',
+'st4',
+'ld1r',
+'ld2r',
+'ld3r',
+'ld4r',
 'prfm',
+'prfum',
 'add',
-'adc',
 'sub',
-'sbc',
 'cmp',
 'cmn',
-'mov',
 'and',
-'bic',
 'eor',
-'eon',
 'orr',
 'orn',
 'tst',
-'mvn',
+'movz',
+'movn',
 'movk',
+'mrs',
+'msr',
 'adrp',
 'adr',
 'bfm',
 'sbfm',
 'ubfm',
 'extr',
-'sxt',
-'uxt',
+'adc',
+'sbc',
+'bic',
+'eon',
 'asrv',
-'llslv',
+'lslv',
 'lsrv',
 'rorv',
+'madd',
+'msub',
+'smaddl',
+'smsubl',
+'smulh',
+'umaddl',
+'umsubl',
+'umulh',
+'sdiv',
+'udiv',
 'cls',
 'clz',
 'rbit',
@@ -62,33 +111,39 @@
 'csneg',
 'ccmn',
 'ccmp',
-'madd',
-'msub',
-'smaddl',
-'smsubl',
-'smulh',
-'umaddl',
-'umsubl',
-'umulh',
-'sdiv',
-'udiv',
-'neg',
+'nop',
+'yield',
+'wfe',
+'wfi',
+'sev',
+'sevl',
+'mov',
+'bfi',
+'bfxil',
+'sbfiz',
+'sbfx',
+'ubfiz',
+'ubfx',
 'asr',
 'lsl',
 'lsr',
 'ror',
-'cset',
-'csetm',
-'cinc',
-'cinv',
-'cneg',
+'sxt',
+'uxt',
+'neg',
 'ngc',
+'mvn',
 'mneg',
 'mul',
 'smnegl',
 'smull',
 'umnegl',
 'umull',
+'cset',
+'csetm',
+'cinc',
+'cinv',
+'cneg',
 'fmov',
 'fcvt',
 'fcvtas',
@@ -103,13 +158,13 @@
 'fcvtzu',
 'scvtf',
 'ucvtf',
-'fprinta',
-'fprinti',
-'fprintm',
-'fprintn',
-'fprintp',
-'fprintx',
-'fprintz',
+'frinta',
+'frinti',
+'frintm',
+'frintn',
+'frintp',
+'frintx',
+'frintz',
 'fabs',
 'fneg',
 'fsqrt',
@@ -129,5 +184,8 @@
 'fcmpe',
 'fccmp',
 'fcmmpe',
-'fcsel'
+'fcsel',
+'umov',
+'ins',
+'movi'
 );

+ 58 - 0
compiler/aarch64/a64atts.inc

@@ -129,5 +129,63 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 159 - 43
compiler/aarch64/a64ins.dat

@@ -1,8 +1,15 @@
+; invalid
+[NONE]
+
 [B]
 
-[CB]
+[CBZ]
+
+[CBNZ]
 
-[TB]
+[TBZ]
+
+[TBNZ]
 
 [BL]
 
@@ -12,10 +19,50 @@
 
 [RET]
 
+[BRK]
+
+[HLT]
+
+[HVC]
+
+[SMC]
+
+[SVC]
+
+[ERET]
+
+[DCPS1]
+
+[DCPS2]
+
+[DCPS3]
+
+[DRPS]
+
+[DC]
+
+[AT]
+
+[TLBI]
+
+[HINT]
+
+[CLREX]
+
+[DSB]
+
+[DMB]
+
+[ISB]
+
 [LDR]
 
 [STR]
 
+[LDUR]
+
+[STUR]
+
 [LDP]
 
 [STP]
@@ -30,8 +77,12 @@
 
 [LDXR]
 
+[LDXP]
+
 [STXR]
 
+[STXP]
+
 [LDAR]
 
 [STLR]
@@ -40,40 +91,64 @@
 
 [STLXR]
 
+[STLXP]
+
+[LD1]
+
+[LD2]
+
+[LD3]
+
+[LD4]
+
+[ST1]
+
+[ST2]
+
+[ST3]
+
+[ST4]
+
+[LD1R]
+
+[LD2R]
+
+[LD3R]
+
+[LD4R]
+
 [PRFM]
 
-[ADD]
+[PRFUM]
 
-[ADC]
+[ADD]
 
 [SUB]
 
-[SBC]
-
 [CMP]
 
 [CMN]
 
-[MOV]
-
 [AND]
 
-[BIC]
-
 [EOR]
 
-[EON]
-
 [ORR]
 
 [ORN]
 
 [TST]
 
-[MVN]
+[MOVZ]
+
+[MOVN]
 
 [MOVK]
 
+[MRS]
+
+[MSR]
+
 [ADRP]
 
 [ADR]
@@ -86,18 +161,42 @@
 
 [EXTR]
 
-[SXT]
+[ADC]
 
-[UXT]
+[SBC]
+
+[BIC]
+
+[EON]
 
 [ASRV]
 
-[LLSLV]
+[LSLV]
 
 [LSRV]
 
 [RORV]
 
+[MADD]
+
+[MSUB]
+
+[SMADDL]
+
+[SMSUBL]
+
+[SMULH]
+
+[UMADDL]
+
+[UMSUBL]
+
+[UMULH]
+
+[SDIV]
+
+[UDIV]
+
 [CLS]
 
 [CLZ]
@@ -122,29 +221,33 @@
 
 [CCMP]
 
-[MADD]
+; Aliases
+; they are not generated by the compiler, they are only used for inline assembler
+[NOP]
 
-[MSUB]
+[YIELD]
 
-[SMADDL]
+[WFE]
 
-[SMSUBL]
+[WFI]
 
-[SMULH]
+[SEV]
 
-[UMADDL]
+[SEVL]
 
-[UMSUBL]
+[MOV]
 
-[UMULH]
+[BFI]
 
-[SDIV]
+[BFXIL]
 
-[UDIV]
+[SBFIZ]
 
-; Aliases
-; they are not generated by the compiler, they are only used for inline assembler
-[NEG]
+[SBFX]
+
+[UBFIZ]
+
+[UBFX]
 
 [ASR]
 
@@ -154,18 +257,16 @@
 
 [ROR]
 
-[CSET]
-
-[CSETM]
-
-[CINC]
+[SXT]
 
-[CINV]
+[UXT]
 
-[CNEG]
+[NEG]
 
 [NGC]
 
+[MVN]
+
 [MNEG]
 
 [MUL]
@@ -178,6 +279,16 @@
 
 [UMULL]
 
+[CSET]
+
+[CSETM]
+
+[CINC]
+
+[CINV]
+
+[CNEG]
+
 [FMOV]
 
 [FCVT]
@@ -206,19 +317,19 @@
 
 [UCVTF]
 
-[FPRINTA]
+[FRINTA]
 
-[FPRINTI]
+[FRINTI]
 
-[FPRINTM]
+[FRINTM]
 
-[FPRINTN]
+[FRINTN]
 
-[FPRINTP]
+[FRINTP]
 
-[FPRINTX]
+[FRINTX]
 
-[FPRINTZ]
+[FRINTZ]
 
 [FABS]
 
@@ -260,3 +371,8 @@
 
 [FCSEL]
 
+[UMOV]
+
+[INS]
+
+[MOVI]

+ 93 - 35
compiler/aarch64/a64op.inc

@@ -1,14 +1,37 @@
 { don't edit, this file is generated from armins.dat }
 (
+A_NONE,
 A_B,
-A_CB,
-A_TB,
+A_CBZ,
+A_CBNZ,
+A_TBZ,
+A_TBNZ,
 A_BL,
 A_BLR,
 A_BR,
 A_RET,
+A_BRK,
+A_HLT,
+A_HVC,
+A_SMC,
+A_SVC,
+A_ERET,
+A_DCPS1,
+A_DCPS2,
+A_DCPS3,
+A_DRPS,
+A_DC,
+A_AT,
+A_TLBI,
+A_HINT,
+A_CLREX,
+A_DSB,
+A_DMB,
+A_ISB,
 A_LDR,
 A_STR,
+A_LDUR,
+A_STUR,
 A_LDP,
 A_STP,
 A_LDNP,
@@ -16,40 +39,66 @@ A_STNP,
 A_LDTR,
 A_STTR,
 A_LDXR,
+A_LDXP,
 A_STXR,
+A_STXP,
 A_LDAR,
 A_STLR,
 A_LDAXR,
 A_STLXR,
+A_STLXP,
+A_LD1,
+A_LD2,
+A_LD3,
+A_LD4,
+A_ST1,
+A_ST2,
+A_ST3,
+A_ST4,
+A_LD1R,
+A_LD2R,
+A_LD3R,
+A_LD4R,
 A_PRFM,
+A_PRFUM,
 A_ADD,
-A_ADC,
 A_SUB,
-A_SBC,
 A_CMP,
 A_CMN,
-A_MOV,
 A_AND,
-A_BIC,
 A_EOR,
-A_EON,
 A_ORR,
 A_ORN,
 A_TST,
-A_MVN,
+A_MOVZ,
+A_MOVN,
 A_MOVK,
+A_MRS,
+A_MSR,
 A_ADRP,
 A_ADR,
 A_BFM,
 A_SBFM,
 A_UBFM,
 A_EXTR,
-A_SXT,
-A_UXT,
+A_ADC,
+A_SBC,
+A_BIC,
+A_EON,
 A_ASRV,
-A_LLSLV,
+A_LSLV,
 A_LSRV,
 A_RORV,
+A_MADD,
+A_MSUB,
+A_SMADDL,
+A_SMSUBL,
+A_SMULH,
+A_UMADDL,
+A_UMSUBL,
+A_UMULH,
+A_SDIV,
+A_UDIV,
 A_CLS,
 A_CLZ,
 A_RBIT,
@@ -62,33 +111,39 @@ A_CSINV,
 A_CSNEG,
 A_CCMN,
 A_CCMP,
-A_MADD,
-A_MSUB,
-A_SMADDL,
-A_SMSUBL,
-A_SMULH,
-A_UMADDL,
-A_UMSUBL,
-A_UMULH,
-A_SDIV,
-A_UDIV,
-A_NEG,
+A_NOP,
+A_YIELD,
+A_WFE,
+A_WFI,
+A_SEV,
+A_SEVL,
+A_MOV,
+A_BFI,
+A_BFXIL,
+A_SBFIZ,
+A_SBFX,
+A_UBFIZ,
+A_UBFX,
 A_ASR,
 A_LSL,
 A_LSR,
 A_ROR,
-A_CSET,
-A_CSETM,
-A_CINC,
-A_CINV,
-A_CNEG,
+A_SXT,
+A_UXT,
+A_NEG,
 A_NGC,
+A_MVN,
 A_MNEG,
 A_MUL,
 A_SMNEGL,
 A_SMULL,
 A_UMNEGL,
 A_UMULL,
+A_CSET,
+A_CSETM,
+A_CINC,
+A_CINV,
+A_CNEG,
 A_FMOV,
 A_FCVT,
 A_FCVTAS,
@@ -103,13 +158,13 @@ A_FCVTZS,
 A_FCVTZU,
 A_SCVTF,
 A_UCVTF,
-A_FPRINTA,
-A_FPRINTI,
-A_FPRINTM,
-A_FPRINTN,
-A_FPRINTP,
-A_FPRINTX,
-A_FPRINTZ,
+A_FRINTA,
+A_FRINTI,
+A_FRINTM,
+A_FRINTN,
+A_FRINTP,
+A_FRINTX,
+A_FRINTZ,
 A_FABS,
 A_FNEG,
 A_FSQRT,
@@ -129,5 +184,8 @@ A_FCMP,
 A_FCMPE,
 A_FCCMP,
 A_FCMMPE,
-A_FCSEL
+A_FCSEL,
+A_UMOV,
+A_INS,
+A_MOVI
 );

+ 168 - 161
compiler/aarch64/a64reg.dat

@@ -70,169 +70,176 @@ W30,$01,$04,$1E,w30,30,30
 X30,$01,$05,$1E,x30,30,30
 WZR,$01,$04,$1F,wzr,31,31
 XZR,$01,$05,$1F,xzr,31,31
+; sp and zr share the same register number, but we still have to be able to
+; differentiate them because some instructions can be encoded with both ->
+; use a different superregister after all
+WSP,$01,$04,$20,wsp,31,31
+SP,$01,$05,$20,sp,31,31
 
 
 ; vfp registers
-B0,$04,$01,$00,b0,0,0
-H0,$04,$03,$00,h0,0,0
-S0,$04,$09,$00,s0,0,0
-D0,$04,$0a,$00,d0,0,0
-Q0,$04,$05,$00,q0,0,0
-B1,$04,$01,$01,b1,1,1
-H1,$04,$03,$01,h1,1,1
-S1,$04,$09,$01,s1,1,1
-D1,$04,$0a,$01,d1,1,1
-Q1,$04,$05,$01,q1,1,1
-B2,$04,$01,$02,b2,2,2
-H2,$04,$03,$02,h2,2,2
-S2,$04,$09,$02,s2,2,2
-D2,$04,$0a,$02,d2,2,2
-Q2,$04,$05,$02,q2,2,2
-B3,$04,$01,$03,b3,3,3
-H3,$04,$03,$03,h3,3,3
-S3,$04,$09,$03,s3,3,3
-D3,$04,$0a,$03,d3,3,3
-Q3,$04,$05,$03,q3,3,3
-B4,$04,$01,$04,b4,4,4
-H4,$04,$03,$04,h4,4,4
-S4,$04,$09,$04,s4,4,4
-D4,$04,$0a,$04,d4,4,4
-Q4,$04,$05,$04,q4,4,4
-B5,$04,$01,$05,b5,5,5
-H5,$04,$03,$05,h5,5,5
-S5,$04,$09,$05,s5,5,5
-D5,$04,$0a,$05,d5,5,5
-Q5,$04,$05,$05,q5,5,5
-B6,$04,$01,$06,b6,6,6
-H6,$04,$03,$06,h6,6,6
-S6,$04,$09,$06,s6,6,6
-D6,$04,$0a,$06,d6,6,6
-Q6,$04,$05,$06,q6,6,6
-B7,$04,$01,$07,b7,7,7
-H7,$04,$03,$07,h7,7,7
-S7,$04,$09,$07,s7,7,7
-D7,$04,$0a,$07,d7,7,7
-Q7,$04,$05,$07,q7,7,7
-B8,$04,$01,$08,b8,8,8
-H8,$04,$03,$08,h8,8,8
-S8,$04,$09,$08,s8,8,8
-D8,$04,$0a,$08,d8,8,8
-Q8,$04,$05,$08,q8,8,8
-B9,$04,$01,$09,b9,9,9
-H9,$04,$03,$09,h9,9,9
-S9,$04,$09,$09,s9,9,9
-D9,$04,$0a,$09,d9,9,9
-Q9,$04,$05,$09,q9,9,9
-B10,$04,$01,$0A,b10,10,10
-H10,$04,$03,$0A,h10,10,10
-S10,$04,$09,$0A,s10,10,10
-D10,$04,$0a,$0A,d10,10,10
-Q10,$04,$05,$0A,q10,10,10
-B11,$04,$01,$0B,b11,11,11
-H11,$04,$03,$0B,h11,11,11
-S11,$04,$09,$0B,s11,11,11
-D11,$04,$0a,$0B,d11,11,11
-Q11,$04,$05,$0B,q11,11,11
-B12,$04,$01,$0C,b12,12,12
-H12,$04,$03,$0C,h12,12,12
-S12,$04,$09,$0C,s12,12,12
-D12,$04,$0a,$0C,d12,12,12
-Q12,$04,$05,$0C,q12,12,12
-B13,$04,$01,$0D,b13,13,13
-H13,$04,$03,$0D,h13,13,13
-S13,$04,$09,$0D,s13,13,13
-D13,$04,$0a,$0D,d13,13,13
-Q13,$04,$05,$0D,q13,13,13
-B14,$04,$01,$0E,b14,14,14
-H14,$04,$03,$0E,h14,14,14
-S14,$04,$09,$0E,s14,14,14
-D14,$04,$0a,$0E,d14,14,14
-Q14,$04,$05,$0E,q14,14,14
-B15,$04,$01,$0F,b15,15,15
-H15,$04,$03,$0F,h15,15,15
-S15,$04,$09,$0F,s15,15,15
-D15,$04,$0a,$0F,d15,15,15
-Q15,$04,$05,$0F,q15,15,15
-B16,$04,$01,$10,b16,16,16
-H16,$04,$03,$10,h16,16,16
-S16,$04,$09,$10,s16,16,16
-D16,$04,$0a,$10,d16,16,16
-Q16,$04,$05,$10,q16,16,16
-B17,$04,$01,$11,b17,17,17
-H17,$04,$03,$11,h17,17,17
-S17,$04,$09,$11,s17,17,17
-D17,$04,$0a,$11,d17,17,17
-Q17,$04,$05,$11,q17,17,17
-B18,$04,$01,$12,b18,18,18
-H18,$04,$03,$12,h18,18,18
-S18,$04,$09,$12,s18,18,18
-D18,$04,$0a,$12,d18,18,18
-Q18,$04,$05,$12,q18,18,18
-B19,$04,$01,$13,b19,19,19
-H19,$04,$03,$13,h19,19,19
-S19,$04,$09,$13,s19,19,19
-D19,$04,$0a,$13,d19,19,19
-Q19,$04,$05,$13,q19,19,19
-B20,$04,$01,$14,b20,20,20
-H20,$04,$03,$14,h20,20,20
-S20,$04,$09,$14,s20,20,20
-D20,$04,$0a,$14,d20,20,20
-Q20,$04,$05,$14,q20,20,20
-B21,$04,$01,$15,b21,21,21
-H21,$04,$03,$15,h21,21,21
-S21,$04,$09,$15,s21,21,21
-D21,$04,$0a,$15,d21,21,21
-Q21,$04,$05,$15,q21,21,21
-B22,$04,$01,$16,b22,22,22
-H22,$04,$03,$16,h22,22,22
-S22,$04,$09,$16,s22,22,22
-D22,$04,$0a,$16,d22,22,22
-Q22,$04,$05,$16,q22,22,22
-B23,$04,$01,$17,b23,23,23
-H23,$04,$03,$17,h23,23,23
-S23,$04,$09,$17,s23,23,23
-D23,$04,$0a,$17,d23,23,23
-Q23,$04,$05,$17,q23,23,23
-B24,$04,$01,$18,b24,24,24
-H24,$04,$03,$18,h24,24,24
-S24,$04,$09,$18,s24,24,24
-D24,$04,$0a,$18,d24,24,24
-Q24,$04,$05,$18,q24,24,24
-B25,$04,$01,$19,b25,25,25
-H25,$04,$03,$19,h25,25,25
-S25,$04,$09,$19,s25,25,25
-D25,$04,$0a,$19,d25,25,25
-Q25,$04,$05,$19,q25,25,25
-B26,$04,$01,$1A,b26,26,26
-H26,$04,$03,$1A,h26,26,26
-S26,$04,$09,$1A,s26,26,26
-D26,$04,$0a,$1A,d26,26,26
-Q26,$04,$05,$1A,q26,26,26
-B27,$04,$01,$1B,b27,27,27
-H27,$04,$03,$1B,h27,27,27
-S27,$04,$09,$1B,s27,27,27
-D27,$04,$0a,$1B,d27,27,27
-Q27,$04,$05,$1B,q27,27,27
-B28,$04,$01,$1C,b28,28,28
-H28,$04,$03,$1C,h28,28,28
-S28,$04,$09,$1C,s28,28,28
-D28,$04,$0a,$1C,d28,28,28
-Q28,$04,$05,$1C,q28,28,28
-B29,$04,$01,$1D,b29,29,29
-H29,$04,$03,$1D,h29,29,29
-S29,$04,$09,$1D,s29,29,29
-D29,$04,$0a,$1D,d29,29,29
-Q29,$04,$05,$1D,q29,29,29
-B30,$04,$01,$1E,b30,30,30
-H30,$04,$03,$1E,h30,30,30
-S30,$04,$09,$1E,s30,30,30
-D30,$04,$0a,$1E,d30,30,30
-Q30,$04,$05,$1E,q30,30,30
-B31,$04,$01,$1F,b31,31,31
-H31,$04,$03,$1F,h31,31,31
-S31,$04,$09,$1F,s31,31,31
-D31,$04,$0a,$1F,d31,31,31
-Q31,$04,$05,$1F,q31,31,31
+B0,$04,$01,$00,b0,64,64
+H0,$04,$03,$00,h0,64,64
+S0,$04,$09,$00,s0,64,64
+D0,$04,$0a,$00,d0,64,64
+Q0,$04,$05,$00,q0,64,64
+B1,$04,$01,$01,b1,65,65
+H1,$04,$03,$01,h1,65,65
+S1,$04,$09,$01,s1,65,65
+D1,$04,$0a,$01,d1,65,65
+Q1,$04,$05,$01,q1,65,65
+B2,$04,$01,$02,b2,66,66
+H2,$04,$03,$02,h2,66,66
+S2,$04,$09,$02,s2,66,66
+D2,$04,$0a,$02,d2,66,66
+Q2,$04,$05,$02,q2,66,66
+B3,$04,$01,$03,b3,67,67
+H3,$04,$03,$03,h3,67,67
+S3,$04,$09,$03,s3,67,67
+D3,$04,$0a,$03,d3,67,67
+Q3,$04,$05,$03,q3,67,67
+B4,$04,$01,$04,b4,68,68
+H4,$04,$03,$04,h4,68,68
+S4,$04,$09,$04,s4,68,68
+D4,$04,$0a,$04,d4,68,68
+Q4,$04,$05,$04,q4,68,68
+B5,$04,$01,$05,b5,69,69
+H5,$04,$03,$05,h5,69,69
+S5,$04,$09,$05,s5,69,69
+D5,$04,$0a,$05,d5,69,69
+Q5,$04,$05,$05,q5,69,69
+B6,$04,$01,$06,b6,70,70
+H6,$04,$03,$06,h6,70,70
+S6,$04,$09,$06,s6,70,70
+D6,$04,$0a,$06,d6,70,70
+Q6,$04,$05,$06,q6,70,70
+B7,$04,$01,$07,b7,71,71
+H7,$04,$03,$07,h7,71,71
+S7,$04,$09,$07,s7,71,71
+D7,$04,$0a,$07,d7,71,71
+Q7,$04,$05,$07,q7,71,71
+B8,$04,$01,$08,b8,72,72
+H8,$04,$03,$08,h8,72,72
+S8,$04,$09,$08,s8,72,72
+D8,$04,$0a,$08,d8,72,72
+Q8,$04,$05,$08,q8,72,72
+B9,$04,$01,$09,b9,73,73
+H9,$04,$03,$09,h9,73,73
+S9,$04,$09,$09,s9,73,73
+D9,$04,$0a,$09,d9,73,73
+Q9,$04,$05,$09,q9,73,73
+B10,$04,$01,$0A,b10,74,74
+H10,$04,$03,$0A,h10,74,74
+S10,$04,$09,$0A,s10,74,74
+D10,$04,$0a,$0A,d10,74,74
+Q10,$04,$05,$0A,q10,74,74
+B11,$04,$01,$0B,b11,75,75
+H11,$04,$03,$0B,h11,75,75
+S11,$04,$09,$0B,s11,75,75
+D11,$04,$0a,$0B,d11,75,75
+Q11,$04,$05,$0B,q11,75,75
+B12,$04,$01,$0C,b12,76,76
+H12,$04,$03,$0C,h12,76,76
+S12,$04,$09,$0C,s12,76,76
+D12,$04,$0a,$0C,d12,76,76
+Q12,$04,$05,$0C,q12,76,76
+B13,$04,$01,$0D,b13,77,77
+H13,$04,$03,$0D,h13,77,77
+S13,$04,$09,$0D,s13,77,77
+D13,$04,$0a,$0D,d13,77,77
+Q13,$04,$05,$0D,q13,77,77
+B14,$04,$01,$0E,b14,78,78
+H14,$04,$03,$0E,h14,78,78
+S14,$04,$09,$0E,s14,78,78
+D14,$04,$0a,$0E,d14,78,78
+Q14,$04,$05,$0E,q14,78,78
+B15,$04,$01,$0F,b15,79,79
+H15,$04,$03,$0F,h15,79,79
+S15,$04,$09,$0F,s15,79,79
+D15,$04,$0a,$0F,d15,79,79
+Q15,$04,$05,$0F,q15,79,79
+B16,$04,$01,$10,b16,80,80
+H16,$04,$03,$10,h16,80,80
+S16,$04,$09,$10,s16,80,80
+D16,$04,$0a,$10,d16,80,80
+Q16,$04,$05,$10,q16,80,80
+B17,$04,$01,$11,b17,81,81
+H17,$04,$03,$11,h17,81,81
+S17,$04,$09,$11,s17,81,81
+D17,$04,$0a,$11,d17,81,81
+Q17,$04,$05,$11,q17,81,81
+B18,$04,$01,$12,b18,82,82
+H18,$04,$03,$12,h18,82,82
+S18,$04,$09,$12,s18,82,82
+D18,$04,$0a,$12,d18,82,82
+Q18,$04,$05,$12,q18,82,82
+B19,$04,$01,$13,b19,83,83
+H19,$04,$03,$13,h19,83,83
+S19,$04,$09,$13,s19,83,83
+D19,$04,$0a,$13,d19,83,83
+Q19,$04,$05,$13,q19,83,83
+B20,$04,$01,$14,b20,84,84
+H20,$04,$03,$14,h20,84,84
+S20,$04,$09,$14,s20,84,84
+D20,$04,$0a,$14,d20,84,84
+Q20,$04,$05,$14,q20,84,84
+B21,$04,$01,$15,b21,85,85
+H21,$04,$03,$15,h21,85,85
+S21,$04,$09,$15,s21,85,85
+D21,$04,$0a,$15,d21,85,85
+Q21,$04,$05,$15,q21,85,85
+B22,$04,$01,$16,b22,86,86
+H22,$04,$03,$16,h22,86,86
+S22,$04,$09,$16,s22,86,86
+D22,$04,$0a,$16,d22,86,86
+Q22,$04,$05,$16,q22,86,86
+B23,$04,$01,$17,b23,87,87
+H23,$04,$03,$17,h23,87,87
+S23,$04,$09,$17,s23,87,87
+D23,$04,$0a,$17,d23,87,87
+Q23,$04,$05,$17,q23,87,87
+B24,$04,$01,$18,b24,88,88
+H24,$04,$03,$18,h24,88,88
+S24,$04,$09,$18,s24,88,88
+D24,$04,$0a,$18,d24,88,88
+Q24,$04,$05,$18,q24,88,88
+B25,$04,$01,$19,b25,89,89
+H25,$04,$03,$19,h25,89,89
+S25,$04,$09,$19,s25,89,89
+D25,$04,$0a,$19,d25,89,89
+Q25,$04,$05,$19,q25,89,89
+B26,$04,$01,$1A,b26,90,90
+H26,$04,$03,$1A,h26,90,90
+S26,$04,$09,$1A,s26,90,90
+D26,$04,$0a,$1A,d26,90,90
+Q26,$04,$05,$1A,q26,90,90
+B27,$04,$01,$1B,b27,91,91
+H27,$04,$03,$1B,h27,91,91
+S27,$04,$09,$1B,s27,91,91
+D27,$04,$0a,$1B,d27,91,91
+Q27,$04,$05,$1B,q27,91,91
+B28,$04,$01,$1C,b28,92,92
+H28,$04,$03,$1C,h28,92,92
+S28,$04,$09,$1C,s28,92,92
+D28,$04,$0a,$1C,d28,92,92
+Q28,$04,$05,$1C,q28,92,92
+B29,$04,$01,$1D,b29,93,93
+H29,$04,$03,$1D,h29,93,93
+S29,$04,$09,$1D,s29,93,93
+D29,$04,$0a,$1D,d29,93,93
+Q29,$04,$05,$1D,q29,93,93
+B30,$04,$01,$1E,b30,94,94
+H30,$04,$03,$1E,h30,94,94
+S30,$04,$09,$1E,s30,94,94
+D30,$04,$0a,$1E,d30,94,94
+Q30,$04,$05,$1E,q30,94,94
+B31,$04,$01,$1F,b31,95,95
+H31,$04,$03,$1F,h31,95,95
+S31,$04,$09,$1F,s31,95,95
+D31,$04,$0a,$1F,d31,95,95
+Q31,$04,$05,$1F,q31,95,95
 
 NZCV,$05,$00,$00,nzcv,0,0
-
+FPCR,$05,$00,$01,fpcr,0,0
+FPSR,$05,$00,$02,fpsr,0,0
+TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0

+ 462 - 1125
compiler/aarch64/aasmcpu.pas

@@ -1,7 +1,7 @@
 {
     Copyright (c) 2003-2012 by Florian Klaempfl and others
 
-    Contains the assembler object for ARM64
+    Contains the assembler object for Aarch64
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -149,9 +149,6 @@ uses
 
       pinsentry=^tinsentry;
 
-{    const
-      InsTab : array[0..instabentries-1] of TInsEntry={$i a64tab.inc} }
-
     var
       InsTabCache : PInsTabCache;
 
@@ -159,6 +156,7 @@ uses
       taicpu = class(tai_cpu_abstract_sym)
          oppostfix : TOpPostfix;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
+         procedure loadconditioncode(opidx: longint; const c: tasmcond);
          constructor op_none(op : tasmop);
 
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -167,15 +165,21 @@ uses
 
          constructor op_reg_reg(op : tasmop;_op1,_op2 : tregister);
          constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
+         constructor op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
+         constructor op_reg_const_shifterop(op : tasmop;_op1: tregister; _op2: aint;_op3 : tshifterop);
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
          constructor op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
+         constructor op_reg_reg_const_const(op : tasmop;_op1,_op2 : tregister; _op3, _op4: aint);
+         constructor op_reg_reg_const_shifterop(op : tasmop;_op1,_op2 : tregister; _op3: aint; const _op4 : tshifterop);
          constructor op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
          constructor op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
          constructor op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
-         constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+         constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
+         constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
+
 
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
@@ -188,6 +192,7 @@ uses
          function is_same_reg_move(regtype: Tregistertype):boolean; override;
 
          function spilling_get_operation_type(opnr: longint): topertype;override;
+         function spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;override;
 
          { assembler }
       public
@@ -203,28 +208,29 @@ uses
          procedure ppuwriteoper(ppufile:tcompilerppufile;const o:toper);override;
          procedure ppubuildderefimploper(var o:toper);override;
          procedure ppuderefoper(var o:toper);override;
-      private
-         { next fields are filled in pass1, so pass2 is faster }
-         inssize   : shortint;
-         insoffset : longint;
-         LastInsOffset : longint; { need to be public to be reset }
-         insentry  : PInsEntry;
-         function  InsEnd:longint;
-         procedure create_ot(objdata:TObjData);
-         function  Matches(p:PInsEntry):longint;
-         function  calcsize(p:PInsEntry):shortint;
-         procedure gencode(objdata:TObjData);
-         function  NeedAddrPrefix(opidx:byte):boolean;
-         procedure Swapoperands;
-         function  FindInsentry(objdata:TObjData):boolean;
       end;
 
       tai_align = class(tai_align_abstract)
         { nothing to add }
       end;
 
-    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
-    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+    type
+      tsimplereftype =
+         { valid reference }
+        (sr_simple,
+         { invalid reference, should not be generated by the code generator (but
+           can be encountered via inline assembly, where it must be rejected) }
+         sr_internal_illegal,
+         { invalid reference, may be generated by the code generator and then
+           must be simplified (also rejected in inline assembly) }
+         sr_complex);
+
+    function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
+    function can_be_shifter_operand(opc: tasmop; opnr: longint): boolean;
+    function valid_shifter_operand(opc: tasmop; useszr, usessp, is64bit: boolean; sm: tshiftmode; shiftimm: longint): boolean;
+
+    function spilling_create_load(const ref: treference; r: tregister): taicpu;
+    function spilling_create_store(r: tregister; const ref: treference): taicpu;
 
     function setoppostfix(i : taicpu;pf : toppostfix) : taicpu;
     function setcondition(i : taicpu;c : tasmcond) : taicpu;
@@ -261,6 +267,21 @@ implementation
       end;
 
 
+    procedure taicpu.loadconditioncode(opidx: longint; const c: tasmcond);
+      begin
+        allocate_oper(opidx+1);
+        with oper[opidx]^ do
+          begin
+            if typ<>top_conditioncode then
+              begin
+                clearop(opidx);
+              end;
+            cc:=c;
+            typ:=top_conditioncode;
+          end;
+      end;
+
+
 {*****************************************************************************
                                  taicpu Constructors
 *****************************************************************************}
@@ -314,6 +335,16 @@ implementation
       end;
 
 
+    constructor taicpu.op_reg_const_shifterop(op: tasmop; _op1: tregister; _op2: aint; _op3: tshifterop);
+      begin
+        inherited create(op);
+        ops:=3;
+        loadreg(0,_op1);
+        loadconst(1,_op2);
+        loadshifterop(2,_op3);
+      end;
+
+
     constructor taicpu.op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
       begin
          inherited create(op);
@@ -323,6 +354,15 @@ implementation
       end;
 
 
+    constructor taicpu.op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
+      begin
+        inherited create(op);
+        ops:=2;
+        loadreg(0,_op1);
+        loadconditioncode(1,_op2);
+      end;
+
+
     constructor taicpu.op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
       begin
          inherited create(op);
@@ -354,6 +394,28 @@ implementation
       end;
 
 
+     constructor taicpu.op_reg_reg_const_const(op: tasmop; _op1, _op2: tregister; _op3, _op4: aint);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,aint(_op3));
+         loadconst(3,aint(_op4));
+       end;
+
+
+     constructor taicpu.op_reg_reg_const_shifterop(op: tasmop; _op1, _op2: tregister; _op3: aint; const _op4: tshifterop);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,aint(_op3));
+         loadshifterop(3,_op4);
+       end;
+
+
      constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
        begin
          inherited create(op);
@@ -384,7 +446,7 @@ implementation
       end;
 
 
-     constructor taicpu.op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+     constructor taicpu.op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
       begin
          inherited create(op);
          ops:=4;
@@ -394,6 +456,16 @@ implementation
          loadshifterop(3,_op4);
       end;
 
+     constructor taicpu.op_reg_reg_reg_cond(op: tasmop; _op1, _op2, _op3: tregister; const _op4: tasmcond);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+         loadconditioncode(3,_op4);
+       end;
+
 
     constructor taicpu.op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
       begin
@@ -454,85 +526,403 @@ implementation
       end;
 
 
-    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
+    function spilling_create_op(op: tasmop; const ref: treference; r: tregister): taicpu;
+      const
+        { invalid sizes for aarch64 are 0 }
+        subreg2bytesize: array[TSubRegister] of byte =
+          (0,0,0,0,4,8,0,0,0,4,8,0,0,0);
       var
-        op: tasmop;
+        scalefactor: byte;
       begin
+        scalefactor:=subreg2bytesize[getsubreg(r)];
+        if scalefactor=0 then
+          internalerror(2014120301);
+        if (ref.offset>4095*scalefactor) or
+           ((ref.offset>255) and
+            ((ref.offset mod scalefactor)<>0)) or
+           (ref.offset<-256) then
+          internalerror(2014120302);
         case getregtype(r) of
-          R_INTREGISTER :
-            result:=taicpu.op_reg_ref(A_LDR,r,ref);
-          R_MMREGISTER :
-            begin
-              case getsubreg(r) of
-                R_SUBFD:
-                  op:=A_LDR;
-                R_SUBFS:
-                  op:=A_LDR;
-                else
-                  internalerror(2009112905);
-              end;
-              result:=taicpu.op_reg_ref(op,r,ref);
-            end;
+          R_INTREGISTER,
+          R_MMREGISTER:
+            result:=taicpu.op_reg_ref(op,r,ref);
           else
             internalerror(200401041);
         end;
       end;
 
 
-    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+    function is_valid_load_symbol(op: tasmop; oppostfix: toppostfix; const ref: treference): tsimplereftype;
+      begin
+        result:=sr_complex;
+        if not assigned(ref.symboldata) and
+           not(ref.refaddr in [addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
+          exit;
+        { can't use pre-/post-indexed mode here (makes no sense either) }
+        if ref.addressmode<>AM_OFFSET then
+          exit;
+        { "ldr literal" must be a 32/64 bit LDR and have a symbol }
+        if assigned(ref.symboldata) and
+           ((op<>A_LDR) or
+            not(oppostfix in [PF_NONE,PF_W,PF_SW]) or
+            not assigned(ref.symbol)) then
+          exit;
+        { if this is a (got) page offset load, we must have a base register and a
+          symbol }
+        if (ref.refaddr in [addr_gotpageoffset,addr_pageoffset]) and
+           (not assigned(ref.symbol) or
+            (ref.base=NR_NO) or
+            (ref.index<>NR_NO) or
+            (ref.offset<>0)) then
+          begin
+            result:=sr_internal_illegal;
+            exit;
+          end;
+        { cannot have base or index register (we generate these kind of
+          references internally, they should never end up here with an
+          extra base or offset) }
+        if (ref.refaddr in [addr_gotpage,addr_page]) and
+           (ref.base<>NR_NO) or
+           (ref.index<>NR_NO) then
+          begin
+            result:=sr_internal_illegal;
+            exit;
+          end;
+        result:=sr_simple;
+      end;
+
+
+    function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
       var
-        op: tasmop;
+        maxoffs: asizeint;
+        accesssize: longint;
       begin
-        case getregtype(r) of
-          R_INTREGISTER :
-            result:=taicpu.op_reg_ref(A_STR,r,ref);
-          R_MMREGISTER :
-            begin
-              case getsubreg(r) of
-                R_SUBFD:
-                  op:=A_STR;
-                R_SUBFS:
-                  op:=A_STR;
+        result:=sr_internal_illegal;
+        { post-indexed is only allowed for vector and immediate loads/stores }
+        if (ref.addressmode=AM_POSTINDEXED) and
+           not(op in [A_LD1,A_LD2,A_LD3,A_LD4,A_ST1,A_ST2,A_ST3,A_ST4]) and
+           (not(op in [A_LDR,A_STR,A_LDP,A_STP]) or
+            (ref.base=NR_NO) or
+            (ref.index<>NR_NO)) then
+          exit;
+
+        { can only have a shift mode if we have an index }
+        if (ref.index=NR_NO) and
+           (ref.shiftmode<>SM_None) then
+          exit;
+
+        { the index can never be the stack pointer }
+        if ref.index=NR_SP then
+          exit;
+
+        { no instruction supports an index without a base }
+        if (ref.base=NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            result:=sr_complex;
+            exit;
+          end;
+
+        { LDR literal or GOT entry: 32 or 64 bit, label }
+        if assigned(ref.symboldata) or
+           assigned(ref.symbol) then
+          begin
+            { we generate these kind of references internally; at least for now,
+              they should never end up here with an extra base or offset or so }
+            result:=is_valid_load_symbol(op,oppostfix,ref);
+            exit;
+          end;
+
+        { any other reference cannot be gotpage/gotpageoffset/pic }
+        if ref.refaddr in [addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset,addr_pic] then
+          exit;
+
+        { base & index:
+            * index cannot be the stack pointer
+            * offset must be 0
+            * can scale with the size of the access
+            * can zero/sign extend 32 bit index register, and/or multiple by
+              access size
+            * no pre/post-indexing
+        }
+        if (ref.base<>NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+              exit;
+            case op of
+              { this holds for both integer and fpu/vector loads }
+              A_LDR,A_STR:
+                if (ref.offset=0) and
+                   (((ref.shiftmode=SM_None) and
+                     (ref.shiftimm=0)) or
+                    ((ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                     (ref.shiftimm=tcgsizep2size[size]))) then
+                  result:=sr_simple
                 else
-                  internalerror(2009112904);
-              end;
-              result:=taicpu.op_reg_ref(op,r,ref);
+                  result:=sr_complex;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110704);
+              { these don't support base+index }
+              A_LDUR,A_STUR,
+              A_LDP,A_STP:
+                result:=sr_complex;
+              else
+                { nothing: result is already sr_internal_illegal };
             end;
+            exit;
+          end;
+
+        { base + immediate offset. Variants:
+            * LDR*/STR*:
+              - pre- or post-indexed with signed 9 bit immediate
+              - regular with unsiged scaled immediate (multiple of access
+                size), in the range 0 to (12 bit * access_size)-1
+            * LDP/STP
+              - pre- or post-indexed with signed 9 bit immediate
+              - regular with signed 9 bit immediate
+            * LDUR*/STUR*:
+              - regular with signed 9 bit immediate
+        }
+        if ref.base<>NR_NO then
+          begin
+            accesssize:=1 shl tcgsizep2size[size];
+            case op of
+              A_LDR,A_STR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset>=0) and
+                     (ref.offset<(((1 shl 12)-1)*accesssize)) and
+                     ((ref.offset mod accesssize)=0) then
+                    result:=sr_simple
+                  else if (ref.offset>=-256) and
+                     (ref.offset<=255) then
+                    begin
+                      { non pre-/post-indexed regular loads/stores can only be
+                        performed using LDUR/STUR }
+                      if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+                        result:=sr_simple
+                      else
+                        result:=sr_complex
+                    end
+                  else
+                    result:=sr_complex;
+                end;
+              A_LDP,A_LDNP,
+              A_STP,A_STNP:
+                begin
+                  { only supported for 32/64 bit }
+                  if not(oppostfix in [PF_W,PF_SW,PF_None]) then
+                    exit;
+                  { offset must be a multple of the access size }
+                  if (ref.offset mod accesssize)<>0 then
+                    exit;
+                  { offset must fit in a signed 7 bit offset }
+                  if (ref.offset>=-(1 shl (6+tcgsizep2size[size]))) and
+                     (ref.offset<=(1 shl (6+tcgsizep2size[size]))-1) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
+              A_LDUR,A_STUR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset>=-256) and
+                     (ref.offset<=255) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110907);
+              A_LDAR,
+              A_LDAXR,
+              A_LDXR,
+              A_LDXP,
+              A_STLR,
+              A_STLXR,
+              A_STLXP,
+              A_STXP,
+              A_STXR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset=0) then
+                    result:=sr_simple;
+                end
+              else
+                { nothing: result is already sr_internal_illegal };
+            end;
+            exit;
+          end;
+        { absolute addresses are not supported, have to load them first into
+          a register }
+        result:=sr_complex;
+      end;
+
+
+    function can_be_shifter_operand(opc: tasmop; opnr: longint): boolean;
+      begin
+        case opc of
+          A_ADD,
+          A_AND,
+          A_EON,
+          A_EOR,
+          A_ORN,
+          A_ORR,
+          A_SUB:
+            result:=opnr=3;
+          A_BIC,
+          A_CMN,
+          A_CMP,
+          A_MOVK,
+          A_MOVZ,
+          A_MOVN,
+          A_MVN,
+          A_NEG,
+          A_TST:
+            result:=opnr=2;
           else
-            internalerror(200401041);
+            result:=false;
         end;
       end;
 
 
+    function valid_shifter_operand(opc: tasmop; useszr, usessp, is64bit: boolean; sm: tshiftmode; shiftimm: longint): boolean;
+      begin
+        case opc of
+          A_ADD,
+          A_SUB,
+          A_NEG,
+          A_AND,
+          A_TST,
+          A_CMN,
+          A_CMP:
+            begin
+              result:=false;
+              if not useszr then
+                result:=
+                  (sm in shiftedregmodes) and
+                  ((shiftimm in [0..31]) or
+                   (is64bit and
+                    (shiftimm in [32..63])));
+              if not usessp then
+                result:=
+                  result or
+                  ((sm in extendedregmodes) and
+                   (shiftimm in [0..4]));
+            end;
+          A_BIC,
+          A_EON,
+          A_EOR,
+          A_MVN,
+          A_ORN,
+          A_ORR:
+            result:=
+              (sm in shiftedregmodes) and
+              (shiftimm in [0..31*(ord(is64bit)+1)+ord(is64bit)]);
+          A_MOVK,
+          A_MOVZ,
+          A_MOVN:
+            result:=
+              (sm=SM_LSL) and
+              ((shiftimm in [0,16]) or
+               (is64bit and
+                (shiftimm in [32,48])));
+          else
+            result:=false;
+        end;
+      end;
+
+
+    function spilling_create_load(const ref: treference; r: tregister): taicpu;
+      var
+        op: tasmop;
+      begin
+        if (ref.index<>NR_NO) or
+           (ref.offset<-256) or
+           (ref.offset>255) then
+          op:=A_LDR
+        else
+          op:=A_LDUR;
+        result:=spilling_create_op(op,ref,r);
+      end;
+
+
+    function spilling_create_store(r: tregister; const ref: treference): taicpu;
+      var
+        op: tasmop;
+      begin
+        if (ref.index<>NR_NO) or
+           (ref.offset<-256) or
+           (ref.offset>255) then
+          op:=A_STR
+        else
+          op:=A_STUR;
+        result:=spilling_create_op(op,ref,r);
+      end;
+
+
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
       begin
         case opcode of
-          A_ADC,A_ADD,A_AND,A_BIC,
-          A_EOR,A_CLZ,A_RBIT,
-          A_LDR,
-          A_MOV,A_MVN,A_MUL,
-          A_ORR,A_SBC,A_SUB,
-          A_UXT,A_SXT:
+          A_B,A_BL,
+          A_CMN,A_CMP,
+          A_CCMN,A_CCMP,
+          A_TST:
+            result:=operand_read;
+          A_STR,A_STUR:
             if opnr=0 then
-              result:=operand_write
+              result:=operand_read
             else
+              { check for pre/post indexed in spilling_get_operation_type_ref }
               result:=operand_read;
-          A_B,A_BL,
-          A_CMN,A_CMP,A_TST:
-            result:=operand_read;
-          A_STR:
-            { important is what happens with the involved registers }
+          A_STLXP,
+          A_STLXR,
+          A_STXP,
+          A_STXR:
             if opnr=0 then
-              result := operand_read
+              result:=operand_write
             else
-              { check for pre/post indexed }
-              result := operand_read;
-          else
-            internalerror(200403151);
+              result:=operand_read;
+          A_STP:
+            begin
+              if opnr in [0,1] then
+                result:=operand_read
+              else
+                { check for pre/post indexed in spilling_get_operation_type_ref }
+                result:=operand_read;
+            end;
+           A_LDP,
+           A_LDXP:
+             begin
+               if opnr in [0,1] then
+                 result:=operand_write
+               else
+                 { check for pre/post indexed in spilling_get_operation_type_ref }
+                 result:=operand_read;
+             end;
+           else
+             if opnr=0 then
+               result:=operand_write
+             else
+               result:=operand_read;
         end;
       end;
 
 
+    function taicpu.spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;
+      begin
+        result:=operand_read;
+        if (oper[opnr]^.ref^.base = reg) and
+          (oper[opnr]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) then
+           result:=operand_readwrite;
+      end;
+
+
     procedure BuildInsTabCache;
       var
         i : longint;
@@ -1069,22 +1459,12 @@ implementation
         { we need to reset everything here, because the choosen insentry
           can be invalid for a new situation where the previously optimized
           insentry is not correct }
-        InsEntry:=nil;
-        InsSize:=0;
-        LastInsOffset:=-1;
       end;
 
 
     procedure taicpu.ResetPass2;
       begin
         { we are here in a second pass, check if the instruction can be optimized }
-        if assigned(InsEntry) and
-           ((InsEntry^.flags and IF_PASS2)<>0) then
-         begin
-           InsEntry:=nil;
-           InsSize:=0;
-         end;
-        LastInsOffset:=-1;
       end;
 
 
@@ -1097,18 +1477,15 @@ implementation
     function taicpu.Pass1(objdata:TObjData):longint;
       begin
         Pass1:=0;
-        LastInsOffset:=-1;
       end;
 
 
     procedure taicpu.Pass2(objdata:TObjData);
       begin
         { error in pass1 ? }
-        if insentry=nil then
-         exit;
         current_filepos:=fileinfo;
         { Generate the instruction }
-        GenCode(objdata);
+        { GenCode(objdata); }
       end;
 
 
@@ -1132,1046 +1509,6 @@ implementation
       end;
 
 
-    function  taicpu.InsEnd:longint;
-      begin
-        Result:=0; { unimplemented }
-      end;
-
-
-    procedure taicpu.create_ot(objdata:TObjData);
-      begin
-      end;
-
-
-    function taicpu.Matches(p:PInsEntry):longint;
-      begin
-      end;
-
-
-    function  taicpu.calcsize(p:PInsEntry):shortint;
-      begin
-        result:=4;
-      end;
-
-
-    function  taicpu.NeedAddrPrefix(opidx:byte):boolean;
-      begin
-        Result:=False; { unimplemented }
-      end;
-
-
-    procedure taicpu.Swapoperands;
-      begin
-      end;
-
-
-    function taicpu.FindInsentry(objdata:TObjData):boolean;
-      begin
-      end;
-
-
-    procedure taicpu.gencode(objdata:TObjData);
-      var
-        bytes : dword;
-        i_field : byte;
-
-      procedure setshifterop(op : byte);
-        begin
-          case oper[op]^.typ of
-            top_const:
-              begin
-                i_field:=1;
-                bytes:=bytes or dword(oper[op]^.val and $fff);
-              end;
-            top_reg:
-              begin
-                i_field:=0;
-                bytes:=bytes or (getsupreg(oper[op]^.reg) shl 16);
-
-                { does a real shifter op follow? }
-                if (op+1<=op) and (oper[op+1]^.typ=top_shifterop) then
-                  begin
-                  end;
-              end;
-          else
-            internalerror(2005091103);
-          end;
-        end;
-
-      begin
-        bytes:=$0;
-        { evaluate and set condition code }
-
-        { condition code allowed? }
-
-        { setup rest of the instruction }
-        case insentry^.code[0] of
-          #$08:
-            begin
-              { set instruction code }
-              bytes:=bytes or (ord(insentry^.code[1]) shl 26);
-              bytes:=bytes or (ord(insentry^.code[2]) shl 21);
-
-              { set destination }
-              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
-
-              { create shifter op }
-              setshifterop(1);
-
-              { set i field }
-              bytes:=bytes or (i_field shl 25);
-
-              { set s if necessary }
-              if oppostfix=PF_S then
-                bytes:=bytes or (1 shl 20);
-            end;
-          #$ff:
-            internalerror(2005091101);
-          else
-            internalerror(2005091102);
-        end;
-        { we're finished, write code }
-        objdata.writebytes(bytes,sizeof(bytes));
-      end;
-
-
-{$ifdef dummy}
-(*
-static void gencode (long segment, long offset, int bits,
-                     insn *ins, char *codes, long insn_end)
-{
-    int has_S_code;             /* S - setflag */
-    int has_B_code;             /* B - setflag */
-    int has_T_code;             /* T - setflag */
-    int has_W_code;             /* ! => W flag */
-    int has_F_code;             /* ^ => S flag */
-    int keep;
-    unsigned char c;
-    unsigned char bytes[4];
-    long          data, size;
-    static int cc_code[] =      /* bit pattern of cc */
-  {                             /* order as enum in  */
-    0x0E, 0x03, 0x02, 0x00,     /* nasm.h            */
-    0x0A, 0x0C, 0x08, 0x0D,
-    0x09, 0x0B, 0x04, 0x01,
-    0x05, 0x07, 0x06,
-  };
-
-
-#ifdef DEBUG
-static char *CC[] =
-  {                                    /* condition code names */
-    "AL", "CC", "CS", "EQ",
-    "GE", "GT", "HI", "LE",
-    "LS", "LT", "MI", "NE",
-    "PL", "VC", "VS", "",
-    "S"
-};
-
-
-    has_S_code = (ins->condition & C_SSETFLAG);
-    has_B_code = (ins->condition & C_BSETFLAG);
-    has_T_code = (ins->condition & C_TSETFLAG);
-    has_W_code = (ins->condition & C_EXSETFLAG);
-    has_F_code = (ins->condition & C_FSETFLAG);
-    ins->condition = (ins->condition & 0x0F);
-
-
-    if (rt_debug)
-      {
-    printf ("gencode: instruction: %s%s", insn_names[ins->opcode],
-            CC[ins->condition & 0x0F]);
-    if (has_S_code)
-      printf ("S");
-    if (has_B_code)
-      printf ("B");
-    if (has_T_code)
-      printf ("T");
-    if (has_W_code)
-      printf ("!");
-    if (has_F_code)
-      printf ("^");
-
-    printf ("\n");
-
-    c = *codes;
-
-    printf ("   (%d)  decode - '0x%02X'\n", ins->operands, c);
-
-
-    bytes[0] = 0xB;
-    bytes[1] = 0xE;
-    bytes[2] = 0xE;
-    bytes[3] = 0xF;
-      }
-
-    // First condition code in upper nibble
-    if (ins->condition < C_NONE)
-      {
-        c = cc_code[ins->condition] << 4;
-      }
-    else
-      {
-        c = cc_code[C_AL] << 4; // is often ALWAYS but not always
-      }
-
-
-    switch (keep = *codes)
-      {
-        case 1:
-          // B, BL
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-
-          if (ins->oprs[0].segment != segment)
-            {
-              // fais une relocation
-              c = 1;
-              data = 0; // Let the linker locate ??
-            }
-          else
-            {
-              c = 0;
-              data = ins->oprs[0].offset - (offset + 8);
-
-              if (data % 4)
-                {
-                  errfunc (ERR_NONFATAL, "offset not aligned on 4 bytes");
-                }
-            }
-
-          if (data >= 0x1000)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-
-          data = data >> 2;
-          bytes[1] = (data >> 16) & 0xFF;
-          bytes[2] = (data >> 8)  & 0xFF;
-          bytes[3] = (data )      & 0xFF;
-
-          if (c == 1)
-            {
-//            out (offset, segment, &bytes[0], OUT_RAWDATA+1, NO_SEG, NO_SEG);
-              out (offset, segment, &bytes[0], OUT_REL3ADR+4, ins->oprs[0].segment, NO_SEG);
-            }
-          else
-            {
-              out (offset, segment, &bytes[0], OUT_RAWDATA+4, NO_SEG, NO_SEG);
-            }
-          return;
-
-        case 2:
-          // SWI
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-          data = ins->oprs[0].offset;
-          bytes[1] = (data >> 16) & 0xFF;
-          bytes[2] = (data >> 8) & 0xFF;
-          bytes[3] = (data) & 0xFF;
-          out (offset, segment, &bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-        case 3:
-          // BX
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-          bytes[1] = *codes++;
-          bytes[2] = *codes++;
-          bytes[3] = *codes++;
-          c = regval (&ins->oprs[0],1);
-          if (c == 15)  // PC
-            {
-              errfunc (ERR_WARNING, "'BX' with R15 has undefined behaviour");
-            }
-          else if (c > 15)
-            {
-              errfunc (ERR_NONFATAL, "Illegal register specified for 'BX'");
-            }
-
-          bytes[3] |= (c & 0x0F);
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 4:         // AND Rd,Rn,Rm
-        case 5:         // AND Rd,Rn,Rm,<shift>Rs
-        case 6:         // AND Rd,Rn,Rm,<shift>imm
-        case 7:         // AND Rd,Rn,<shift>imm
-          ++codes;
-#ifdef DEBUG
-          if (rt_debug)
-            {
-              printf ("         decode - '0x%02X'\n", keep);
-              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
-            }
-#endif
-          bytes[0] = c | *codes;
-          ++codes;
-
-          bytes[1] = *codes;
-          if (has_S_code)
-            bytes[1] |= 0x10;
-          c = regval (&ins->oprs[1],1);
-          // Rn in low nibble
-          bytes[1] |= c;
-
-          // Rd in high nibble
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          if (keep != 7)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[2],1);
-            }
-
-          // Shifts if any
-          if (keep == 5 || keep == 6)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 5)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[3],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 6)
-                {
-                  c = (ins->oprs[3].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[3]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,reg,imm
-          if (keep == 7)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[2].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 8:         // MOV Rd,Rm
-        case 9:         // MOV Rd,Rm,<shift>Rs
-        case 0xA:       // MOV Rd,Rm,<shift>imm
-        case 0xB:       // MOV Rd,<shift>imm
-          ++codes;
-#ifdef DEBUG
-          if (rt_debug)
-            {
-              printf ("         decode - '0x%02X'\n", keep);
-              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
-            }
-#endif
-          bytes[0] = c | *codes;
-          ++codes;
-
-          bytes[1] = *codes;
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // Rd in high nibble
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          if (keep != 0x0B)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[1],1);
-            }
-
-          // Shifts if any
-          if (keep == 0x09 || keep == 0x0A)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 0x09)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[2],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 0x0A)
-                {
-                  c = (ins->oprs[2].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[2]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,imm
-          if (keep == 0x0B)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-
-        case 0xC:       // CMP Rn,Rm
-        case 0xD:       // CMP Rn,Rm,<shift>Rs
-        case 0xE:       // CMP Rn,Rm,<shift>imm
-        case 0xF:       // CMP Rn,<shift>imm
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes;
-
-          // Implicit S code
-          bytes[1] |= 0x10;
-
-          c = regval (&ins->oprs[0],1);
-          // Rn in low nibble
-          bytes[1] |= c;
-
-          // No destination
-          bytes[2] = 0;
-
-          if (keep != 0x0B)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[1],1);
-            }
-
-          // Shifts if any
-          if (keep == 0x0D || keep == 0x0E)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 0x0D)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[2],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 0x0E)
-                {
-                  c = (ins->oprs[2].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[2]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,imm
-          if (keep == 0x0F)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 0x10:      // MRS Rd,<psr>
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          c = regval (&ins->oprs[0],1);
-
-          bytes[2] = c << 4;
-
-          bytes[3] = 0;
-
-          c = ins->oprs[1].basereg;
-
-          if (c == R_CPSR || c == R_SPSR)
-            {
-              if (c == R_SPSR)
-                {
-                  bytes[1] |= 0x40;
-                }
-            }
-          else
-            {
-              errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-
-          return;
-
-        case 0x11:      // MSR <psr>,Rm
-        case 0x12:      // MSR <psrf>,Rm
-        case 0x13:      // MSR <psrf>,#expression
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[2] = *codes;
-
-
-          if (keep == 0x11 || keep == 0x12)
-            {
-              // Rm
-              c = regval (&ins->oprs[1],1);
-
-              bytes[3] = c;
-            }
-          else
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          c = ins->oprs[0].basereg;
-
-          if ( keep == 0x11)
-            {
-              if ( c == R_CPSR || c == R_SPSR)
-                {
-                if ( c== R_SPSR)
-                  {
-                    bytes[1] |= 0x40;
-                  }
-                }
-            else
-              {
-                errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
-              }
-            }
-          else
-            {
-              if ( c == R_CPSR_FLG || c == R_SPSR_FLG)
-                {
-                  if ( c== R_SPSR_FLG)
-                    {
-                      bytes[1] |= 0x40;
-                    }
-                }
-              else
-                {
-                  errfunc (ERR_NONFATAL, "CPSR_flg or SPSR_flg expected");
-                }
-            }
-          break;
-
-        case 0x14:      // MUL  Rd,Rm,Rs
-        case 0x15:      // MULA Rd,Rm,Rs,Rn
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[3] = *codes;
-
-          // Rd
-          bytes[1] |= regval (&ins->oprs[0],1);
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // Rm
-          bytes[3] |= regval (&ins->oprs[1],1);
-
-          // Rs
-          bytes[2] = regval (&ins->oprs[2],1);
-
-          if (keep == 0x15)
-            {
-              bytes[2] |= regval (&ins->oprs[3],1) << 4;
-            }
-          break;
-
-        case 0x16:      // SMLAL RdHi,RdLo,Rm,Rs
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[3] = *codes;
-
-          // RdHi
-          bytes[1] |= regval (&ins->oprs[1],1);
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // RdLo
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          // Rm
-          bytes[3] |= regval (&ins->oprs[2],1);
-
-          // Rs
-          bytes[2] |= regval (&ins->oprs[3],1);
-
-          break;
-
-        case 0x17:      // LDR Rd, expression
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-          if (has_T_code)
-            {
-              errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
-            }
-          if (has_W_code)
-            {
-              errfunc (ERR_NONFATAL, "'!' not allowed");
-            }
-
-          // Rn - implicit R15
-          bytes[1] |= 0xF;
-
-          if (ins->oprs[1].segment != segment)
-            {
-              errfunc (ERR_NONFATAL, "label not in same segment");
-            }
-
-          data = ins->oprs[1].offset - (offset + 8);
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x1000)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-
-          bytes[2] |= ((data & 0xF00) >> 8);
-          bytes[3] = data & 0xFF;
-          break;
-
-        case 0x18:      // LDR Rd, [Rn]
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-          if (has_T_code)
-            {
-              bytes[1] |= 0x20;         // write-back
-            }
-          else
-            {
-              bytes[0] |= 0x01;         // implicit pre-index mode
-            }
-
-          if (has_W_code)
-            {
-              bytes[1] |= 0x20;         // write-back
-            }
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (c == 0x15)                // R15
-            data = -8;
-          else
-            data = 0;
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          bytes[2] |= ((data & 0xF00) >> 8);
-          bytes[3] = data & 0xFF;
-          break;
-
-        case 0x19:      // LDR Rd, [Rn,#expression]
-        case 0x20:      // LDR Rd, [Rn,Rm]
-        case 0x21:      // LDR Rd, [Rn,Rm,shift]
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
-            {
-              bytes[0] |= 0x01;         // pre-index mode
-              if (has_W_code)
-                {
-                  bytes[1] |= 0x20;
-                }
-              if (has_T_code)
-                {
-                  errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
-                }
-            }
-          else
-            {
-              if (has_T_code)           // Forced write-back in post-index mode
-                {
-                  bytes[1] |= 0x20;
-                }
-              if (has_W_code)
-                {
-                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
-                }
-            }
-
-          if (keep == 0x19)
-            {
-              data = ins->oprs[2].offset;
-
-              if (data < 0)
-                {
-                  data = -data;
-                }
-              else
-                {
-                  bytes[1] |= 0x80;
-                }
-
-              if (data >= 0x1000)
-                {
-                  errfunc (ERR_NONFATAL, "too long offset");
-                }
-
-              bytes[2] |= ((data & 0xF00) >> 8);
-              bytes[3] = data & 0xFF;
-            }
-          else
-            {
-              if (ins->oprs[2].minus == 0)
-                {
-                  bytes[1] |= 0x80;
-                }
-              c = regval (&ins->oprs[2],1);
-              bytes[3] = c;
-
-              if (keep == 0x21)
-                {
-                  c = ins->oprs[3].offset;
-                  if (c > 0x1F)
-                    {
-                      errfunc (ERR_NONFATAL, "too large shiftvalue");
-                      c = c & 0x1F;
-                    }
-
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  bytes[3] |= shiftval (&ins->oprs[3]) << 5;
-                }
-            }
-
-          break;
-
-        case 0x22:      // LDRH Rd, expression
-          ++codes;
-
-          bytes[0] = c | 0x01;          // Implicit pre-index
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn - implicit R15
-          bytes[1] |= 0xF;
-
-          if (ins->oprs[1].segment != segment)
-            {
-              errfunc (ERR_NONFATAL, "label not in same segment");
-            }
-
-          data = ins->oprs[1].offset - (offset + 8);
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x100)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-          bytes[3] = *codes++;
-
-          bytes[2] |= ((data & 0xF0) >> 4);
-          bytes[3] |= data & 0xF;
-          break;
-
-        case 0x23:      // LDRH Rd, Rn
-          ++codes;
-
-          bytes[0] = c | 0x01;          // Implicit pre-index
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (c == 0x15)                // R15
-            data = -8;
-          else
-            data = 0;
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x100)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-          bytes[3] = *codes++;
-
-          bytes[2] |= ((data & 0xF0) >> 4);
-          bytes[3] |= data & 0xF;
-          break;
-
-        case 0x24:      // LDRH Rd, Rn, expression
-        case 0x25:      // LDRH Rd, Rn, Rm
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
-            {
-              bytes[0] |= 0x01;         // pre-index mode
-              if (has_W_code)
-                {
-                  bytes[1] |= 0x20;
-                }
-            }
-          else
-            {
-              if (has_W_code)
-                {
-                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
-                }
-            }
-
-          bytes[3] = *codes++;
-
-          if (keep == 0x24)
-            {
-              data = ins->oprs[2].offset;
-
-              if (data < 0)
-                {
-                  data = -data;
-                }
-              else
-                {
-                  bytes[1] |= 0x80;
-                }
-
-              if (data >= 0x100)
-                {
-                  errfunc (ERR_NONFATAL, "too long offset");
-                }
-
-              bytes[2] |= ((data & 0xF0) >> 4);
-              bytes[3] |= data & 0xF;
-            }
-          else
-            {
-              if (ins->oprs[2].minus == 0)
-                {
-                  bytes[1] |= 0x80;
-                }
-              c = regval (&ins->oprs[2],1);
-              bytes[3] |= c;
-
-            }
-          break;
-
-        case 0x26:      // LDM/STM Rn, {reg-list}
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[0] |= ( *codes >> 4) & 0xF;
-          bytes[1] = ( *codes << 4) & 0xF0;
-          ++codes;
-
-          if (has_W_code)
-            {
-              bytes[1] |= 0x20;
-            }
-          if (has_F_code)
-            {
-              bytes[1] |= 0x40;
-            }
-
-          // Rn
-          bytes[1] |= regval (&ins->oprs[0],1);
-
-          data = ins->oprs[1].basereg;
-
-          bytes[2] = ((data >> 8) & 0xFF);
-          bytes[3] = (data & 0xFF);
-
-          break;
-
-        case 0x27:      // SWP Rd, Rm, [Rn]
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[0] |= *codes++;
-
-          bytes[1] = regval (&ins->oprs[2],1);
-          if (has_B_code)
-            {
-              bytes[1] |= 0x40;
-            }
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          bytes[3] = *codes++;
-          bytes[3] |= regval (&ins->oprs[1],1);
-          break;
-
-        default:
-          errfunc (ERR_FATAL, "unknown decoding of instruction");
-
-          bytes[0] = c;
-          // And a fix nibble
-          ++codes;
-          bytes[0] |= *codes++;
-
-         if ( *codes == 0x01)           // An I bit
-           {
-
-           }
-         if ( *codes == 0x02)           // An I bit
-           {
-
-           }
-         ++codes;
-      }
-    out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-}
-
-*)
-{$endif dummy}
-
 begin
   cai_align:=tai_align;
 end.

+ 288 - 0
compiler/aarch64/agcpugas.pas

@@ -0,0 +1,288 @@
+{
+    Copyright (c) 2003,2014 by Florian Klaempfl and Jonas Maebe
+
+    This unit implements an asm for AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+{ This unit implements the GNU Assembler writer for AArch64
+}
+
+unit agcpugas;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+       globtype,
+       aasmtai,
+       aggas,
+       cpubase,cpuinfo;
+
+    type
+      TAArch64InstrWriter=class(TCPUInstrWriter)
+        procedure WriteInstruction(hp : tai);override;
+      end;
+
+      TAArch64AppleAssembler=class(TAppleGNUassembler)
+        constructor create(smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
+      end;
+
+
+    const
+      gas_shiftmode2str : array[tshiftmode] of string[4] = (
+        '','lsl','lsr','asr',
+        'uxtb','uxth','uxtw','uxtx',
+        'sxtb','sxth','sxtw','sxtx');
+
+    const 
+      cputype_to_gas_march : array[tcputype] of string = (
+        '', // cpu_none
+        'armv8'
+      );
+
+  implementation
+
+    uses
+       cutils,globals,verbose,
+       systems,
+       assemble,
+       aasmcpu,
+       itcpugas,
+       cgbase,cgutils;
+
+
+{****************************************************************************}
+{                      Apple AArch64 Assembler writer                        }
+{****************************************************************************}
+
+    constructor TAArch64AppleAssembler.create(smart: boolean);
+      begin
+        inherited create(smart);
+        InstrWriter := TAArch64InstrWriter.create(self);
+      end;
+
+    function TAArch64AppleAssembler.MakeCmdLine: TCmdStr;
+      begin
+        { 'as' calls through to clang for aarch64, and that one only supports
+          reading from standard input in case "-" is specified as input file
+          (in which case you also have to specify the language via -x) }
+        result:=inherited;
+{$ifdef hasunix}
+        if DoPipe then
+          result:=result+' -x assembler -'
+{$endif}
+      end;
+
+
+{****************************************************************************}
+{                  Helper routines for Instruction Writer                    }
+{****************************************************************************}
+
+    function getreferencestring(var ref : treference) : string;
+      const
+        darwin_addrpage2str: array[addr_page..addr_gotpageoffset] of string[11] =
+           ('@PAGE','@PAGEOFF','@GOTPAGE','@GOTPAGEOFF');
+      begin
+        if ref.base=NR_NO then
+          begin
+            case ref.refaddr of
+              addr_gotpage,
+              addr_page,
+              addr_gotpageoffset,
+              addr_pageoffset:
+                begin
+                  if not assigned(ref.symbol) or
+                     (ref.base<>NR_NO) or
+                     (ref.index<>NR_NO) or
+                     (ref.shiftmode<>SM_None) or
+                     (ref.offset<>0) then
+                    internalerror(2014121501);
+                  if target_asm.id=as_darwin then
+                    result:=ref.symbol.name+darwin_addrpage2str[ref.refaddr]
+                  else
+                    { todo }
+                    internalerror(2014121502);
+                end
+              else
+                internalerror(2015022301);
+            end
+          end
+        else
+          begin
+            result:='['+gas_regname(ref.base);
+            if ref.addressmode=AM_POSTINDEXED then
+              result:=result+']';
+            if ref.index<>NR_NO then
+              begin
+                if (ref.offset<>0) or
+                   assigned(ref.symbol) then
+                  internalerror(2014121504);
+                result:=result+', '+gas_regname(ref.index);
+                case ref.shiftmode of
+                  SM_None: ;
+                  SM_LSL,
+                  SM_UXTW, SM_UXTX, SM_SXTW, SM_SXTX:
+                    begin
+                      result:=result+', '+gas_shiftmode2str[ref.shiftmode];
+                      if (ref.shiftmode=SM_LSL) or
+                         (ref.shiftimm<>0) then
+                        result:=result+' #'+tostr(ref.shiftimm);
+                    end
+                  else
+                    internalerror(2014121505);
+                end;
+              end
+            else
+              begin
+                if assigned(ref.symbol) then
+                  begin
+                    case ref.refaddr of
+                      addr_gotpageoffset,
+                      addr_pageoffset:
+                        begin
+                          if target_asm.id=as_darwin then
+                            result:=result+', '+ref.symbol.name+darwin_addrpage2str[ref.refaddr]
+                          else
+                            { todo }
+                            internalerror(2014122510);
+                        end
+                      else
+                        { todo: not yet generated/don't know syntax }
+                        internalerror(2014121506);
+                    end;
+                  end
+                else
+                  begin
+                    if ref.refaddr<>addr_no then
+                      internalerror(2014121506);
+                    if (ref.offset<>0) then
+                      result:=result+', #'+tostr(ref.offset);
+                  end;
+              end;
+            case ref.addressmode of
+              AM_OFFSET:
+                result:=result+']';
+              AM_PREINDEXED:
+                result:=result+']!';
+            end;
+          end;
+      end;
+
+
+    function getopstr(hp: taicpu; opnr: longint; const o: toper): string;
+      begin
+        case o.typ of
+          top_reg:
+            { we cannot yet represent "umov w0, v4.s[0]" or "ins v4.d[0], x1",
+              so for now we use "s4" or "d4" instead -> translate here }
+            if ((hp.opcode=A_INS) or
+                (hp.opcode=A_UMOV)) and
+               (getregtype(hp.oper[opnr]^.reg)=R_MMREGISTER) then
+              begin
+                case getsubreg(hp.oper[opnr]^.reg) of
+                  R_SUBMMS:
+                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.S[0]';
+                  R_SUBMMD:
+                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.D[0]';
+                  else
+                    internalerror(2014122907);
+                end;
+              end
+            else
+              getopstr:=gas_regname(o.reg);
+          top_shifterop:
+            begin
+              getopstr:=gas_shiftmode2str[o.shifterop^.shiftmode];
+              if o.shifterop^.shiftimm<>0 then
+                getopstr:=getopstr+' #'+tostr(o.shifterop^.shiftimm)
+            end;
+          top_const:
+            if o.val>=0 then
+              getopstr:='#'+tostr(o.val)
+            else
+              getopstr:='#0x'+hexStr(o.val,16);
+          top_conditioncode:
+            getopstr:=cond2str[o.cc];
+          top_ref:
+            if is_calljmp(hp.opcode) then
+              begin
+                if o.ref^.refaddr<>addr_full then
+                  internalerror(2014122220);
+                if not assigned(o.ref^.symbol) or
+                   assigned(o.ref^.relsymbol) or
+                   (o.ref^.base<>NR_NO) or
+                   (o.ref^.index<>NR_NO) or
+                   (o.ref^.offset<>0) then
+                  internalerror(2014122221);
+                getopstr:=o.ref^.symbol.name;
+              end
+            else
+              getopstr:=getreferencestring(o.ref^);
+          else
+            internalerror(2014121507);
+        end;
+      end;
+
+
+    procedure TAArch64InstrWriter.WriteInstruction(hp : tai);
+      var
+        op: TAsmOp;
+        s: string;
+        i: byte;
+        sep: string[3];
+      begin
+        op:=taicpu(hp).opcode;
+        s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix];
+        if taicpu(hp).condition<>C_NONE then
+          s:=s+'.'+cond2str[taicpu(hp).condition];
+        if taicpu(hp).ops<>0 then
+          begin
+            sep:=#9;
+            for i:=0 to taicpu(hp).ops-1 do
+              begin
+                 // debug code
+                 // writeln(s);
+                 // writeln(taicpu(hp).fileinfo.line);
+                 s:=s+sep+getopstr(taicpu(hp),i,taicpu(hp).oper[i]^);
+                 sep:=',';
+              end;
+          end;
+        owner.AsmWriteLn(s);
+      end;
+
+
+    const
+       as_aarch64_gas_darwin_info : tasminfo =
+          (
+            id     : as_darwin;
+            idtxt  : 'AS-Darwin';
+            asmbin : 'as';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM -arch arm64';
+            supported_targets : [system_aarch64_darwin];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
+            labelprefix : 'L';
+            comment : '# ';
+            dollarsign: '$';
+          );
+
+
+begin
+  RegisterAssembler(as_aarch64_gas_darwin_info,TAArch64AppleAssembler);
+end.

+ 25 - 7
compiler/aarch64/aoptcpub.pas

@@ -118,17 +118,35 @@ Implementation
     End;
 
 
-  function TAoptBaseCpu.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
+  function TAoptBaseCpu.RegModifiedByInstruction(reg: tregister; p1: tai): boolean;
     var
-      i : Longint;
+      i: longint;
+      preg: tregister;
     begin
       result:=false;
       for i:=0 to taicpu(p1).ops-1 do
-        if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
-          begin
-            result:=true;
-            exit;
-          end;
+        case taicpu(p1).oper[i]^.typ of
+          top_reg:
+            begin
+              preg:=taicpu(p1).oper[i]^.reg;
+              if (getregtype(preg)=getregtype(reg)) and
+                 (getsupreg(preg)=getsupreg(reg)) and
+                 (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
+                begin
+                  result:=true;
+                  exit;
+                end;
+            end;
+          top_ref:
+            begin
+              if (taicpu(p1).oper[i]^.ref^.addressmode<>am_offset) and
+                 (reg=taicpu(p1).oper[i]^.ref^.base) then
+                begin
+                  result:=true;
+                  exit
+                end;
+            end;
+        end;
     end;
 
 End.

+ 2284 - 0
compiler/aarch64/cgcpu.pas

@@ -0,0 +1,2284 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    This unit implements the code generator for AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       globtype,parabase,
+       cgbase,cgutils,cgobj,
+       aasmbase,aasmtai,aasmdata,aasmcpu,
+       cpubase,cpuinfo,
+       node,symconst,SymType,symdef,
+       rgcpu;
+
+    type
+      tcgaarch64=class(tcg)
+       protected
+        { simplifies "ref" so it can be used with "op". If "ref" can be used
+          with a different load/Store operation that has the same meaning as the
+          original one, "op" will be replaced with the alternative }
+        procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+        { changes register size without adding register allocation info }
+        function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
+       public
+        function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
+        procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+        procedure init_register_allocators;override;
+        procedure done_register_allocators;override;
+        function  getmmregister(list:TAsmList;size:tcgsize):tregister;override;
+        function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+        procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
+        procedure a_call_reg(list:TAsmList;Reg:tregister);override;
+        { General purpose instructions }
+        procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+        procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
+        procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
+        procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
+        procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        { move instructions }
+        procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
+        procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
+        procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
+        procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
+        procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
+        procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
+        procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
+        { fpu move instructions (not used, all floating point is vector unit-based) }
+        procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
+        procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
+        procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
+        procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
+        procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
+        procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
+
+        procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+        procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
+        { comparison operations }
+        procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
+        procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
+        procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
+        procedure a_jmp_name(list: TAsmList; const s: string);override;
+        procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
+        procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
+        procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
+        procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
+        procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
+        procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
+        procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
+        procedure g_maybe_got_init(list: TAsmList); override;
+        procedure g_restore_registers(list: TAsmList);override;
+        procedure g_save_registers(list: TAsmList);override;
+        procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
+        procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
+        procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
+        procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
+       private
+        function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+        procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+      end;
+
+    procedure create_codegen;
+
+    const
+      TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
+      );
+      TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
+      );
+      TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
+        C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
+      );
+
+
+implementation
+
+  uses
+    globals,verbose,systems,cutils,
+    paramgr,fmodule,
+    symtable,symsym,
+    tgobj,
+    procinfo,cpupi;
+
+
+    procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+      var
+        href: treference;
+        so: tshifterop;
+        accesssize: longint;
+      begin
+        if (ref.base=NR_NO) then
+          begin
+            if ref.shiftmode<>SM_None then
+              internalerror(2014110701);
+            ref.base:=ref.index;
+            ref.index:=NR_NO;
+          end;
+        { no abitrary scale factor support (the generic code doesn't set it,
+          AArch-specific code shouldn't either) }
+        if not(ref.scalefactor in [0,1]) then
+          internalerror(2014111002);
+
+        case simple_ref_type(op,size,oppostfix,ref) of
+          sr_simple:
+            exit;
+          sr_internal_illegal:
+            internalerror(2014121702);
+          sr_complex:
+            { continue } ;
+        end;
+
+        if assigned(ref.symbol) then
+          begin
+            { internal "load symbol" instructions should already be valid }
+            if assigned(ref.symboldata) or
+               (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
+              internalerror(2014110802);
+            { no relative symbol support (needed) yet }
+            if assigned(ref.relsymbol) then
+              internalerror(2014111001);
+            { on Darwin: load the address from the GOT. There does not appear to
+              be a non-GOT variant. This consists of first loading the address
+              of the page containing the GOT entry for this variable, and then
+              the address of the entry itself from that page (can be relaxed by
+              the linker in case the variable itself can be stored directly in
+              the GOT) }
+            if target_info.system in systems_darwin then
+              begin
+                if (preferred_newbasereg=NR_NO) or
+                   (ref.base=preferred_newbasereg) or
+                   (ref.index=preferred_newbasereg) then
+                  preferred_newbasereg:=getaddressregister(list);
+                { load the (GOT) page }
+                reference_reset_symbol(href,ref.symbol,0,8);
+                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+                   ((ref.symbol.typ=AT_DATA) and
+                    (ref.symbol.bind=AB_LOCAL)) then
+                  href.refaddr:=addr_page
+                else
+                  href.refaddr:=addr_gotpage;
+                list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
+                { load the GOT entry (= address of the variable) }
+                reference_reset_base(href,preferred_newbasereg,0,sizeof(pint));
+                href.symbol:=ref.symbol;
+                { code symbols defined in the current compilation unit do not
+                  have to be accessed via the GOT }
+                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+                   ((ref.symbol.typ=AT_DATA) and
+                    (ref.symbol.bind=AB_LOCAL)) then
+                  begin
+                    href.base:=NR_NO;
+                    href.refaddr:=addr_pageoffset;
+                    list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
+                  end
+                else
+                  begin
+                    href.refaddr:=addr_gotpageoffset;
+                    { use a_load_ref_reg() rather than directly encoding the LDR,
+                      so that we'll check the validity of the reference }
+                    a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
+                  end;
+                { set as new base register }
+                if ref.base=NR_NO then
+                  ref.base:=preferred_newbasereg
+                else if ref.index=NR_NO then
+                  ref.index:=preferred_newbasereg
+                else
+                  begin
+                    { make sure it's valid in case ref.base is SP -> make it
+                      the second operand}
+                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
+                    ref.base:=preferred_newbasereg
+                  end;
+                ref.symbol:=nil;
+              end
+            else
+              { todo }
+              internalerror(2014111003);
+          end;
+
+        { base & index }
+        if (ref.base<>NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            case op of
+              A_LDR, A_STR:
+                begin
+                  if (ref.shiftmode=SM_None) and
+                     (ref.shiftimm<>0) then
+                    internalerror(2014110805);
+                  { wrong shift? (possible in case of something like
+                     array_of_2byte_rec[x].bytefield -> shift will be set 1, but
+                     the final load is a 1 byte -> can't use shift after all }
+                  if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                     ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
+                      (ref.offset<>0)) then
+                    begin
+                      if preferred_newbasereg=NR_NO then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                      { possibly still an invalid offset -> fall through }
+                    end
+                  else if ref.offset<>0 then
+                    begin
+                      if (preferred_newbasereg=NR_NO) or
+                         { we keep ref.index, so it must not be overwritten }
+                         (ref.index=preferred_newbasereg) then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { add to the base and not to the index, because the index
+                        may be scaled; this works even if the base is SP }
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      ref.offset:=0;
+                      ref.base:=preferred_newbasereg;
+                      { finished }
+                      exit;
+                    end
+                  else
+                    { valid -> exit }
+                    exit;
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110704);
+              { these don't support base+index }
+              A_LDUR,A_STUR,
+              A_LDP,A_STP:
+                begin
+                  { these either don't support pre-/post-indexing, or don't
+                    support it with base+index }
+                  if ref.addressmode<>AM_OFFSET then
+                    internalerror(2014110911);
+                  if preferred_newbasereg=NR_NO then
+                    preferred_newbasereg:=getaddressregister(list);
+                  if ref.shiftmode<>SM_None then
+                    begin
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                    end
+                  else
+                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
+                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                  { fall through to the handling of base + offset, since the
+                    offset may still be too big }
+                end;
+              else
+                internalerror(2014110901);
+            end;
+          end;
+
+        { base + offset }
+        if ref.base<>NR_NO then
+          begin
+            { valid offset for LDUR/STUR -> use that }
+            if (ref.addressmode=AM_OFFSET) and
+               (op in [A_LDR,A_STR]) and
+               (ref.offset>=-256) and
+               (ref.offset<=255) then
+              begin
+                if op=A_LDR then
+                  op:=A_LDUR
+                else
+                  op:=A_STUR
+              end
+            { if it's not a valid LDUR/STUR, use LDR/STR }
+            else if (op in [A_LDUR,A_STUR]) and
+               ((ref.offset<-256) or
+                (ref.offset>255) or
+                (ref.addressmode<>AM_OFFSET)) then
+              begin
+                if op=A_LDUR then
+                  op:=A_LDR
+                else
+                  op:=A_STR
+              end;
+            case op of
+              A_LDR,A_STR:
+                begin
+                  case ref.addressmode of
+                    AM_PREINDEXED:
+                      begin
+                        { since the loaded/stored register cannot be the same
+                          as the base register, we can safely add the
+                          offset to the base if it doesn't fit}
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          begin
+                            a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
+                            ref.offset:=0;
+                          end;
+                      end;
+                    AM_POSTINDEXED:
+                      begin
+                        { cannot emulate post-indexing if we have to fold the
+                          offset into the base register }
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          internalerror(2014110909);
+                        { ok }
+                      end;
+                    AM_OFFSET:
+                      begin
+                        { unsupported offset -> fold into base register }
+                        accesssize:=1 shl tcgsizep2size[size];
+                        if (ref.offset<0) or
+                           (ref.offset>(((1 shl 12)-1)*accesssize)) or
+                           ((ref.offset mod accesssize)<>0) then
+                          begin
+                            if preferred_newbasereg=NR_NO then
+                              preferred_newbasereg:=getaddressregister(list);
+                            { can we split the offset beween an
+                              "add/sub (imm12 shl 12)" and the load (also an
+                              imm12)?
+                              -- the offset from the load will always be added,
+                                that's why the lower bound has a smaller range
+                                than the upper bound; it must also be a multiple
+                                of the access size }
+                            if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
+                               (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
+                               ((ref.offset mod accesssize)=0) then
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
+                                ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
+                              end
+                            else
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                                ref.offset:=0;
+                              end;
+                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                          end;
+                      end
+                    else
+                      internalerror(2014110904);
+                  end;
+                end;
+              A_LDP,A_STP:
+                begin
+                  { unsupported offset -> fold into base register (these
+                    instructions support all addressmodes) }
+                  if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
+                     (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
+                    begin
+                      case ref.addressmode of
+                        AM_POSTINDEXED:
+                          { don't emulate post-indexing if we have to fold the
+                            offset into the base register }
+                          internalerror(2014110910);
+                        AM_PREINDEXED:
+                          { this means the offset must be added to the current
+                            base register }
+                          preferred_newbasereg:=ref.base;
+                        AM_OFFSET:
+                          if preferred_newbasereg=NR_NO then
+                            preferred_newbasereg:=getaddressregister(list);
+                      end;
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      reference_reset_base(ref,preferred_newbasereg,0,ref.alignment);
+                    end
+                end;
+              A_LDUR,A_STUR:
+                begin
+                  { valid, checked above }
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110908);
+              else
+                internalerror(2014110708);
+            end;
+            { done }
+            exit;
+          end;
+
+        { only an offset -> change to base (+ offset 0) }
+        if preferred_newbasereg=NR_NO then
+          preferred_newbasereg:=getaddressregister(list);
+        a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
+        reference_reset_base(ref,preferred_newbasereg,0,newalignment(8,ref.offset));
+      end;
+
+
+    function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
+      var
+        subreg:Tsubregister;
+      begin
+        subreg:=cgsize2subreg(getregtype(reg),size);
+        result:=reg;
+        setsubreg(result,subreg);
+      end;
+
+
+    function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
+      begin
+        internalerror(2014122110);
+        { squash warning }
+        result:=NR_NO;
+      end;
+
+
+    function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+      begin
+        make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
+        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
+        result:=ref;
+      end;
+
+
+    procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+      var
+        instr: taicpu;
+        so: tshifterop;
+        hadtmpreg: boolean;
+      begin
+        { imm12 }
+        if (a>=0) and
+           (a<=((1 shl 12)-1)) then
+          if usedest then
+            instr:=taicpu.op_reg_reg_const(op,dst,src,a)
+          else
+            instr:=taicpu.op_reg_const(op,src,a)
+        { imm12 lsl 12 }
+        else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
+          begin
+            so.shiftmode:=SM_LSL;
+            so.shiftimm:=12;
+            if usedest then
+              instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
+            else
+              instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
+          end
+        else
+          begin
+            { todo: other possible optimizations (e.g. load 16 bit constant in
+                register and then add/sub/cmp/cmn shifted the rest) }
+            if tmpreg=NR_NO then
+              begin
+                hadtmpreg:=false;
+                tmpreg:=getintregister(list,size);
+              end
+            else
+              begin
+                hadtmpreg:=true;
+                getcpuregister(list,tmpreg);
+              end;
+            a_load_const_reg(list,size,a,tmpreg);
+            if usedest then
+              instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
+            else
+              instr:=taicpu.op_reg_reg(op,src,tmpreg);
+            if hadtmpreg then
+              ungetcpuregister(list,tmpreg);
+          end;
+        if setflags then
+          setoppostfix(instr,PF_S);
+        list.concat(instr);
+      end;
+
+
+{****************************************************************************
+                              Assembler code
+****************************************************************************}
+
+    procedure tcgaarch64.init_register_allocators;
+      begin
+        inherited init_register_allocators;
+
+        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
+            [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
+             RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
+             RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
+             { maybe we can enable this in the future for leaf functions (it's
+               the frame pointer)
+             ,RS_X29 }],
+            first_int_imreg,[]);
+
+        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
+            [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
+             RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
+             RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
+             RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
+            first_mm_imreg,[]);
+      end;
+
+
+    procedure tcgaarch64.done_register_allocators;
+      begin
+        rg[R_INTREGISTER].free;
+        rg[R_FPUREGISTER].free;
+        rg[R_MMREGISTER].free;
+        inherited done_register_allocators;
+      end;
+
+
+    function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
+      begin
+        case size of
+          OS_F32:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
+          OS_F64:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
+          else
+            internalerror(2014102701);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
+      begin
+        if not weak then
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s)))
+        else
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s)));
+      end;
+
+
+    procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
+      begin
+        list.concat(taicpu.op_reg(A_BLR,reg));
+      end;
+
+
+    {********************** load instructions ********************}
+
+    procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
+      var
+        preva: tcgint;
+        opc: tasmop;
+        shift,maxshift: byte;
+        so: tshifterop;
+        reginited: boolean;
+        mask: tcgint;
+      begin
+        { if we load a value into a 32 bit register, it is automatically
+          zero-extended to 64 bit }
+        if (high(a)=0) and
+           (size in [OS_64,OS_S64]) then
+          begin
+            size:=OS_32;
+            reg:=makeregsize(reg,size);
+          end;
+        { values <= 32 bit are stored in a 32 bit register }
+        if not(size in [OS_64,OS_S64]) then
+          a:=cardinal(a);
+
+        if size in [OS_64,OS_S64] then
+          begin
+            mask:=-1;
+            maxshift:=64;
+          end
+        else
+          begin
+            mask:=$ffffffff;
+            maxshift:=32;
+          end;
+        { single movn enough? (to be extended) }
+        shift:=16;
+        preva:=a;
+        repeat
+          if (a shr shift)=(mask shr shift) then
+            begin
+              if shift=16 then
+                list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
+              else
+                begin
+                  shifterop_reset(so);
+                  so.shiftmode:=SM_LSL;
+                  so.shiftimm:=shift-16;
+                  list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
+                end;
+              exit;
+            end;
+          { only try the next 16 bits if the current one is all 1 bits, since
+            the movn will set all lower bits to 1 }
+          if word(a shr (shift-16))<>$ffff then
+            break;
+          inc(shift,16);
+        until shift=maxshift;
+        reginited:=false;
+        shift:=0;
+        { can be optimized later to use more movn }
+        repeat
+          { leftover is shifterconst? (don't check if we can represent it just
+            as effectively with movz/movk, as this check is expensive) }
+          if ((shift<tcgsize2size[size]*(8 div 2)) and
+              (word(a)<>0) and
+              ((a shr 16)<>0)) and
+             is_shifter_const(a shl shift,size) then
+            begin
+              if reginited then
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
+              else
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
+              exit;
+            end;
+          { set all 16 bit parts <> 0 }
+          if (word(a)<>0) or
+             ((shift=0) and
+              (a=0)) then
+            if shift=0 then
+              begin
+                list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
+                reginited:=true;
+              end
+            else
+              begin
+                shifterop_reset(so);
+                so.shiftmode:=SM_LSL;
+                so.shiftimm:=shift;
+                if not reginited then
+                  begin
+                    opc:=A_MOVZ;
+                    reginited:=true;
+                  end
+                else
+                  opc:=A_MOVK;
+                list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
+              end;
+            preva:=a;
+            a:=a shr 16;
+           inc(shift,16);
+        until word(preva)=preva;
+        if not reginited then
+          internalerror(2014102702);
+      end;
+
+
+    procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
+      var
+        reg: tregister;
+      begin
+        { use the zero register if possible }
+        if a=0 then
+          begin
+            if size in [OS_64,OS_S64] then
+              reg:=NR_XZR
+            else
+              reg:=NR_WZR;
+            a_load_reg_ref(list,size,size,reg,ref);
+          end
+        else
+          inherited;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      var
+        oppostfix:toppostfix;
+        hreg: tregister;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          fromsize:=tosize
+        { have a 32 bit register but need a 64 bit one? }
+        else if tosize in [OS_64,OS_S64] then
+          begin
+            { sign extend if necessary }
+            if fromsize in [OS_S8,OS_S16,OS_S32] then
+              begin
+                { can't overwrite reg, may be a constant reg }
+                hreg:=getintregister(list,tosize);
+                a_load_reg_reg(list,fromsize,tosize,reg,hreg);
+                reg:=hreg;
+              end
+            else
+              { top 32 bit are zero by default }
+              reg:=makeregsize(reg,OS_64);
+            fromsize:=tosize;
+          end;
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[tosize]) then
+          begin
+            a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
+          end
+        else
+          begin
+            case tosize of
+              { signed integer registers }
+              OS_8,
+              OS_S8:
+                oppostfix:=PF_B;
+              OS_16,
+              OS_S16:
+                oppostfix:=PF_H;
+              OS_32,
+              OS_S32,
+              OS_64,
+              OS_S64:
+                oppostfix:=PF_None;
+              else
+                InternalError(200308299);
+            end;
+            handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      var
+        oppostfix:toppostfix;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          fromsize:=tosize;
+        { ensure that all bits of the 32/64 register are always correctly set:
+           * default behaviour is always to zero-extend to the entire (64 bit)
+             register -> unsigned 8/16/32 bit loads only exist with a 32 bit
+             target register, as the upper 32 bit will be zeroed implicitly
+             -> always make target register 32 bit
+           * signed loads exist both with 32 and 64 bit target registers,
+             depending on whether the value should be sign extended to 32 or
+             to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
+             corresponding 64 bit register are again zeroed) -> no need to
+             change anything (we only have 32 and 64 bit registers), except that
+             when loading an OS_S32 to a 32 bit register, we don't need/can't
+             use sign extension
+        }
+        if fromsize in [OS_8,OS_16,OS_32] then
+          reg:=makeregsize(reg,OS_32);
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[fromsize]) then
+          begin
+            a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
+            exit;
+          end;
+        case fromsize of
+          { signed integer registers }
+          OS_8:
+            oppostfix:=PF_B;
+          OS_S8:
+            oppostfix:=PF_SB;
+          OS_16:
+            oppostfix:=PF_H;
+          OS_S16:
+            oppostfix:=PF_SH;
+          OS_S32:
+            if getsubreg(reg)=R_SUBD then
+              oppostfix:=PF_NONE
+            else
+              oppostfix:=PF_SW;
+          OS_32,
+          OS_64,
+          OS_S64:
+            oppostfix:=PF_None;
+          else
+            InternalError(200308297);
+        end;
+        handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
+
+        { clear upper 16 bits if the value was negative }
+        if (fromsize=OS_S8) and (tosize=OS_16) then
+          a_load_reg_reg(list,fromsize,tosize,reg,reg);
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg: tregister;
+      begin
+        if fromsize in [OS_64,OS_S64] then
+          begin
+            { split into two 32 bit loads }
+            hreg1:=makeregsize(register,OS_32);
+            hreg2:=getintregister(list,OS_32);
+            if target_info.endian=endian_big then
+              begin
+                tmpreg:=hreg1;
+                hreg1:=hreg2;
+                hreg2:=tmpreg;
+              end;
+            { can we use LDP? }
+            if (ref.alignment=4) and
+               (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
+              list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
+            else
+              begin
+                a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
+                href:=ref;
+                inc(href.offset,4);
+                a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
+              end;
+            list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
+          end
+       else
+         inherited;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
+      var
+        instr: taicpu;
+      begin
+        { we use both 32 and 64 bit registers -> insert conversion when when
+          we have to truncate/sign extend inside the (32 or 64 bit) register
+          holding the value, and when we sign extend from a 32 to a 64 bit
+          register }
+        if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
+           ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
+            (fromsize<>tosize) and
+            not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
+           ((fromsize in [OS_S8,OS_S16,OS_S32]) and
+            (tosize in [OS_64,OS_S64])) or
+           { needs to mask out the sign in the top 16 bits }
+           ((fromsize=OS_S8) and
+            (tosize=OS_16)) then
+          begin
+            case tosize of
+              OS_8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              OS_S8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_S16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              { while "mov wN, wM" automatically inserts a zero-extension and
+                hence we could encode a 64->32 bit move like that, the problem
+                is that we then can't distinguish 64->32 from 32->32 moves, and
+                the 64->32 truncation could be removed altogether... So use a
+                different instruction }
+              OS_32,
+              OS_S32:
+                { in theory, reg1 should be 64 bit here (since fromsize>tosize),
+                  but because of the way location_force_register() tries to
+                  avoid superfluous zero/sign extensions, it's not always the
+                  case -> also force reg1 to to 64 bit }
+                list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
+              OS_64,
+              OS_S64:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
+              else
+                internalerror(2002090901);
+            end;
+          end
+        else
+          begin
+            { 32 -> 32 bit move implies zero extension (sign extensions have
+              been handled above) -> also use for 32 <-> 64 bit moves }
+            if not(fromsize in [OS_64,OS_S64]) or
+               not(tosize in [OS_64,OS_S64]) then
+              instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
+            else
+              instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
+            list.Concat(instr);
+            { Notify the register allocator that we have written a move instruction so
+             it can try to eliminate it. }
+            add_move_instruction(instr);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
+      var
+         href: treference;
+         so: tshifterop;
+         op: tasmop;
+      begin
+        op:=A_LDR;
+        href:=ref;
+        { simplify as if we're going to perform a regular 64 bit load, using
+          "r" as the new base register if possible/necessary }
+        make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
+        { load literal? }
+        if assigned(href.symbol) then
+          begin
+            if (href.base<>NR_NO) or
+               (href.index<>NR_NO) or
+               not assigned(href.symboldata) then
+              internalerror(2014110912);
+            list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
+          end
+        else
+          begin
+            if href.index<>NR_NO then
+              begin
+                if href.shiftmode<>SM_None then
+                  begin
+                    { "add" supports a supperset of the shift modes supported by
+                      load/store instructions }
+                    shifterop_reset(so);
+                    so.shiftmode:=href.shiftmode;
+                    so.shiftimm:=href.shiftimm;
+                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
+                  end
+                else
+                  a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
+              end
+            else if href.offset<>0 then
+              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
+            else
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
+      begin
+        internalerror(2014122107)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      begin
+        internalerror(2014122108)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      begin
+        internalerror(2014122109)
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
+      var
+        instr: taicpu;
+      begin
+        if assigned(shuffle) and
+           not shufflescalar(shuffle) then
+          internalerror(2014122104);
+        if fromsize=tosize then
+          begin
+            instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
+            { Notify the register allocator that we have written a move
+              instruction so it can try to eliminate it. }
+            add_move_instruction(instr);
+          end
+        else
+          begin
+            if (reg_cgsize(reg1)<>fromsize) or
+               (reg_cgsize(reg2)<>tosize) then
+              internalerror(2014110913);
+            instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
+          end;
+        list.Concat(instr);
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122105);
+         tmpreg:=NR_NO;
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=reg;
+             reg:=getmmregister(list,fromsize);
+           end;
+         handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
+         if (fromsize<>tosize) then
+           a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122106);
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=getmmregister(list,tosize);
+             a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+             reg:=tmpreg;
+           end;
+         handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122801);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+           internalerror(2014122803);
+         list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122802);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+           internalerror(2014122804);
+         list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
+       end;
+
+
+    procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
+      begin
+        case op of
+          { "xor Vx,Vx" is used to initialize global regvars to 0 }
+          OP_XOR:
+            begin
+              if (src<>dst) or
+                 (reg_cgsize(src)<>size) or
+                 assigned(shuffle) then
+                internalerror(2015011401);
+              case size of
+                OS_F32,
+                OS_F64:
+                  list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
+                else
+                  internalerror(2015011402);
+              end;
+            end
+          else
+            internalerror(2015011403);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
+      var
+        bitsize,
+        signbit: longint;
+      begin
+        if srcsize in [OS_64,OS_S64] then
+          begin
+            bitsize:=64;
+            signbit:=6;
+          end
+        else
+          begin
+            bitsize:=32;
+            signbit:=5;
+          end;
+        { source is 0 -> dst will have to become 255 }
+        list.concat(taicpu.op_reg_const(A_CMP,src,0));
+        if reverse then
+          begin
+            list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
+            { xor 31/63 is the same as setting the lower 5/6 bits to
+              "31/63-(lower 5/6 bits of dst)" }
+            list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
+          end
+        else
+          begin
+            list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
+            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
+          end;
+        { set dst to -1 if src was 0 }
+        list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
+        { mask the -1 to 255 if src was 0 (anyone find a two-instruction
+          branch-free version? All of mine are 3...) }
+        list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,dst,dst),PF_B));
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg: tregister;
+      begin
+        if fromsize in [OS_64,OS_S64] then
+          begin
+            { split into two 32 bit stores }
+            hreg1:=makeregsize(register,OS_32);
+            hreg2:=getintregister(list,OS_32);
+            a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
+            if target_info.endian=endian_big then
+              begin
+                tmpreg:=hreg1;
+                hreg1:=hreg2;
+                hreg2:=tmpreg;
+              end;
+            { can we use STP? }
+            if (ref.alignment=4) and
+               (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
+              list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
+            else
+              begin
+                a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
+                href:=ref;
+                inc(href.offset,4);
+                a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
+              end;
+          end
+       else
+         inherited;
+      end;
+
+
+    procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+      const
+        overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
+      begin
+        if (op in overflowops) and
+           (size in [OS_8,OS_S8,OS_16,OS_S16]) then
+          a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
+      begin
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            exit;
+          OP_MOVE:
+            a_load_const_reg(list,size,a,reg);
+          OP_NEG,OP_NOT:
+            internalerror(200306011);
+          else
+            a_op_const_reg_reg(list,op,size,a,reg,reg);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
+      begin
+        Case op of
+          OP_NEG,
+          OP_NOT:
+            begin
+              list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
+              maybeadjustresult(list,op,size,dst);
+            end
+          else
+            a_op_reg_reg_reg(list,op,size,src,dst,dst);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
+      var
+        l: tlocation;
+      begin
+        a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
+      var
+        hreg: tregister;
+      begin
+        { no ROLV opcode... }
+        if op=OP_ROL then
+          begin
+            case size of
+              OS_32,OS_S32,
+              OS_64,OS_S64:
+                begin
+                  hreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
+                  a_op_reg_reg(list,OP_SUB,size,src1,hreg);
+                  a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
+                  exit;
+                end;
+              else
+                internalerror(2014111005);
+            end;
+          end
+        else if (op=OP_ROR) and
+           not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
+          internalerror(2014111006);
+        if TOpCG2AsmOpReg[op]=A_NONE then
+          internalerror(2014111007);
+        list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        shiftcountmask: longint;
+        constreg: tregister;
+      begin
+        { add/sub instructions have only positive immediate operands }
+        if (op in [OP_ADD,OP_SUB]) and
+           (a<0) then
+          begin
+            if op=OP_ADD then
+              op:=op_SUB
+            else
+              op:=OP_ADD;
+            { avoid range/overflow error in case a = low(tcgint) }
+{$push}{$r-}{$q-}
+            a:=-a;
+{$pop}
+          end;
+        ovloc.loc:=LOC_VOID;
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            begin
+              a_load_reg_reg(list,size,size,src,dst);
+              exit;
+            end;
+          OP_MOVE:
+            begin
+              a_load_const_reg(list,size,a,dst);
+              exit;
+            end;
+        end;
+        case op of
+          OP_ADD,
+          OP_SUB:
+            begin
+              handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
+              { on a 64 bit target, overflows with smaller data types
+                are handled via range errors }
+              if setflags and
+                 (size in [OS_64,OS_S64]) then
+                begin
+                  location_reset(ovloc,LOC_FLAGS,OS_8);
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                end;
+            end;
+          OP_OR,
+          OP_AND,
+          OP_XOR:
+            begin
+              if not(size in [OS_64,OS_S64]) then
+                a:=cardinal(a);
+              if is_shifter_const(a,size) then
+                list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
+              else
+                begin
+                  constreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,a,constreg);
+                  a_op_reg_reg_reg(list,op,size,constreg,src,dst);
+                end;
+            end;
+          OP_SHL,
+          OP_SHR,
+          OP_SAR:
+            begin
+              if size in [OS_64,OS_S64] then
+                shiftcountmask:=63
+              else
+                shiftcountmask:=31;
+              if (a and shiftcountmask)<>0 Then
+                list.concat(taicpu.op_reg_reg_const(
+                  TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
+              else
+                a_load_reg_reg(list,size,size,src,dst);
+              if (a and not(tcgint(shiftcountmask)))<>0 then
+                internalError(2014112101);
+            end;
+          OP_ROL,
+          OP_ROR:
+            begin
+              case size of
+                OS_32,OS_S32:
+                  if (a and not(tcgint(31)))<>0 then
+                    internalError(2014112102);
+                OS_64,OS_S64:
+                  if (a and not(tcgint(63)))<>0 then
+                    internalError(2014112103);
+                else
+                  internalError(2014112104);
+              end;
+              { there's only a ror opcode }
+              if op=OP_ROL then
+                a:=(tcgsize2size[size]*8)-a;
+              list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
+            end;
+          OP_MUL,
+          OP_IMUL,
+          OP_DIV,
+          OP_IDIV:
+            begin
+              constreg:=getintregister(list,size);
+              a_load_const_reg(list,size,a,constreg);
+              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
+            end;
+          else
+            internalerror(2014111403);
+        end;
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        tmpreg1: tregister;
+      begin
+        ovloc.loc:=LOC_VOID;
+        { overflow can only occur with 64 bit calculations on 64 bit cpus }
+        if setflags and
+           (size in [OS_64,OS_S64]) then
+          begin
+            case op of
+              OP_ADD,
+              OP_SUB:
+                begin
+                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
+                  ovloc.loc:=LOC_FLAGS;
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                  { finished; since we won't call through to a_op_reg_reg_reg,
+                    adjust the result here if necessary }
+                  maybeadjustresult(list,op,size,dst);
+                  exit;
+                end;
+              OP_MUL:
+                begin
+                  { check whether the upper 64 bit of the 128 bit product is 0 }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
+                  list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { still have to perform the actual multiplication  }
+                end;
+              OP_IMUL:
+                begin
+                  { check whether the sign bit of the (128 bit) result is the
+                    same as "sign bit of src1" xor "signbit of src2" (if so, no
+                    overflow and the xor-product of all sign bits is 0) }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src1));
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src2));
+                  list.concat(taicpu.op_reg_const(A_TST,tmpreg1,$80000000));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { still have to perform the actual multiplication }
+                end;
+              OP_IDIV,
+              OP_DIV:
+                begin
+                  { not handled here, needs div-by-zero check (dividing by zero
+                    just gives a 0 result on aarch64), and low(int64) div -1
+                    check for overflow) }
+                  internalerror(2014122101);
+                end;
+            end;
+          end;
+        a_op_reg_reg_reg(list,op,size,src1,src2,dst);
+      end;
+
+
+
+  {*************** compare instructructions ****************}
+
+    procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
+      var
+        op: tasmop;
+      begin
+        if a>=0 then
+          op:=A_CMP
+        else
+          op:=A_CMN;
+        { avoid range/overflow error in case a=low(tcgint) }
+{$push}{$r-}{$q-}
+        handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
+{$pop}
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
+      begin
+        list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(TOpCmp2AsmCond[cond]);
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
+      var
+        ai : taicpu;
+      begin
+        ai:=Taicpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(flags_to_cond(f));
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
+      begin
+        list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
+      begin
+        { we need an explicit overflow location, because there are many
+          possibilities (not just the overflow flag, which is only used for
+          signed add/sub) }
+        internalerror(2014112303);
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
+      var
+        hl : tasmlabel;
+        hflags : tresflags;
+      begin
+        if not(cs_check_overflow in current_settings.localswitches) then
+          exit;
+        current_asmdata.getjumplabel(hl);
+        case ovloc.loc of
+          LOC_FLAGS:
+            begin
+              hflags:=ovloc.resflags;
+              inverse_flags(hflags);
+              cg.a_jmp_flags(list,hflags,hl);
+            end;
+          else
+            internalerror(2014112304);
+        end;
+        a_call_name(list,'FPC_OVERFLOW',false);
+        a_label(list,hl);
+      end;
+
+  { *********** entry/exit code and address loading ************ }
+
+    function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+      var
+        ref: treference;
+        sr: tsuperregister;
+        pairreg: tregister;
+      begin
+        result:=0;
+        reference_reset_base(ref,NR_SP,-16,16);
+        ref.addressmode:=AM_PREINDEXED;
+        pairreg:=NR_NO;
+        { store all used registers pairwise }
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                inc(result,16);
+                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                pairreg:=NR_NO
+              end;
+        { one left -> store twice (stack must be 16 bytes aligned) }
+        if pairreg<>NR_NO then
+          begin
+            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
+            inc(result,16);
+          end;
+      end;
+
+
+    procedure FixupOffsets(p:TObject;arg:pointer);
+      var
+        sym: tabstractnormalvarsym absolute p;
+      begin
+        if (tsym(p).typ in [paravarsym,localvarsym]) and
+          (sym.localloc.loc=LOC_REFERENCE) and
+          (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
+          begin
+            sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
+            dec(sym.localloc.reference.offset,PLongint(arg)^);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
+      var
+        ref: treference;
+        totalstackframesize: longint;
+      begin
+        if nostackframe then
+          exit;
+        { stack pointer has to be aligned to 16 bytes at all times }
+        localsize:=align(localsize,16);
+
+        { save stack pointer and return address }
+        reference_reset_base(ref,NR_SP,-16,16);
+        ref.addressmode:=AM_PREINDEXED;
+        list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
+        { initialise frame pointer }
+        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
+
+        totalstackframesize:=localsize;
+        { save modified integer registers }
+        inc(totalstackframesize,
+          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
+        { only the lower 64 bits of the modified vector registers need to be
+          saved; if the caller needs the upper 64 bits, it has to save them
+          itself }
+        inc(totalstackframesize,
+          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
+
+        { allocate stack space }
+        if localsize<>0 then
+          begin
+            localsize:=align(localsize,16);
+            current_procinfo.final_localsize:=localsize;
+            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+          end;
+        { By default, we use the frame pointer to access parameters passed via
+          the stack and the stack pointer to address local variables and temps
+          because
+           a) we can use bigger positive than negative offsets (so accessing
+              locals via negative offsets from the frame pointer would be less
+              efficient)
+           b) we don't know the local size while generating the code, so
+              accessing the parameters via the stack pointer is not possible
+              without copying them
+          The problem with this is the get_frame() intrinsic:
+           a) it must return the same value as what we pass as parentfp
+              parameter, since that's how it's used in the TP-style objects unit
+           b) its return value must usable to access all local data from a
+              routine (locals and parameters), since it's all the nested
+              routines have access to
+           c) its return value must be usable to construct a backtrace, as it's
+              also used by the exception handling routines
+
+          The solution we use here, based on something similar that's done in
+          the MIPS port, is to generate all accesses to locals in the routine
+          itself SP-relative, and then after the code is generated and the local
+          size is known (namely, here), we change all SP-relative variables/
+          parameters into FP-relative ones. This means that they'll be accessed
+          less efficiently from nested routines, but those accesses are indirect
+          anyway and at least this way they can be accessed at all
+        }
+        if current_procinfo.has_nestedprocs then
+          begin
+            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
+      begin
+        { nothing to do on Darwin; check on ELF targets }
+        if not(target_info.system in systems_darwin) then
+          internalerror(2014112601);
+      end;
+
+
+    procedure tcgaarch64.g_restore_registers(list:TAsmList);
+      begin
+        { done in g_proc_exit }
+      end;
+
+
+    procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+      var
+        ref: treference;
+        sr, highestsetsr: tsuperregister;
+        pairreg: tregister;
+        regcount: longint;
+      begin
+        reference_reset_base(ref,NR_SP,16,16);
+        ref.addressmode:=AM_POSTINDEXED;
+        { highest reg stored twice? }
+        regcount:=0;
+        highestsetsr:=RS_NO;
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            begin
+              inc(regcount);
+              highestsetsr:=sr;
+            end;
+        if odd(regcount) then
+          begin
+            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
+            highestsetsr:=pred(highestsetsr);
+          end;
+        { load all (other) used registers pairwise }
+        pairreg:=NR_NO;
+        for sr:=highestsetsr downto lowsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
+                pairreg:=NR_NO
+              end;
+        { There can't be any register left }
+        if pairreg<>NR_NO then
+          internalerror(2014112602);
+      end;
+
+
+
+    procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
+      var
+        ref: treference;
+        regsstored: boolean;
+        sr: tsuperregister;
+      begin
+        if not nostackframe then
+          begin
+            { if no registers have been stored, we don't have to subtract the
+              allocated temp space from the stack pointer }
+            regsstored:=false;
+            for sr:=RS_X19 to RS_X28 do
+              if sr in rg[R_INTREGISTER].used_in_proc then
+                begin
+                  regsstored:=true;
+                  break;
+                end;
+            if not regsstored then
+              for sr:=RS_D8 to RS_D15 do
+                if sr in rg[R_MMREGISTER].used_in_proc then
+                  begin
+                    regsstored:=true;
+                    break;
+                  end;
+            { restore registers (and stack pointer) }
+            if regsstored then
+              begin
+                if current_procinfo.final_localsize<>0 then
+                  handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
+                load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
+                load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
+              end
+            else if current_procinfo.final_localsize<>0 then
+              { restore stack pointer }
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
+
+            { restore framepointer and return address }
+            reference_reset_base(ref,NR_SP,16,16);
+            ref.addressmode:=AM_POSTINDEXED;
+            list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
+          end;
+
+        { return }
+        list.concat(taicpu.op_none(A_RET));
+      end;
+
+
+    procedure tcgaarch64.g_save_registers(list : TAsmList);
+      begin
+        { done in g_proc_entry }
+      end;
+
+
+    { ************* concatcopy ************ }
+
+    procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
+      var
+        paraloc1,paraloc2,paraloc3 : TCGPara;
+        pd : tprocdef;
+      begin
+        pd:=search_system_proc('MOVE');
+        paraloc1.init;
+        paraloc2.init;
+        paraloc3.init;
+        paramanager.getintparaloc(pd,1,paraloc1);
+        paramanager.getintparaloc(pd,2,paraloc2);
+        paramanager.getintparaloc(pd,3,paraloc3);
+        a_load_const_cgpara(list,OS_SINT,len,paraloc3);
+        a_loadaddr_ref_cgpara(list,dest,paraloc2);
+        a_loadaddr_ref_cgpara(list,source,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
+        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        a_call_name(list,'FPC_MOVE',false);
+        dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        paraloc3.done;
+        paraloc2.done;
+        paraloc1.done;
+      end;
+
+
+    procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
+
+      var
+        sourcebasereplaced, destbasereplaced: boolean;
+
+      { get optimal memory operation to use for loading/storing data
+        in an unrolled loop }
+      procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
+        begin
+          if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=true;
+            end
+          else if (unscaledop<>A_NONE) and
+             (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=false;
+            end
+          else
+            begin
+              memop:=scaledop;
+              needsimplify:=true;
+            end;
+        end;
+
+      { adjust the offset and/or addressing mode after a load/store so it's
+        correct for the next one of the same size }
+      procedure updaterefafterloadstore(var ref: treference; oplen: longint);
+        begin
+          case ref.addressmode of
+            AM_OFFSET:
+              inc(ref.offset,oplen);
+            AM_POSTINDEXED:
+              { base register updated by instruction, next offset can remain
+                the same }
+              ;
+            AM_PREINDEXED:
+              begin
+                { base register updated by instruction -> next instruction can
+                  use post-indexing with offset = sizeof(operation) }
+                ref.offset:=0;
+                ref.addressmode:=AM_OFFSET;
+              end;
+          end;
+        end;
+
+      { generate a load/store and adjust the reference offset to the next
+        memory location if necessary }
+      procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]);
+        end;
+
+      { generate a dual load/store (ldp/stp) and adjust the reference offset to
+        the next memory location if necessary }
+      procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
+        end;
+
+      { turn a reference into a pre- or post-indexed reference for use in a
+        load/store of a particular size }
+      procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
+        var
+          tmpreg: tregister;
+          scaledoffset: longint;
+          orgaddressmode: taddressmode;
+        begin
+          scaledoffset:=tcgsize2size[opsize];
+          if scaledop in [A_LDP,A_STP] then
+            scaledoffset:=scaledoffset*2;
+          { can we use the reference as post-indexed without changes? }
+          if forcepostindexing then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_POSTINDEXED;
+              if (orgaddressmode=AM_POSTINDEXED) or
+                 ((ref.offset=0) and
+                  (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
+                begin
+                  { just change the post-indexed offset to the access size }
+                  ref.offset:=scaledoffset;
+                  { and replace the base register if that didn't happen yet
+                    (could be sp or a regvar) }
+                  if not basereplaced then
+                    begin
+                      tmpreg:=getaddressregister(list);
+                      a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                      ref.base:=tmpreg;
+                      basereplaced:=true;
+                    end;
+                  exit;
+                end;
+              ref.addressmode:=orgaddressmode;
+            end;
+{$ifdef dummy}
+          This could in theory be useful in case you have a concatcopy from
+          e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
+          very unlikely. Disabled because it still needs fixes, as it
+          also generates pre-indexed loads right now at the very end for the
+          left-over gencopies
+
+          { can we turn it into a pre-indexed reference for free? (after the
+            first operation, it will be turned into an offset one) }
+          if not forcepostindexing and
+             (ref.offset<>0) then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_PREINDEXED;
+              tmpreg:=ref.base;
+              if not basereplaced and
+                 (ref.base=tmpreg) then
+                begin
+                  tmpreg:=getaddressregister(list);
+                  a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                  ref.base:=tmpreg;
+                  basereplaced:=true;
+                end;
+              if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
+                make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              exit;
+            end;
+{$endif dummy}
+          if not forcepostindexing then
+            begin
+              ref.addressmode:=AM_OFFSET;
+              make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              { this may still cause problems if the final offset is no longer
+                a simple ref; it's a bit complicated to pass all information
+                through at all places and check that here, so play safe: we
+                currently never generate unrolled copies for more than 64
+                bytes (32 with non-double-register copies) }
+              if ref.index=NR_NO then
+                begin
+                  if ((scaledop in [A_LDP,A_STP]) and
+                      (ref.offset<((64-8)*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDUR,A_STUR]) and
+                      (ref.offset<(255-8*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDR,A_STR]) and
+                      (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
+                    exit;
+                end;
+            end;
+          tmpreg:=getaddressregister(list);
+          a_loadaddr_ref_reg(list,ref,tmpreg);
+          basereplaced:=true;
+          if forcepostindexing then
+            begin
+              reference_reset_base(ref,tmpreg,scaledoffset,ref.alignment);
+              ref.addressmode:=AM_POSTINDEXED;
+            end
+          else
+            begin
+              reference_reset_base(ref,tmpreg,0,ref.alignment);
+              ref.addressmode:=AM_OFFSET;
+            end
+        end;
+
+      { prepare a reference for use by gencopy. This is done both after the
+        unrolled and regular copy loop -> get rid of post-indexing mode, make
+        sure ref is valid }
+      procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
+        var
+          simplify: boolean;
+        begin
+          if ref.addressmode=AM_POSTINDEXED then
+            ref.offset:=tcgsize2size[opsize];
+          getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
+          if simplify then
+            begin
+              makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
+              op:=scaledop;
+            end;
+        end;
+
+      { generate a copy from source to dest of size opsize/postfix }
+      procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
+        var
+          reg: tregister;
+          loadop, storeop: tasmop;
+        begin
+          preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
+          preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
+          reg:=getintregister(list,opsize);
+          genloadstore(list,loadop,reg,source,postfix,opsize);
+          genloadstore(list,storeop,reg,dest,postfix,opsize);
+        end;
+
+
+      { copy the leftovers after an unrolled or regular copy loop }
+      procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
+        begin
+          { stop post-indexing if we did so in the loop, since in that case all
+            offsets definitely can be represented now }
+          if source.addressmode=AM_POSTINDEXED then
+            begin
+              source.addressmode:=AM_OFFSET;
+              source.offset:=0;
+            end;
+          if dest.addressmode=AM_POSTINDEXED then
+            begin
+              dest.addressmode:=AM_OFFSET;
+              dest.offset:=0;
+            end;
+          { transfer the leftovers }
+          if len>=8 then
+            begin
+              dec(len,8);
+              gencopy(list,source,dest,PF_NONE,OS_64);
+            end;
+          if len>=4 then
+            begin
+              dec(len,4);
+              gencopy(list,source,dest,PF_NONE,OS_32);
+            end;
+          if len>=2 then
+            begin
+              dec(len,2);
+              gencopy(list,source,dest,PF_H,OS_16);
+            end;
+          if len>=1 then
+            begin
+              dec(len);
+              gencopy(list,source,dest,PF_B,OS_8);
+            end;
+        end;
+
+
+      const
+        { load_length + loop dec + cbnz }
+        loopoverhead=12;
+        { loop overhead + load + store }
+        totallooplen=loopoverhead + 8;
+      var
+        totalalign: longint;
+        maxlenunrolled: tcgint;
+        loadop, storeop: tasmop;
+        opsize: tcgsize;
+        postfix: toppostfix;
+        tmpsource, tmpdest: treference;
+        scaledstoreop, unscaledstoreop,
+        scaledloadop, unscaledloadop: tasmop;
+        regs: array[1..8] of tregister;
+        countreg: tregister;
+        i, regcount: longint;
+        hl: tasmlabel;
+        simplifysource, simplifydest: boolean;
+      begin
+        if len=0 then
+          exit;
+        sourcebasereplaced:=false;
+        destbasereplaced:=false;
+        { maximum common alignment }
+        totalalign:=max(1,newalignment(source.alignment,dest.alignment));
+        { use a simple load/store? }
+        if (len in [1,2,4,8]) and
+           ((totalalign>=(len div 2)) or
+            (source.alignment=len) or
+            (dest.alignment=len)) then
+          begin
+            opsize:=int_cgsize(len);
+            a_load_ref_ref(list,opsize,opsize,source,dest);
+            exit;
+          end;
+
+        { alignment > length is not useful, and would break some checks below }
+        while totalalign>len do
+          totalalign:=totalalign div 2;
+
+        { operation sizes to use based on common alignment }
+        case totalalign of
+          1:
+            begin
+              postfix:=PF_B;
+              opsize:=OS_8;
+            end;
+          2:
+            begin
+              postfix:=PF_H;
+              opsize:=OS_16;
+            end;
+          4:
+            begin
+              postfix:=PF_None;
+              opsize:=OS_32;
+            end
+          else
+            begin
+              totalalign:=8;
+              postfix:=PF_None;
+              opsize:=OS_64;
+            end;
+        end;
+        { maximum length to handled with an unrolled loop (4 loads + 4 stores) }
+        maxlenunrolled:=min(totalalign,8)*4;
+        { ldp/stp -> 2 registers per instruction }
+        if (totalalign>=4) and
+           (len>=totalalign*2) then
+          begin
+            maxlenunrolled:=maxlenunrolled*2;
+            scaledstoreop:=A_STP;
+            scaledloadop:=A_LDP;
+            unscaledstoreop:=A_NONE;
+            unscaledloadop:=A_NONE;
+          end
+        else
+          begin
+            scaledstoreop:=A_STR;
+            scaledloadop:=A_LDR;
+            unscaledstoreop:=A_STUR;
+            unscaledloadop:=A_LDUR;
+          end;
+        { we only need 4 instructions extra to call FPC_MOVE }
+        if cs_opt_size in current_settings.optimizerswitches then
+          maxlenunrolled:=maxlenunrolled div 2;
+        if (len>maxlenunrolled) and
+           (len>totalalign*8) then
+          begin
+            g_concatcopy_move(list,source,dest,len);
+            exit;
+          end;
+
+        simplifysource:=true;
+        simplifydest:=true;
+        tmpsource:=source;
+        tmpdest:=dest;
+        { can we directly encode all offsets in an unrolled loop? }
+        if len<=maxlenunrolled then
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { the leftovers will be handled separately -> -(len mod opsize) }
+            inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
+            { additionally, the last regular load/store will be at
+              offset+len-opsize (if len-(len mod opsize)>len) }
+            if tmpsource.offset>source.offset then
+              dec(tmpsource.offset,tcgsize2size[opsize]);
+            getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
+            inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
+            if tmpdest.offset>dest.offset then
+              dec(tmpdest.offset,tcgsize2size[opsize]);
+            getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
+            tmpsource:=source;
+            tmpdest:=dest;
+            { if we can't directly encode all offsets, simplify }
+            if simplifysource then
+              begin
+                loadop:=scaledloadop;
+                makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
+              end;
+            if simplifydest then
+              begin
+                storeop:=scaledstoreop;
+                makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
+              end;
+            regcount:=len div tcgsize2size[opsize];
+            { in case we transfer two registers at a time, we copy an even
+              number of registers }
+            if loadop=A_LDP then
+              regcount:=regcount and not(1);
+            { initialise for dfa }
+            regs[low(regs)]:=NR_NO;
+            { max 4 loads/stores -> max 8 registers (in case of ldp/stdp) }
+            for i:=1 to regcount do
+              regs[i]:=getintregister(list,opsize);
+            if loadop=A_LDP then
+              begin
+                { load registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
+                { store registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                for i:=1 to regcount do
+                  genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
+                for i:=1 to regcount do
+                  genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
+              end;
+            { leftover }
+            len:=len-regcount*tcgsize2size[opsize];
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
+{$endif extdebug}
+          end
+        else
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { regular loop -> definitely use post-indexing }
+            loadop:=scaledloadop;
+            makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
+            storeop:=scaledstoreop;
+            makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
+            current_asmdata.getjumplabel(hl);
+            countreg:=getintregister(list,OS_32);
+            if loadop=A_LDP then
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize]*2,countreg)
+            else
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
+            a_label(list,hl);
+            a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
+            if loadop=A_LDP then
+              begin
+                regs[1]:=getintregister(list,opsize);
+                regs[2]:=getintregister(list,opsize);
+                gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
+                gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                regs[1]:=getintregister(list,opsize);
+                genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
+                genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
+              end;
+            list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
+            len:=len mod tcgsize2size[opsize];
+          end;
+        gencopyleftovers(list,tmpsource,tmpdest,len);
+      end;
+
+
+    procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
+      begin
+        { This method is integrated into g_intf_wrapper and shouldn't be called separately }
+        InternalError(2013020102);
+      end;
+
+
+    procedure tcgaarch64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
+      var
+        make_global: boolean;
+        href: treference;
+        hsym: tsym;
+        paraloc: pcgparalocation;
+        op: tasmop;
+      begin
+        if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
+          Internalerror(200006137);
+        if not assigned(procdef.struct) or
+           (procdef.procoptions*[po_classmethod, po_staticmethod,
+             po_methodpointer, po_interrupt, po_iocheck]<>[]) then
+          Internalerror(200006138);
+        if procdef.owner.symtabletype<>ObjectSymtable then
+          Internalerror(200109191);
+
+        make_global:=false;
+        if (not current_module.is_unit) or create_smartlink_library or
+           (procdef.owner.defowner.owner.symtabletype=globalsymtable) then
+          make_global:=true;
+
+        if make_global then
+          list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
+        else
+          list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));
+
+        { set param1 interface to self  }
+        procdef.init_paraloc_info(callerside);
+        hsym:=tsym(procdef.parast.Find('self'));
+        if not(assigned(hsym) and
+          (hsym.typ=paravarsym)) then
+          internalerror(2010103101);
+        paraloc:=tparavarsym(hsym).paraloc[callerside].location;
+        if assigned(paraloc^.next) then
+          InternalError(2013020101);
+
+        case paraloc^.loc of
+          LOC_REGISTER:
+            handle_reg_imm12_reg(list,A_SUB,paraloc^.size,paraloc^.register,ioffset,paraloc^.register,NR_IP0,false,true);
+          else
+            internalerror(2010103102);
+        end;
+
+        if (po_virtualmethod in procdef.procoptions) and
+            not is_objectpascal_helper(procdef.struct) then
+          begin
+            if (procdef.extnumber=$ffff) then
+              Internalerror(200006139);
+            { mov  0(%rdi),%rax ; load vmt}
+            reference_reset_base(href,paraloc^.register,0,sizeof(pint));
+            getcpuregister(list,NR_IP0);
+            a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_IP0);
+            { jmp *vmtoffs(%eax) ; method offs }
+            reference_reset_base(href,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
+            op:=A_LDR;
+            make_simple_ref(list,op,OS_ADDR,PF_None,href,NR_IP0);
+            list.concat(taicpu.op_reg_ref(op,NR_IP0,href));
+            ungetcpuregister(list,NR_IP0);
+            list.concat(taicpu.op_reg(A_BR,NR_IP0));
+          end
+        else
+          a_jmp_name(list,procdef.mangledname);
+        list.concat(Tai_symbol_end.Createname(labelname));
+      end;
+
+
+    procedure create_codegen;
+      begin
+        cg:=tcgaarch64.Create;
+        cg128:=tcg128.Create;
+      end;
+
+end.

+ 200 - 38
compiler/aarch64/cpubase.pas

@@ -1,7 +1,8 @@
 {
     Copyright (c) 1998-2012 by Florian Klaempfl and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe and Florian Klaempfl
 
-    Contains the base types for ARM64
+    Contains the base types for Aarch64
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -68,14 +69,22 @@ unit cpubase;
       { Available Superregisters }
       {$i ra64sup.inc}
 
+      RS_IP0 = RS_X16;
+      RS_IP1 = RS_X17;
+
       R_SUBWHOLE = R_SUBQ;
 
       { Available Registers }
       {$i ra64con.inc}
 
+      NR_IP0 = NR_X16;
+      NR_IP1 = NR_X17;
+
       { Integer Super registers first and last }
       first_int_supreg = RS_X0;
-      first_int_imreg = $20;
+      { xzr and sp take up a separate super register because some instructions
+        are ambiguous otherwise }
+      first_int_imreg = $21;
 
       { Integer Super registers first and last }
       first_fpu_supreg = RS_S0;
@@ -92,7 +101,7 @@ unit cpubase;
         The value of this constant is equal to the constant
         PARM_BOUNDARY / BITS_PER_UNIT in the GCC source.
       }
-      std_param_align = 4;
+      std_param_align = 8;
 
       { TODO: Calculate bsstart}
       regnumber_count_bsstart = 128;
@@ -109,7 +118,7 @@ unit cpubase;
         {$i ra64dwa.inc}
       );
       { registers which may be destroyed by calls }
-      VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X29..RS_X30];
+      VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X30];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
 
     type
@@ -126,16 +135,23 @@ unit cpubase;
       TOpPostfix = (PF_None,
         { update condition flags }
         PF_S,
-        { load/store }
-        PF_B,PF_SB,PF_H,PF_SH
+        { load/store sizes }
+        PF_B,PF_SB,PF_H,PF_SH,PF_W,PF_SW
       );
 
       TOpPostfixes = set of TOpPostfix;
 
     const
-      oppostfix2str : array[TOpPostfix] of string[2] = ('',
+      tcgsizep2size: array[OS_NO..OS_F128] of byte =
+        {OS_NO }
+        (0,
+        {OS_8,OS_16,OS_32,OS_64,OS_128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128}
+            0,    1,    2,    3,     4,    0,     1,     2,     3,      4,
+        {OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,}
+             2,      3,     0,     3,      4);
+      oppostfix2str: array[TOpPostfix] of string[2] = ('',
         's',
-        'b','sb','h','sh');
+        'b','sb','h','sh','w','sw');
 
 {*****************************************************************************
                                 Conditions
@@ -150,13 +166,15 @@ unit cpubase;
       TAsmConds = set of TAsmCond;
 
     const
+      C_CS = C_HS;
+      C_CC = C_LO;
       cond2str : array[TAsmCond] of string[2]=('',
         'eq','ne','hs','lo','mi','pl','vs','vc','hi','ls',
         'ge','lt','gt','le','al','nv'
       );
 
       uppercond2str : array[TAsmCond] of string[2]=('',
-        'EQ','NE','hs','LO','MI','PL','VS','VC','HI','LS',
+        'EQ','NE','HS','LO','MI','PL','VS','VC','HI','LS',
         'GE','LT','GT','LE','AL','NV'
       );
 
@@ -168,12 +186,28 @@ unit cpubase;
       TResFlags = (F_EQ,F_NE,F_CS,F_CC,F_MI,F_PL,F_VS,F_VC,F_HI,F_LS,
         F_GE,F_LT,F_GT,F_LE);
 
+    const
+      F_HS = F_CS;
+      F_LO = F_CC;
+
 {*****************************************************************************
                                 Operands
 *****************************************************************************}
 
+    type
       taddressmode = (AM_OFFSET,AM_PREINDEXED,AM_POSTINDEXED);
-      tshiftmode = (SM_None,SM_LSL,SM_LSR,SM_ASR,SM_ROR);
+
+      tshiftmode = (SM_None,
+                    { shifted register instructions. LSL can also be used for
+                      the index register of certain loads/stores }
+                    SM_LSL,SM_LSR,SM_ASR,
+                    { extended register instructions: zero/sign extension +
+                        optional shift (interpreted as LSL after extension)
+                       -- the index register of certain loads/stores can be
+                          extended via (s|u)xtw with a shiftval of either 0 or
+                          log2(transfer size of the load/store)
+                    }
+                    SM_UXTB,SM_UXTH,SM_UXTW,SM_UXTX,SM_SXTB,SM_SXTH,SM_SXTW,SM_SXTX);
 
       tupdatereg = (UR_None,UR_Update);
 
@@ -184,12 +218,6 @@ unit cpubase;
         shiftimm : byte;
       end;
 
-      tcpumodeflag = (mfA, mfI, mfF);
-      tcpumodeflags = set of tcpumodeflag;
-
-      tspecialregflag = (srC, srX, srS, srF);
-      tspecialregflags = set of tspecialregflag;
-
 {*****************************************************************************
                                  Constants
 *****************************************************************************}
@@ -201,6 +229,10 @@ unit cpubase;
       maxfpuregs = 32;
       maxaddrregs = 0;
 
+      shiftedregmodes = [SM_LSL,SM_UXTB,SM_UXTH,SM_UXTW,SM_UXTX,SM_SXTB,SM_SXTH,SM_SXTW,SM_SXTX];
+      extendedregmodes = [SM_LSL,SM_LSR,SM_ASR];
+
+
 {*****************************************************************************
                                 Operand Sizes
 *****************************************************************************}
@@ -232,17 +264,23 @@ unit cpubase;
                           Generic Register names
 *****************************************************************************}
 
-      NR_SP = NR_XZR;
-      RS_SP = RS_XZR;
-      NR_WSP = NR_WZR;
-      RS_WSP = RS_WZR;
+
+      NR_FP = NR_X29;
+      RS_FP = RS_X29;
+      NR_WFP = NR_W29;
+      RS_WFP = RS_W29;
+
+      NR_LR = NR_X30;
+      RS_LR = RS_X30;
+      NR_WLR = NR_W30;
+      RS_WLR = RS_W30;
 
       { Stack pointer register }
       NR_STACK_POINTER_REG = NR_SP;
       RS_STACK_POINTER_REG = RS_SP;
-      { Frame pointer register (initialized in tarmprocinfo.init_framepointer) }
-      RS_FRAME_POINTER_REG: tsuperregister = RS_X29;
-      NR_FRAME_POINTER_REG: tregister = NR_X29;
+      { Frame pointer register }
+      NR_FRAME_POINTER_REG = NR_X29;
+      RS_FRAME_POINTER_REG = RS_X29;
       { Register for addressing absolute data in a position independant way,
         such as in PIC code. The exact meaning is ABI specific. For
         further information look at GCC source : PIC_OFFSET_TABLE_REGNUM
@@ -307,6 +345,9 @@ unit cpubase;
 
     function dwarf_reg(r:tregister):shortint;
 
+    function is_shifter_const(d: aint; size: tcgsize): boolean;
+
+
   implementation
 
     uses
@@ -329,13 +370,24 @@ unit cpubase;
     function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
       begin
         case regtype of
+          R_INTREGISTER:
+            begin
+              case s of
+                { there's only Wn and Xn }
+                OS_64,
+                OS_S64:
+                  cgsize2subreg:=R_SUBWHOLE;
+                else
+                  cgsize2subreg:=R_SUBD;
+                end;
+            end;
           R_MMREGISTER:
             begin
               case s of
                 OS_F32:
-                  cgsize2subreg:=R_SUBFS;
+                  cgsize2subreg:=R_SUBMMS;
                 OS_F64:
-                  cgsize2subreg:=R_SUBFD;
+                  cgsize2subreg:=R_SUBMMD;
                 else
                   internalerror(2009112701);
               end;
@@ -349,18 +401,22 @@ unit cpubase;
     function reg_cgsize(const reg: tregister): tcgsize;
       begin
         case getregtype(reg) of
-          R_INTREGISTER :
-            reg_cgsize:=OS_32;
-          R_FPUREGISTER :
-            reg_cgsize:=OS_F80;
+          R_INTREGISTER:
+            case getsubreg(reg) of
+              R_SUBD:
+                result:=OS_32
+              else
+                result:=OS_64;
+            end;
           R_MMREGISTER :
             begin
               case getsubreg(reg) of
-                R_SUBFD,
-                R_SUBWHOLE:
+                R_SUBMMD:
                   result:=OS_F64;
-                R_SUBFS:
+                R_SUBMMS:
                   result:=OS_F32;
+                R_SUBMMWHOLE:
+                  result:=OS_M128;
                 else
                   internalerror(2009112903);
               end;
@@ -373,9 +429,7 @@ unit cpubase;
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
       begin
-        { This isn't 100% perfect because the arm allows jumps also by writing to PC=R15.
-          To overcome this problem we simply forbid that FPC generates jumps by loading R15 }
-        is_calljmp:= o in [A_B,A_BLR,A_RET];
+        is_calljmp:=o in [A_B,A_BL,A_BLR,A_RET,A_CBNZ,A_CBZ];
       end;
 
 
@@ -391,8 +445,8 @@ unit cpubase;
 
     function flags_to_cond(const f: TResFlags) : TAsmCond;
       const
-        flag_2_cond: array[F_EQ..F_LE] of TAsmCond =
-          (C_EQ,C_NE,C_HI,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
+        flag_2_cond: array[TResFlags] of TAsmCond =
+          (C_EQ,C_NE,C_HS,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
            C_GE,C_LT,C_GT,C_LE);
       begin
         if f>high(flag_2_cond) then
@@ -434,7 +488,7 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
       const
         inverse: array[TAsmCond] of TAsmCond=(C_None,
-          C_NE,C_EQ,C_LO,C_HI,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
+          C_NE,C_EQ,C_LO,C_HS,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
           C_LT,C_GE,C_LE,C_GT,C_None,C_None
         );
       begin
@@ -456,4 +510,112 @@ unit cpubase;
       end;
 
 
+    function is_shifter_const(d: aint; size: tcgsize): boolean;
+      var
+         pattern, checkpattern: qword;
+         patternlen, maxbits, replicatedlen: longint;
+         rightmostone, rightmostzero, checkbit, secondrightmostbit: longint;
+      begin
+        result:=false;
+        { patterns with all bits 0 or 1 cannot be represented this way }
+        if (d=0) then
+          exit;
+        case size of
+          OS_64,
+          OS_S64:
+            begin
+              if d=-1 then
+                exit;
+              maxbits:=64;
+            end
+          else
+            begin
+              if longint(d)=-1 then
+                exit;
+              { we'll generate a 32 bit pattern -> ignore upper sign bits in
+                case of negative longint value }
+              d:=cardinal(d);
+              maxbits:=32;
+            end;
+        end;
+        { "The Logical (immediate) instructions accept a bitmask immediate value
+          that is a 32-bit pattern or a 64-bit pattern viewed as a vector of
+          identical elements of size e = 2, 4, 8, 16, 32 or, 64 bits. Each
+          element contains the same sub-pattern, that is a single run of
+          1 to (e - 1) nonzero bits from bit 0 followed by zero bits, then
+          rotated by 0 to (e - 1) bits." (ARMv8 ARM)
+
+          Rather than generating all possible patterns and checking whether they
+          match our constant, we check whether the lowest 2/4/8/... bits are
+          a valid pattern, and if so whether the constant consists of a
+          replication of this pattern. Such a valid pattern has the form of
+          either (regexp notation)
+            * 1+0+1*
+            * 0+1+0* }
+        patternlen:=2;
+        while patternlen<=maxbits do
+          begin
+            { try lowest <patternlen> bits of d as pattern }
+            if patternlen<>64 then
+              pattern:=qword(d) and ((qword(1) shl patternlen)-1)
+            else
+              pattern:=qword(d);
+            { valid pattern? If it contains too many 1<->0 transitions, larger
+              parts of d cannot be a valid pattern either }
+            rightmostone:=BsfQWord(pattern);
+            rightmostzero:=BsfQWord(not(pattern));
+            { pattern all ones or zeroes -> not a valid pattern (but larger ones
+              can still be valid, since we have too few transitions) }
+            if (rightmostone<patternlen) and
+               (rightmostzero<patternlen) then
+              begin
+                if rightmostone>rightmostzero then
+                  begin
+                    { we have .*1*0* -> check next zero position by shifting
+                      out the existing zeroes (shr rightmostone), inverting and
+                      then again looking for the rightmost one position }
+                    checkpattern:=not(pattern);
+                    checkbit:=rightmostone;
+                  end
+                else
+                  begin
+                    { same as above, but for .*0*1* }
+                    checkpattern:=pattern;
+                    checkbit:=rightmostzero;
+                  end;
+                secondrightmostbit:=BsfQWord(checkpattern shr checkbit)+checkbit;
+                { if this position is >= patternlen -> ok (1 transition),
+                  otherwise we now have 2 transitions and have to check for a
+                  third (if there is one, abort)
+
+                  bsf returns 255 if no 1 bit is found, so in that case it's
+                  also ok
+                  }
+                if secondrightmostbit<patternlen then
+                  begin
+                    secondrightmostbit:=BsfQWord(not(checkpattern) shr secondrightmostbit)+secondrightmostbit;
+                    if secondrightmostbit<patternlen then
+                      exit;
+                  end;
+                { ok, this is a valid pattern, now does d consist of a
+                  repetition of this pattern? }
+                replicatedlen:=patternlen;
+                checkpattern:=pattern;
+                while replicatedlen<maxbits do
+                  begin
+                    { douplicate current pattern }
+                    checkpattern:=checkpattern or (checkpattern shl replicatedlen);
+                    replicatedlen:=replicatedlen*2;
+                  end;
+                if qword(d)=checkpattern then
+                  begin
+                    { yes! }
+                    result:=true;
+                    exit;
+                  end;
+              end;
+            patternlen:=patternlen*2;
+          end;
+      end;
+
 end.

+ 17 - 2
compiler/aarch64/cpuinfo.pas

@@ -21,6 +21,9 @@ Interface
 
 Type
    bestreal = double;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TDoubleRec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts64real = double;
    ts80real = type extended;
@@ -47,6 +50,9 @@ Type
 
 
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = false; (* Not yet at least ;-) *)
    {# Size of native extended floating point type }
    extended_size = 8;
    {# Size of a multimedia register               }
@@ -54,6 +60,15 @@ Const
    { target cpu string (used by compiler options) }
    target_cpu_string = 'aarch64';
 
+   { We know that there are fields after sramsize
+     but we don't care about this warning }
+   {$PUSH}
+    {$WARN 3177 OFF}
+   embedded_controllers : array [tcontrollertype] of tcontrollerdatatype =
+   (
+      (controllertypestr:''; controllerunitstr:''; flashbase:0; flashsize:0; srambase:0; sramsize:0));
+   {$POP}
+
    { calling conventions supported by the code generator }
    supported_calling_conventions : tproccalloptions = [
      pocall_internproc,
@@ -88,12 +103,12 @@ Const
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
-				  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
+				  cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
-   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
+   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
 Implementation

+ 40 - 0
compiler/aarch64/cpunode.pas

@@ -0,0 +1,40 @@
+{******************************************************************************
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Includes the aarch64 code generator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ *****************************************************************************}
+unit cpunode;
+
+{$I fpcdefs.inc}
+
+interface
+{ This unit is used to define the specific CPU implementations. All needed
+actions are included in the INITALIZATION part of these units. This explains
+the behaviour of such a unit having just a USES clause! }
+
+implementation
+
+  uses
+    ncgbas,ncgflw,ncgcal,ncgcnv,ncgld,ncgmem,ncgcon,ncgset,ncgobjc,
+    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,
+    { this not really a node }
+    rgcpu,
+    { symtable }
+    symcpu;
+
+end.

+ 457 - 521
compiler/aarch64/cpupara.pas

@@ -1,5 +1,5 @@
 {
-    Copyright (c) 2003-2012 by Florian Klaempfl and others
+    Copyright (c) 2013-2014 by Jonas Maebe, Florian Klaempfl and others
 
     AArch64 specific calling conventions
 
@@ -34,19 +34,24 @@ unit cpupara;
 
     type
        taarch64paramanager = class(tparamanager)
-          function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
-          function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
-          function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
-          function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
-          function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
-          function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+          function get_volatile_registers_int(calloption: tproccalloption): tcpuregisterset; override;
+          function get_volatile_registers_fpu(calloption: tproccalloption): tcpuregisterset; override;
+          function get_volatile_registers_mm(calloption: tproccalloption): tcpuregisterset; override;
+          function push_addr_param(varspez: tvarspez; def: tdef; calloption: tproccalloption): boolean; override;
+          function ret_in_param(def: tdef; pd: tabstractprocdef):boolean;override;
+          function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;override;
+          function create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;override;
+          function get_funcretloc(p: tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+          function param_use_paraloc(const cgpara: tcgpara): boolean; override;
          private
-          procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
-          function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-            var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
+          curintreg,
+          curmmreg: tsuperregister;
+          curstackoffset: aword;
+
+          procedure init_para_alloc_values;
+          procedure alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+
+          procedure create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
        end;
 
   implementation
@@ -56,6 +61,13 @@ unit cpupara;
        rgobj,
        defutil,symsym,symtable;
 
+    const
+      RS_FIRST_INT_PARAM_SUPREG = RS_X0;
+      RS_LAST_INT_PARAM_SUPREG = RS_X7;
+      { Q0/D0/S0/H0/B0 all have the same superregister number }
+      RS_FIRST_MM_PARAM_SUPREG = RS_D0;
+      RS_LAST_MM_PARAM_SUPREG = RS_D7;
+
 
     function taarch64paramanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
@@ -75,47 +87,90 @@ unit cpupara;
       end;
 
 
-    procedure taarch64paramanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
       var
-        paraloc : pcgparalocation;
-        def : tdef;
+        i: longint;
+        sym: tsym;
+        tmpelecount: longint;
       begin
-        if nr<1 then
-          internalerror(2002070801);
-        def:=tparavarsym(pd.paras[nr-1]).vardef;
-        cgpara.reset;
-        cgpara.size:=def_cgsize(def);
-        cgpara.intsize:=tcgsize2size[cgpara.size];
-        cgpara.alignment:=std_param_align;
-        cgpara.def:=def;
-        paraloc:=cgpara.add_location;
-        with paraloc^ do
-          begin
-            size:=OS_INT;
-            { the four first parameters are passed into registers }
-            if nr<=8 then
-              begin
-                loc:=LOC_REGISTER;
-                register:=newreg(R_INTREGISTER,RS_X0+nr-1,R_SUBWHOLE);
-              end
-            else
-              begin
-                { the other parameters are passed on the stack }
-                loc:=LOC_REFERENCE;
-                reference.index:=NR_STACK_POINTER_REG;
-                reference.offset:=(nr-9)*8;
-              end;
-          end;
+        result:=false;
+        case p.typ of
+          arraydef:
+            begin
+              if is_special_array(p) then
+                exit;
+              case tarraydef(p).elementdef.typ of
+                floatdef:
+                  begin
+                    { an array of empty records has no influence }
+                    if tarraydef(p).elementdef.size=0 then
+                      begin
+                        result:=true;
+                        exit
+                      end;
+                    tmpelecount:=0;
+                    if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
+                      exit;
+                    { tmpelecount now contains the number of hfa elements in a
+                      single array element (e.g. 2 if it's an array of a record
+                      containing two singles) -> multiply by number of elements
+                      in the array }
+                    inc(elecount,tarraydef(p).elecount*tmpelecount);
+                    if elecount>4 then
+                      exit;
+                  end;
+                else
+                  result:=is_hfa_internal(tarraydef(p).elementdef,basedef,elecount);
+                end;
+            end;
+          floatdef:
+            begin
+              if not assigned(basedef) then
+                basedef:=p
+              else if basedef<>p then
+                exit;
+              inc(elecount);
+              result:=true;
+            end;
+          recorddef:
+            begin
+              for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
+                begin
+                  sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
+                  if sym.typ<>fieldvarsym then
+                    continue;
+                  if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
+                    exit
+                end;
+              result:=true;
+            end;
+          else
+            exit
+        end;
       end;
 
 
-    function Is_HFA(p : tdef) : boolean;
+    { Returns whether a def is a "homogeneous float array" at the machine level.
+      This means that in the memory layout, the def only consists of maximally
+      4 floating point values that appear consecutively in memory }
+    function is_hfa(p: tdef; out basedef: tdef) : boolean;
+      var
+        elecount: longint;
       begin
         result:=false;
+        basedef:=nil;
+        elecount:=0;
+        result:=is_hfa_internal(p,basedef,elecount);
+        result:=
+          result and
+          (elecount>0) and
+          (p.size=basedef.size*elecount)
       end;
 
 
-    function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+    function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
+      var
+        hfabasedef: tdef;
       begin
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
            if push_addr_param for the def is true
@@ -124,7 +179,7 @@ unit cpupara;
             orddef:
               getparaloc:=LOC_REGISTER;
             floatdef:
-              getparaloc:=LOC_MMREGISTER
+              getparaloc:=LOC_MMREGISTER;
             enumdef:
               getparaloc:=LOC_REGISTER;
             pointerdef:
@@ -134,7 +189,10 @@ unit cpupara;
             classrefdef:
               getparaloc:=LOC_REGISTER;
             recorddef:
-              getparaloc:=LOC_REGISTER;
+              if not is_hfa(p,hfabasedef) then
+                getparaloc:=LOC_REGISTER
+              else
+                getparaloc:=LOC_MMREGISTER;
             objectdef:
               getparaloc:=LOC_REGISTER;
             stringdef:
@@ -147,12 +205,12 @@ unit cpupara;
             filedef:
               getparaloc:=LOC_REGISTER;
             arraydef:
-              getparaloc:=LOC_REFERENCE;
-            setdef:
-              if is_smallset(p) then
+              if not is_hfa(p,hfabasedef) then
                 getparaloc:=LOC_REGISTER
               else
-                getparaloc:=LOC_REFERENCE;
+                getparaloc:=LOC_MMREGISTER;
+            setdef:
+              getparaloc:=LOC_REGISTER;
             variantdef:
               getparaloc:=LOC_REGISTER;
             { avoid problems with errornous definitions }
@@ -164,7 +222,9 @@ unit cpupara;
       end;
 
 
-    function taarch64paramanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
+    function taarch64paramanager.push_addr_param(varspez: tvarspez; def :tdef; calloption: tproccalloption): boolean;
+      var
+        hfabasedef: tdef;
       begin
         result:=false;
         if varspez in [vs_var,vs_out,vs_constref] then
@@ -174,19 +234,36 @@ unit cpupara;
           end;
         case def.typ of
           objectdef:
-            result:=not(Is_HFA(def) and (is_object(def) and ((varspez=vs_const) or (def.size=0));
+            result:=is_object(def);
           recorddef:
-            { note: should this ever be changed, make sure that const records
-                are always passed by reference for calloption=pocall_mwpascal }
-            result:=(varspez=vs_const) or (def.size=0);
+            { ABI: any composite > 16 bytes that not a hfa/hva
+              Special case: MWPascal, which passes all const parameters by
+                reference for compatibility reasons
+            }
+            result:=
+              ((varspez=vs_const) and
+               (calloption=pocall_mwpascal)) or
+              (not is_hfa(def,hfabasedef) and
+               (def.size>16));
           variantdef,
           formaldef:
             result:=true;
+          { arrays are composites and hence treated the same as records by the
+            ABI (watch out for C, where an array is a pointer)
+            Also: all other platforms pass const arrays by reference. Do the
+              same here, because there is too much hacky code out there that
+              relies on this ("array[0..0] of x" passed as const parameter and
+              then indexed beyond its bounds) }
           arraydef:
-            result:=(tarraydef(def).highrange>=tarraydef(def).lowrange) or
-                             is_open_array(def) or
-                             is_array_of_const(def) or
-                             is_array_constructor(def);
+            result:=
+              (calloption in cdecl_pocalls) or
+              is_open_array(def) or
+              is_array_of_const(def) or
+              is_array_constructor(def) or
+              ((tarraydef(def).highrange>=tarraydef(def).lowrange) and
+               ((varspez=vs_const) or
+                (not is_hfa(def,hfabasedef) and
+                 (def.size>16))));
           setdef :
             result:=def.size>16;
           stringdef :
@@ -195,511 +272,370 @@ unit cpupara;
       end;
 
 
-    function taarch64paramanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
-      var
-        i: longint;
-        sym: tsym;
-        fpufield: boolean;
+    function taarch64paramanager.ret_in_param(def: tdef; pd: tabstractprocdef): boolean;
       begin
         if handle_common_ret_in_param(def,pd,result) then
           exit;
-        case def.typ of
-          recorddef:
-            begin
-              result:=def.size>4;
-              if not result and
-                 (target_info.abi in [abi_default,abi_armeb]) then
-                begin
-                  { in case of the old ARM abi (APCS), a struct is returned in
-                    a register only if it is simple. And what is a (non-)simple
-                    struct:
-
-                    "A non-simple type is any non-floating-point type of size
-                     greater than one word (including structures containing only
-                     floating-point fields), and certain single-word structured
-                     types."
-                       (-- ARM APCS documentation)
-
-                    So only floating point types or more than one word ->
-                    definitely non-simple (more than one word is already
-                    checked above). This includes unions/variant records with
-                    overlaid floating point and integer fields.
-
-                    Smaller than one word struct types are simple if they are
-                    "integer-like", and:
-
-                    "A structure is termed integer-like if its size is less than
-                    or equal to one word, and the offset of each of its
-                    addressable subfields is zero."
-                      (-- ARM APCS documentation)
-
-                    An "addressable subfield" is a field of which you can take
-                    the address, which in practive means any non-bitfield.
-                    In Pascal, there is no way to express the difference that
-                    you can have in C between "char" and "int :8". In this
-                    context, we use the fake distinction that a type defined
-                    inside the record itself (such as "a: 0..255;") indicates
-                    a bitpacked field while a field using a different type
-                    (such as "a: byte;") is not.
-                  }
-                  for i:=0 to trecorddef(def).symtable.SymList.count-1 do
-                    begin
-                      sym:=tsym(trecorddef(def).symtable.SymList[i]);
-                      if sym.typ<>fieldvarsym then
-                        continue;
-                      { bitfield -> ignore }
-                      if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
-                         (tfieldvarsym(sym).vardef.typ in [orddef,enumdef]) and
-                         (tfieldvarsym(sym).vardef.owner.defowner=def) then
-                        continue;
-                      { all other fields must be at offset zero }
-                      if tfieldvarsym(sym).fieldoffset<>0 then
-                        begin
-                          result:=true;
-                          exit;
-                        end;
-                      { floating point field -> also by reference }
-                      if tfieldvarsym(sym).vardef.typ=floatdef then
-                        begin
-                          result:=true;
-                          exit;
-                        end;
-                    end;
-                end;
-            end;
-          procvardef:
-            if not tprocvardef(def).is_addressonly then
-              result:=true
-            else
-              result:=false
-          else
-            result:=inherited ret_in_param(def,pd);
-        end;
-      end;
-
-
-    procedure taarch64paramanager.init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
-      begin
-        curintreg:=RS_R0;
-        curfloatreg:=RS_F0;
-        curmmreg:=RS_D0;
-        cur_stack_offset:=0;
-        sparesinglereg := NR_NO;
+        { ABI: if the parameter would be passed in registers, it is returned
+            in those registers; otherwise, it's returned by reference }
+        result:=push_addr_param(vs_value,def,pd.proccalloption);
       end;
 
 
-    function taarch64paramanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-        var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
-
+    procedure taarch64paramanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
       var
-        nextintreg,nextfloatreg,nextmmreg : tsuperregister;
-        paradef : tdef;
-        paraloc : pcgparalocation;
-        stack_offset : aword;
-        hp : tparavarsym;
-        loc : tcgloc;
-        paracgsize   : tcgsize;
-        paralen : longint;
-        i : integer;
-        firstparaloc: boolean;
-
-      procedure assignintreg;
-        begin
-          { In case of po_delphi_nested_cc, the parent frame pointer
-            is always passed on the stack. }
-           if (nextintreg<=RS_R3) and
-              (not(vo_is_parentfp in hp.varoptions) or
-               not(po_delphi_nested_cc in p.procoptions)) then
-             begin
-               paraloc^.loc:=LOC_REGISTER;
-               paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-               inc(nextintreg);
-             end
-           else
-             begin
-               paraloc^.loc:=LOC_REFERENCE;
-               paraloc^.reference.index:=NR_STACK_POINTER_REG;
-               paraloc^.reference.offset:=stack_offset;
-               inc(stack_offset,4);
-            end;
-        end;
-
-
+        hp: tparavarsym;
+        i: longint;
       begin
-        result:=0;
-        nextintreg:=curintreg;
-        nextfloatreg:=curfloatreg;
-        nextmmreg:=curmmreg;
-        stack_offset:=cur_stack_offset;
-
         for i:=0 to paras.count-1 do
           begin
             hp:=tparavarsym(paras[i]);
-            paradef:=hp.vardef;
-
-            hp.paraloc[side].reset;
-
-            { currently only support C-style array of const,
-              there should be no location assigned to the vararg array itself }
-            if (p.proccalloption in cstylearrayofconst) and
-               is_array_of_const(paradef) then
-              begin
-                paraloc:=hp.paraloc[side].add_location;
-                { hack: the paraloc must be valid, but is not actually used }
-                paraloc^.loc:=LOC_REGISTER;
-                paraloc^.register:=NR_R0;
-                paraloc^.size:=OS_ADDR;
-                break;
-              end;
-
-            if push_addr_param(hp.varspez,paradef,p.proccalloption) then
+            { hidden function result parameter is passed in X8 (doesn't have to
+              be valid on return) according to the ABI
+
+              -- don't follow the ABI for managed types, because
+               a) they are passed in registers as parameters, so we should also
+                  return them in a register to be ABI-compliant (which we can't
+                  because the entire compiler is built around the idea that
+                  they are returned by reference, for ref-counting performance
+                  and Delphi-compatibility reasons)
+               b) there are hacks in the system unit that expect that you can
+                  call
+                    function f: com_interface;
+                  as
+                    procedure p(out o: obj);
+                  That can only work in case we do not use x8 to return them
+                  from the function, but the regular first parameter register.
+
+              As the ABI says this behaviour is ok for C++ classes with a
+              non-trivial copy constructor or destructor, it seems reasonable
+              for us to do this for managed types as well.}
+            if (vo_is_funcret in hp.varoptions) and
+               not is_managed_type(hp.vardef) then
               begin
-                paradef:=getpointerdef(paradef);
-                loc:=LOC_REGISTER;
-                paracgsize := OS_ADDR;
-                paralen := tcgsize2size[OS_ADDR];
-              end
-            else
-              begin
-                if not is_special_array(paradef) then
-                  paralen := paradef.size
-                else
-                  paralen := tcgsize2size[def_cgsize(paradef)];
-                loc := getparaloc(p.proccalloption,paradef,isvariadic);
-                if (paradef.typ in [objectdef,arraydef,recorddef]) and
-                  not is_special_array(paradef) and
-                  (hp.varspez in [vs_value,vs_const]) then
-                  paracgsize := int_cgsize(paralen)
-                else
+                hp.paraloc[side].reset;
+                hp.paraloc[side].size:=OS_ADDR;
+                hp.paraloc[side].alignment:=voidpointertype.alignment;
+                hp.paraloc[side].intsize:=voidpointertype.size;
+                hp.paraloc[side].def:=getpointerdef(hp.vardef);
+                with hp.paraloc[side].add_location^ do
                   begin
-                    paracgsize:=def_cgsize(paradef);
-                    { for things like formaldef }
-                    if (paracgsize=OS_NO) then
-                      begin
-                        paracgsize:=OS_ADDR;
-                        paralen:=tcgsize2size[OS_ADDR];
-                        paradef:=voidpointertype;
-                      end;
+                    size:=OS_ADDR;
+                    def:=hp.paraloc[side].def;
+                    loc:=LOC_REGISTER;
+                    register:=NR_X8;
                   end
-              end;
-
-             hp.paraloc[side].size:=paracgsize;
-             hp.paraloc[side].Alignment:=std_param_align;
-             hp.paraloc[side].intsize:=paralen;
-             hp.paraloc[side].def:=paradef;
-             firstparaloc:=true;
-
-{$ifdef EXTDEBUG}
-             if paralen=0 then
-               internalerror(200410311);
-{$endif EXTDEBUG}
-             while paralen>0 do
-               begin
-                 paraloc:=hp.paraloc[side].add_location;
-
-                 if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
-                   case paracgsize of
-                     OS_F32:
-                       paraloc^.size:=OS_32;
-                     OS_F64:
-                       paraloc^.size:=OS_32;
-                     else
-                       internalerror(2005082901);
-                   end
-                 else if (paracgsize in [OS_NO,OS_64,OS_S64]) then
-                   paraloc^.size := OS_32
-                 else
-                   paraloc^.size:=paracgsize;
-                 case loc of
-                    LOC_REGISTER:
-                      begin
-                        { align registers for eabi }
-                        if (target_info.abi in [abi_eabi,abi_eabihf]) and
-                           firstparaloc and
-                           (paradef.alignment=8) then
-                          begin
-                            if (nextintreg in [RS_R1,RS_R3]) then
-                              inc(nextintreg)
-                            else if nextintreg>RS_R3 then
-                              stack_offset:=align(stack_offset,8);
-                          end;
-                        { this is not abi compliant
-                          why? (FK) }
-                        if nextintreg<=RS_R3 then
-                          begin
-                            paraloc^.loc:=LOC_REGISTER;
-                            paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-                            inc(nextintreg);
-                          end
-                        else
-                          begin
-                            { LOC_REFERENCE always contains everything that's left }
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            if (side=callerside) then
-                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            inc(stack_offset,align(paralen,4));
-                            paralen:=0;
-                         end;
-                      end;
-                    LOC_FPUREGISTER:
-                      begin
-                        if nextfloatreg<=RS_F3 then
-                          begin
-                            paraloc^.loc:=LOC_FPUREGISTER;
-                            paraloc^.register:=newreg(R_FPUREGISTER,nextfloatreg,R_SUBWHOLE);
-                            inc(nextfloatreg);
-                          end
-                        else
-                          begin
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            case paraloc^.size of
-                              OS_F32:
-                                inc(stack_offset,4);
-                              OS_F64:
-                                inc(stack_offset,8);
-                              OS_F80:
-                                inc(stack_offset,10);
-                              OS_F128:
-                                inc(stack_offset,16);
-                              else
-                                internalerror(200403201);
-                            end;
-                          end;
-                      end;
-                    LOC_MMREGISTER:
-                      begin
-                        if (nextmmreg<=RS_D7) or
-                           ((paraloc^.size = OS_F32) and
-                            (sparesinglereg<>NR_NO)) then
-                          begin
-                            paraloc^.loc:=LOC_MMREGISTER;
-                            case paraloc^.size of
-                              OS_F32:
-                                if sparesinglereg = NR_NO then 
-                                  begin     
-                                    paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFS);
-                                    sparesinglereg:=newreg(R_MMREGISTER,nextmmreg-RS_S0+RS_S1,R_SUBFS);
-                                    inc(nextmmreg);
-                                  end
-                                else
-                                  begin
-                                    paraloc^.register:=sparesinglereg;
-                                    sparesinglereg := NR_NO;
-                                  end;
-                              OS_F64:
-                                begin
-                                  paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFD);
-                                  inc(nextmmreg);
-                                end;
-                              else
-                                internalerror(2012031601);
-                            end;
-                          end
-                        else
-                          begin
-                            { once a floating point parameters has been placed
-                            on the stack we must not pass any more in vfp regs
-                            even if there is a single precision register still
-                            free}
-                            sparesinglereg := NR_NO;
-                            { LOC_REFERENCE always contains everything that's left }
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            if (side=callerside) then
-                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            inc(stack_offset,align(paralen,4));
-                            paralen:=0;
-                         end;
-                      end;
-                    LOC_REFERENCE:
-                      begin
-                        if push_addr_param(hp.varspez,paradef,p.proccalloption) then
-                          begin
-                            paraloc^.size:=OS_ADDR;
-                            assignintreg
-                          end
-                        else
-                          begin
-                            { align stack for eabi }
-                            if (target_info.abi in [abi_eabi,abi_eabihf]) and
-                               firstparaloc and
-                               (paradef.alignment=8) then
-                              stack_offset:=align(stack_offset,8);
-
-                             paraloc^.size:=paracgsize;
-                             paraloc^.loc:=LOC_REFERENCE;
-                             paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                             paraloc^.reference.offset:=stack_offset;
-                             inc(stack_offset,align(paralen,4));
-                             paralen:=0
-                          end;
-                      end;
-                    else
-                      internalerror(2002071002);
-                 end;
-                 if side=calleeside then
-                   begin
-                     if paraloc^.loc=LOC_REFERENCE then
-                       begin
-                         paraloc^.reference.index:=NR_FRAME_POINTER_REG;
-                         { on non-Darwin, the framepointer contains the value
-                           of the stack pointer on entry. On Darwin, the
-                           framepointer points to the previously saved
-                           framepointer (which is followed only by the saved
-                           return address -> framepointer + 4 = stack pointer
-                           on entry }
-                         if not(target_info.system in systems_darwin) then
-                           inc(paraloc^.reference.offset,4)
-                         else
-                           inc(paraloc^.reference.offset,8);
-                       end;
-                   end;
-                 dec(paralen,tcgsize2size[paraloc^.size]);
-                 firstparaloc:=false
-               end;
+              end
+            else
+              alloc_para(hp.paraloc[side],p,hp.varspez,side,hp.vardef,isvariadic,
+                (vo_is_parentfp in hp.varoptions) and
+                (po_delphi_nested_cc in p.procoptions));
           end;
-        curintreg:=nextintreg;
-        curfloatreg:=nextfloatreg;
-        curmmreg:=nextmmreg;
-        cur_stack_offset:=stack_offset;
-        result:=cur_stack_offset;
       end;
 
 
     function  taarch64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       var
-        paraloc : pcgparalocation;
-        retcgsize  : tcgsize;
+        retcgsize: tcgsize;
       begin
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
            exit;
 
-        paraloc:=result.add_location;
-        { Return in FPU register? }
-        if result.def.typ=floatdef then
+         { in this case, it must be returned in registers as if it were passed
+           as the first parameter }
+         init_para_alloc_values;
+         alloc_para(result,p,vs_value,side,result.def,false,false);
+         { sanity check (LOC_VOID for empty records) }
+         if not assigned(result.location) or
+            not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
+           internalerror(2014113001);
+      end;
+
+    function taarch64paramanager.param_use_paraloc(const cgpara: tcgpara): boolean;
+      begin
+        { we always set up a stack frame -> we can always access the parameters
+          this way }
+        result:=
+          (cgpara.location^.loc=LOC_REFERENCE) and
+          not assigned(cgpara.location^.next);
+      end;
+
+
+    procedure taarch64paramanager.init_para_alloc_values;
+      begin
+        curintreg:=RS_FIRST_INT_PARAM_SUPREG;
+        curmmreg:=RS_FIRST_MM_PARAM_SUPREG;
+        curstackoffset:=0;
+      end;
+
+
+    procedure taarch64paramanager.alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+      var
+        hfabasedef, locdef: tdef;
+        paraloc: pcgparalocation;
+        paralen, stackslotlen: asizeint;
+        loc: tcgloc;
+        paracgsize, locsize: tcgsize;
+        firstparaloc: boolean;
+      begin
+        result.reset;
+
+        { currently only support C-style array of const,
+          there should be no location assigned to the vararg array itself }
+        if (p.proccalloption in cstylearrayofconst) and
+           is_array_of_const(paradef) then
           begin
-            if target_info.abi = abi_eabihf then 
-              begin
-                paraloc^.loc:=LOC_MMREGISTER;
-                case retcgsize of
-                  OS_64,
-                  OS_F64:
-                    begin
-                      paraloc^.register:=NR_MM_RESULT_REG;
-                    end;
-                  OS_32,
-                  OS_F32:
-                    begin
-                      paraloc^.register:=NR_S0;
-                    end;
-                  else
-                    internalerror(2012032501);
-                end;
-                paraloc^.size:=retcgsize;
-              end
-            else if (p.proccalloption in [pocall_softfloat]) or
-               (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
-              begin
-                case retcgsize of
-                  OS_64,
-                  OS_F64:
-                    begin
-                      paraloc^.loc:=LOC_REGISTER;
-                      if target_info.endian = endian_big then
-                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
-                      else
-                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
-                      paraloc^.size:=OS_32;
-                      paraloc:=result.add_location;
-                      paraloc^.loc:=LOC_REGISTER;
-                      if target_info.endian = endian_big then
-                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
-                      else
-                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
-                      paraloc^.size:=OS_32;
-                    end;
-                  OS_32,
-                  OS_F32:
-                    begin
-                      paraloc^.loc:=LOC_REGISTER;
-                      paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                      paraloc^.size:=OS_32;
-                    end;
-                  else
-                    internalerror(2005082603);
-                end;
-              end
-            else
-              begin
-                paraloc^.loc:=LOC_FPUREGISTER;
-                paraloc^.register:=NR_FPU_RESULT_REG;
-                paraloc^.size:=retcgsize;
-              end;
+            paraloc:=result.add_location;
+            { hack: the paraloc must be valid, but is not actually used }
+            paraloc^.loc:=LOC_REGISTER;
+            paraloc^.register:=NR_X0;
+            paraloc^.size:=OS_ADDR;
+            exit;
+          end;
+
+        if push_addr_param(varspez,paradef,p.proccalloption) then
+          begin
+            paradef:=getpointerdef(paradef);
+            loc:=LOC_REGISTER;
+            paracgsize:=OS_ADDR;
+            paralen:=tcgsize2size[OS_ADDR];
           end
-          { Return in register }
         else
           begin
-            if retcgsize in [OS_64,OS_S64] then
-              begin
-                paraloc^.loc:=LOC_REGISTER;
-                if target_info.endian = endian_big then
-                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
-                else
-                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
-                paraloc^.size:=OS_32;
-                paraloc:=result.add_location;
-                paraloc^.loc:=LOC_REGISTER;
-                if target_info.endian = endian_big then
-                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
-                else
-                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
-                paraloc^.size:=OS_32;
-              end
+            if not is_special_array(paradef) then
+              paralen:=paradef.size
+            else
+              paralen:=tcgsize2size[def_cgsize(paradef)];
+            loc:=getparaloc(p.proccalloption,paradef);
+            if (paradef.typ in [objectdef,arraydef,recorddef]) and
+               not is_special_array(paradef) and
+               (varspez in [vs_value,vs_const]) then
+              paracgsize:=int_cgsize(paralen)
             else
               begin
-                paraloc^.loc:=LOC_REGISTER;
-                paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                if (result.intsize<>3) then
-                  paraloc^.size:=retcgsize
-                else
-                  paraloc^.size:=OS_32;
-              end;
+                paracgsize:=def_cgsize(paradef);
+                { for things like formaldef }
+                if paracgsize=OS_NO then
+                  begin
+                    paracgsize:=OS_ADDR;
+                    paralen:=tcgsize2size[OS_ADDR];
+                    paradef:=voidpointertype;
+                  end;
+              end
           end;
+
+          { get hfa basedef if applicable }
+          if not is_hfa(paradef,hfabasedef) then
+            hfabasedef:=nil;
+
+         result.size:=paracgsize;
+         result.alignment:=std_param_align;
+         result.intsize:=paralen;
+         result.def:=paradef;
+
+         { empty record: skipped (explicitly defined by Apple ABI, undefined
+           by general ABI; libffi also skips them in all cases) }
+         if not is_special_array(paradef) and
+            (paradef.size=0) then
+           begin
+             paraloc:=result.add_location;
+             paraloc^.loc:=LOC_VOID;
+             paraloc^.def:=paradef;
+             paraloc^.size:=OS_NO;
+             exit;
+           end;
+
+         { sufficient registers left? }
+         case loc of
+           LOC_REGISTER:
+             begin
+               { In case of po_delphi_nested_cc, the parent frame pointer
+                 is always passed on the stack. }
+               if isdelphinestedcc then
+                 loc:=LOC_REFERENCE
+               else if curintreg+((paralen-1) shr 3)>RS_LAST_INT_PARAM_SUPREG then
+                 begin
+                   { not enough integer registers left -> no more register
+                     parameters, copy all to stack
+                   }
+                   curintreg:=succ(RS_LAST_INT_PARAM_SUPREG);
+                   loc:=LOC_REFERENCE;
+                 end;
+             end;
+           LOC_MMREGISTER:
+             begin;
+               { every hfa element must be passed in a separate register }
+               if (assigned(hfabasedef) and
+                   (curmmreg+(paralen div hfabasedef.size)>RS_LAST_MM_PARAM_SUPREG)) or
+                  (curmmreg+((paralen-1) shr 3)>RS_LAST_MM_PARAM_SUPREG) then
+                 begin
+                   { not enough mm registers left -> no more register
+                     parameters, copy all to stack
+                   }
+                   curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
+                   loc:=LOC_REFERENCE;
+                 end;
+             end;
+         end;
+
+         { allocate registers/stack locations }
+         firstparaloc:=true;
+         repeat
+           paraloc:=result.add_location;
+
+           { set paraloc size/def }
+           if assigned(hfabasedef) then
+             begin
+               locsize:=def_cgsize(hfabasedef);
+               locdef:=hfabasedef;
+             end
+           { make sure we don't lose whether or not the type is signed }
+           else if (loc=LOC_REGISTER) and
+                   (paradef.typ<>orddef) then
+             begin
+               locsize:=int_cgsize(paralen);
+               locdef:=get_paraloc_def(paradef,paralen,firstparaloc);
+             end
+           else
+             begin
+               locsize:=paracgsize;
+               locdef:=paradef;
+             end;
+           if locsize in [OS_NO,OS_128,OS_S128] then
+             begin
+               if paralen>4 then
+                 begin
+                   paraloc^.size:=OS_INT;
+                   paraloc^.def:=u64inttype;
+                 end
+               else
+                 begin
+                   { for 3-byte records }
+                   paraloc^.size:=OS_32;
+                   paraloc^.def:=u32inttype;
+                 end;
+             end
+           else
+             begin
+               paraloc^.size:=locsize;
+               paraloc^.def:=locdef;
+             end;
+
+           { paraloc loc }
+           paraloc^.loc:=loc;
+
+           { assign register/stack address }
+           case loc of
+             LOC_REGISTER:
+               begin
+                 paraloc^.register:=newreg(R_INTREGISTER,curintreg,cgsize2subreg(R_INTREGISTER,paraloc^.size));
+                 inc(curintreg);
+                 dec(paralen,tcgsize2size[paraloc^.size]);
+
+                 { "The general ABI specifies that it is the callee's
+                    responsibility to sign or zero-extend arguments having fewer
+                    than 32 bits, and that unused bits in a register are
+                    unspecified. In iOS, however, the caller must perform such
+                    extensions, up to 32 bits." }
+                 if (target_info.abi=abi_aarch64_darwin) and
+                    (side=callerside) and
+                    is_ordinal(paradef) and
+                    (paradef.size<4) then
+                   paraloc^.size:=OS_32;
+
+                 { in case it's a composite, "The argument is passed as though
+                   it had been loaded into the registers from a double-word-
+                   aligned address with an appropriate sequence of LDR
+                   instructions loading consecutive registers from memory" ->
+                   in case of big endian, values in not completely filled
+                   registers must be shifted to the top bits }
+                 if (target_info.endian=endian_big) and
+                    not(paraloc^.size in [OS_64,OS_S64]) and
+                    (paradef.typ in [setdef,recorddef,arraydef,objectdef]) then
+                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size]);
+               end;
+             LOC_MMREGISTER:
+               begin
+                 paraloc^.register:=newreg(R_MMREGISTER,curmmreg,cgsize2subreg(R_MMREGISTER,paraloc^.size));
+                 inc(curmmreg);
+                 dec(paralen,tcgsize2size[paraloc^.size]);
+               end;
+             LOC_REFERENCE:
+               begin
+                  paraloc^.size:=paracgsize;
+                  paraloc^.loc:=LOC_REFERENCE;
+
+                  { the current stack offset may not be properly aligned in
+                    case we're on Darwin have allocated a non-variadic argument
+                    < 8 bytes previously }
+                  if target_info.abi=abi_aarch64_darwin then
+                    curstackoffset:=align(curstackoffset,paraloc^.def.alignment);
+
+                  { on Darwin, non-variadic arguments take up their actual size
+                    on the stack; on other platforms, they take up a multiple of
+                    8 bytes }
+                  if (target_info.abi=abi_aarch64_darwin) and
+                     not isvariadic then
+                    stackslotlen:=paralen
+                  else
+                    stackslotlen:=align(paralen,8);
+
+                  { from the ABI: if arguments occupy partial stack space, they
+                    have to occupy the lowest significant bits of a register
+                    containing that value which is then stored to memory ->
+                    in case of big endian, skip the alignment bytes (if any) }
+                  if target_info.endian=endian_little then
+                    paraloc^.reference.offset:=curstackoffset
+                  else
+                    paraloc^.reference.offset:=curstackoffset+stackslotlen-paralen;
+                  if side=callerside then
+                    paraloc^.reference.index:=NR_STACK_POINTER_REG
+                  else
+                    begin
+                      paraloc^.reference.index:=NR_FRAME_POINTER_REG;
+                      inc(paraloc^.reference.offset,16);
+                    end;
+                  inc(curstackoffset,stackslotlen);
+                  paralen:=0
+               end;
+             else
+               internalerror(2002071002);
+           end;
+         firstparaloc:=false;
+         { <=0 for sign/zero-extended locations }
+         until paralen<=0;
       end;
 
 
-    function taarch64paramanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
-      var
-        cur_stack_offset: aword;
-        curintreg, curfloatreg, curmmreg: tsuperregister;
-        sparesinglereg:tregister;
+    function taarch64paramanager.create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;
       begin
-        init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_para_alloc_values;
 
-        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,false);
+        create_paraloc_info_intern(p,side,p.paras,false);
+        result:=curstackoffset;
 
         create_funcretloc_info(p,side);
      end;
 
 
-    function taarch64paramanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
-      var
-        cur_stack_offset: aword;
-        curintreg, curfloatreg, curmmreg: tsuperregister;
-        sparesinglereg:tregister;
+    function taarch64paramanager.create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;
       begin
-        init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_para_alloc_values;
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
-        if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+        { non-variadic parameters }
+        create_paraloc_info_intern(p,callerside,p.paras,false);
+        if p.proccalloption in cstylearrayofconst then
+          begin
+            { on Darwin, we cannot use any registers for variadic parameters }
+            if target_info.abi=abi_aarch64_darwin then
+              begin
+                curintreg:=succ(RS_LAST_INT_PARAM_SUPREG);
+                curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
+              end;
+            { continue loading the parameters  }
+            create_paraloc_info_intern(p,callerside,varargspara,true);
+            result:=curstackoffset;
+          end
         else
           internalerror(200410231);
       end;

+ 68 - 0
compiler/aarch64/cpupi.pas

@@ -0,0 +1,68 @@
+{
+    Copyright (c) 2002 by Florian Klaempfl
+
+    This unit contains the CPU specific part of tprocinfo
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cpupi;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    procinfo,
+    psub;
+
+  type
+    taarch64procinfo=class(tcgprocinfo)
+      constructor create(aparent: tprocinfo); override;
+      procedure set_first_temp_offset; override;
+    end;
+
+implementation
+
+  uses
+    tgobj,
+    cpubase;
+
+  constructor taarch64procinfo.create(aparent: tprocinfo);
+    begin
+      inherited;
+      { use the stack pointer as framepointer, because
+         1) we exactly know the offsets of the temps from the stack pointer
+            after pass 1 (based on the require parameter stack size for called
+            routines), while we don't know it for the frame pointer (it depends
+            on the number of saved registers)
+         2) temp offsets from the stack pointer are positive while those from
+            the frame pointer are negative, and we can directly encode much
+            bigger positive offsets in the instructions
+      }
+      framepointer:=NR_STACK_POINTER_REG;
+    end;
+
+  procedure taarch64procinfo.set_first_temp_offset;
+    begin
+     { leave room for allocated parameters }
+     tg.setfirsttemp(align(maxpushedparasize,16));
+    end;
+
+
+begin
+  cprocinfo:=taarch64procinfo;
+end.

+ 70 - 0
compiler/aarch64/cputarg.pas

@@ -0,0 +1,70 @@
+{
+    Copyright (c) 2001-2002 by Peter Vreman
+
+    Includes the AArch64 dependent target units
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cputarg;
+
+{$i fpcdefs.inc}
+
+interface
+
+
+implementation
+
+    uses
+      systems { prevent a syntax error when nothing is included }
+
+{**************************************
+             Targets
+**************************************}
+
+    {$ifndef NOTARGETLINUX}
+//      ,t_linux
+    {$endif}
+    {$ifndef NOTARGETBSD}
+      ,t_bsd
+    {$endif}
+
+{**************************************
+             Assemblers
+**************************************}
+
+    {$ifndef NOAGCPUGAS}
+      ,agcpugas
+    {$endif}
+
+{**************************************
+        Assembler Readers
+**************************************}
+
+  {$ifndef NoRaarmgas}
+       ,racpugas
+  {$endif NoRaarmgas}
+
+{**************************************
+             Debuginfo
+**************************************}
+
+  {$ifndef NoDbgDwarf}
+      ,dbgdwarf
+  {$endif NoDbgDwarf}
+      ;
+
+end.

+ 156 - 0
compiler/aarch64/hlcgcpu.pas

@@ -0,0 +1,156 @@
+{
+    Copyright (c) 1998-2010 by Florian Klaempfl and Jonas Maebe
+    Member of the Free Pascal development team
+
+    This unit contains routines to create a pass-through high-level code
+    generator. This is used by most regular code generators.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+unit hlcgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    symtype,
+    aasmdata,
+    cgbase,cgutils,
+    hlcgobj, hlcg2ll;
+
+  type
+    thlcgaarch64 = class(thlcg2ll)
+      procedure a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister); override;
+      procedure a_load_subsetreg_subsetreg(list: TAsmlist; fromsubsetsize, tosubsetsize: tdef; const fromsreg, tosreg: tsubsetregister); override;
+     protected
+      procedure a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); override;
+    end;
+
+  procedure create_hlcodegen;
+
+implementation
+
+  uses
+    defutil,
+    cpubase,aasmcpu,
+    cgobj,cgcpu;
+
+  procedure thlcgaarch64.a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister);
+    var
+      op: tasmop;
+      tocgsize: tcgsize;
+      tmpdestreg: tregister;
+    begin
+      tocgsize:=def_cgsize(tosize);
+      if (sreg.startbit<>0) or
+         not(sreg.bitlen in [32,64]) then
+        begin
+          if is_signed(subsetsize) then
+            op:=A_SBFX
+          else
+            op:=A_UBFX;
+          { source and destination register of SBFX/UBFX have to be the same size }
+          if (sreg.subsetregsize in [OS_64,OS_S64]) and
+             not(tocgsize in [OS_64,OS_S64]) then
+            tmpdestreg:=cg.getintregister(list,OS_64)
+          else if not(sreg.subsetregsize in [OS_64,OS_S64]) and
+             (tocgsize in [OS_64,OS_S64]) then
+            tmpdestreg:=cg.getintregister(list,OS_32)
+          else
+            tmpdestreg:=destreg;
+          list.concat(taicpu.op_reg_reg_const_const(op,tmpdestreg,sreg.subsetreg,sreg.startbit,sreg.bitlen));
+          { need to sign extend further or truncate? }
+          if (sreg.subsetregsize=OS_S64) and
+             not(tocgsize in [OS_64,OS_S64]) then
+            cg.a_load_reg_reg(list,OS_S64,tocgsize,tmpdestreg,destreg)
+          else if is_signed(subsetsize) and
+             (tocgsize in [OS_8,OS_16]) then
+            cg.a_load_reg_reg(list,OS_32,tocgsize,tmpdestreg,destreg)
+          else if tmpdestreg<>destreg then
+            cg.a_load_reg_reg(list,def_cgsize(subsetsize),tocgsize,tmpdestreg,destreg)
+        end
+      else
+        cg.a_load_reg_reg(list,def_cgsize(subsetsize),tocgsize,sreg.subsetreg,destreg);
+    end;
+
+
+  procedure makeregssamesize(list: tasmlist; fromsize, tosize: tcgsize; orgfromreg, orgtoreg: tregister; out newfromreg, newtoreg: tregister);
+    begin
+      if (fromsize in [OS_S64,OS_64])<>
+         (tosize in [OS_S64,OS_64]) then
+        begin
+          newfromreg:=cg.makeregsize(list,orgfromreg,OS_64);
+          newtoreg:=cg.makeregsize(list,orgtoreg,OS_64);
+        end
+      else
+        begin
+          newfromreg:=orgfromreg;
+          newtoreg:=orgtoreg;
+        end;
+    end;
+
+
+  procedure thlcgaarch64.a_load_subsetreg_subsetreg(list: TAsmlist; fromsubsetsize, tosubsetsize: tdef; const fromsreg, tosreg: tsubsetregister);
+    var
+      fromreg, toreg: tregister;
+
+    begin
+      { BFM can only insert a bitfield that starts at position 0 in the source
+        source or destination register }
+      if (tosreg.startbit=0) and
+         (fromsreg.bitlen>=tosreg.bitlen) then
+        begin
+          makeregssamesize(list,fromsreg.subsetregsize,tosreg.subsetregsize,fromsreg.subsetreg,tosreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFXIL,toreg,fromreg,fromsreg.startbit,tosreg.bitlen))
+        end
+      else if (fromsreg.startbit=0) and
+         (fromsreg.bitlen>=tosreg.bitlen) then
+        begin
+          makeregssamesize(list,fromsreg.subsetregsize,tosreg.subsetregsize,fromsreg.subsetreg,tosreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFI,toreg,fromreg,tosreg.startbit,tosreg.bitlen))
+        end
+      else
+        inherited;
+    end;
+
+
+  procedure thlcgaarch64.a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt);
+    var
+      toreg: tregister;
+    begin
+      if slopt in [SL_SETZERO,SL_SETMAX] then
+        inherited
+      else if not(sreg.bitlen in [32,64]) then
+        begin
+          makeregssamesize(list,def_cgsize(fromsize),sreg.subsetregsize,fromreg,sreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFI,toreg,fromreg,sreg.startbit,sreg.bitlen))
+        end
+      else
+        a_load_reg_reg(list,fromsize,subsetsize,fromreg,sreg.subsetreg);
+    end;
+
+
+  procedure create_hlcodegen;
+    begin
+      hlcg:=thlcgaarch64.create;
+      create_codegen;
+    end;
+
+
+end.

+ 398 - 0
compiler/aarch64/ncpuadd.pas

@@ -0,0 +1,398 @@
+{
+    Copyright (c) 2014 Jonas Maebe
+
+    Code generation for add nodes on AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuadd;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       node,ncgadd,cpubase;
+
+    type
+       taarch64addnode = class(tcgaddnode)
+       private
+          function  GetResFlags(unsigned:Boolean):TResFlags;
+          function  GetFPUResFlags:TResFlags;
+       protected
+          procedure second_addfloat;override;
+          procedure second_cmpfloat;override;
+          procedure second_cmpboolean;override;
+          procedure second_cmpsmallset;override;
+          procedure second_cmpordinal;override;
+          procedure second_addordinal;override;
+          procedure second_add64bit; override;
+          procedure second_cmp64bit; override;
+       public
+          function use_generic_mul32to64: boolean; override;
+       end;
+
+  implementation
+
+    uses
+      systems,
+      cutils,verbose,
+      paramgr,procinfo,
+      aasmtai,aasmdata,aasmcpu,defutil,
+      cgbase,cgcpu,cgutils,
+      cpupara,
+      ncon,nset,nadd,
+      hlcgobj, ncgutil,cgobj;
+
+{*****************************************************************************
+                               taarch64addnode
+*****************************************************************************}
+
+    function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
+      begin
+        case NodeType of
+          equaln:
+            GetResFlags:=F_EQ;
+          unequaln:
+            GetResFlags:=F_NE;
+          else
+            if not(unsigned) then
+              begin
+                if nf_swapped in flags then
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_GT;
+                    lten:
+                      GetResFlags:=F_GE;
+                    gtn:
+                      GetResFlags:=F_LT;
+                    gten:
+                      GetResFlags:=F_LE;
+                    else
+                      internalerror(2014082010);
+                  end
+                else
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_LT;
+                    lten:
+                      GetResFlags:=F_LE;
+                    gtn:
+                      GetResFlags:=F_GT;
+                    gten:
+                      GetResFlags:=F_GE;
+                    else
+                      internalerror(2014082011);
+                  end;
+              end
+            else
+              begin
+                if nf_swapped in Flags then
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_HI;
+                    lten:
+                      GetResFlags:=F_HS;
+                    gtn:
+                      GetResFlags:=F_LO;
+                    gten:
+                      GetResFlags:=F_LS;
+                    else
+                      internalerror(2014082012);
+                  end
+                else
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_LO;
+                    lten:
+                      GetResFlags:=F_LS;
+                    gtn:
+                      GetResFlags:=F_HI;
+                    gten:
+                      GetResFlags:=F_HS;
+                    else
+                      internalerror(2014082013);
+                  end;
+              end;
+        end;
+      end;
+
+
+    function taarch64addnode.GetFPUResFlags:TResFlags;
+      begin
+        case NodeType of
+          equaln:
+            result:=F_EQ;
+          unequaln:
+            result:=F_NE;
+          else
+            begin
+              if nf_swapped in Flags then
+                case NodeType of
+                  ltn:
+                    result:=F_GT;
+                  lten:
+                    result:=F_GE;
+                  gtn:
+                    result:=F_LO;
+                  gten:
+                    result:=F_LS;
+                  else
+                    internalerror(2014082014);
+                end
+              else
+                case NodeType of
+                  ltn:
+                    result:=F_LO;
+                  lten:
+                    result:=F_LS;
+                  gtn:
+                    result:=F_GT;
+                  gten:
+                    result:=F_GE;
+                  else
+                    internalerror(2014082015);
+                end;
+            end;
+        end;
+      end;
+
+
+    procedure taarch64addnode.second_addfloat;
+      var
+        op : TAsmOp;
+      begin
+        pass_left_right;
+        if nf_swapped in flags then
+          swapleftright;
+
+        { force fpureg as location, left right doesn't matter
+          as both will be in a fpureg }
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+
+        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+        case nodetype of
+          addn :
+            begin
+              op:=A_FADD;
+            end;
+          muln :
+            begin
+              op:=A_FMUL;
+            end;
+          subn :
+            begin
+              op:=A_FSUB;
+            end;
+          slashn :
+            begin
+              op:=A_FDIV;
+            end;
+          else
+            internalerror(200306014);
+        end;
+
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
+           location.register,left.location.register,right.location.register));
+      end;
+
+
+    procedure taarch64addnode.second_cmpfloat;
+      begin
+        pass_left_right;
+        if nf_swapped in flags then
+          swapleftright;
+
+        { force fpureg as location, left right doesn't matter
+          as both will be in a fpureg }
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getfpuresflags;
+
+        { signalling compare so we can get exceptions }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMPE,
+             left.location.register,right.location.register));
+      end;
+
+
+    procedure taarch64addnode.second_cmpboolean;
+      begin
+        pass_left_right;
+        force_reg_left_right(true,true);
+
+        if right.location.loc=LOC_CONSTANT then
+          begin
+            if right.location.value>=0 then
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false)
+            else
+              { avoid overflow if value=low(int64) }
+{$push}{$r-}{$q-}
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
+{$pop}
+          end
+        else
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getresflags(true);
+      end;
+
+
+    procedure taarch64addnode.second_cmpsmallset;
+      var
+        tmpreg : tregister;
+        op: tasmop;
+      begin
+        pass_left_right;
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+
+        force_reg_left_right(true,true);
+
+        if right.location.loc=LOC_CONSTANT then
+          begin
+            { when doing a cmp/cmn on 32 bit, we care whether the *lower 32 bit*
+              is a positive/negative value -> sign extend }
+            if not(right.location.size in [OS_64,OS_S64]) then
+              right.location.value:=longint(right.location.value);
+            if right.location.value>=0 then
+              op:=A_CMP
+            else
+              op:=A_CMN;
+          end
+        else
+          { for DFA }
+          op:=A_NONE;
+
+        case nodetype of
+          equaln,
+          unequaln:
+            begin
+              if right.location.loc=LOC_CONSTANT then
+                tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,op,def_cgsize(resultdef),left.location.register,abs(right.location.value),NR_XZR,NR_NO,false,false)
+              else
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+              location.resflags:=getresflags(true);
+            end;
+          lten,
+          gten:
+            begin
+              if (not(nf_swapped in flags) and
+                  (nodetype=lten)) or
+                 ((nf_swapped in flags) and
+                  (nodetype=gten)) then
+                swapleftright;
+              { we can't handle left as a constant yet }
+              if left.location.loc=LOC_CONSTANT then
+                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+              tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,left.location.size);
+              if right.location.loc=LOC_CONSTANT then
+                begin
+                  hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_AND,resultdef,right.location.value,left.location.register,tmpreg);
+                  tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,op,def_cgsize(resultdef),tmpreg,abs(right.location.value),NR_XZR,NR_NO,false,false)
+                end
+              else
+                begin
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,tmpreg,left.location.register,right.location.register));
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,tmpreg,right.location.register));
+                end;
+              location.resflags:=F_EQ;
+            end;
+          else
+            internalerror(2012042701);
+        end;
+      end;
+
+
+    procedure taarch64addnode.second_cmpordinal;
+      var
+        unsigned : boolean;
+      begin
+        pass_left_right;
+        force_reg_left_right(true,true);
+
+        unsigned:=not(is_signed(left.resultdef)) or
+                  not(is_signed(right.resultdef));
+
+        if right.location.loc = LOC_CONSTANT then
+          begin
+            if right.location.value>=0 then
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false)
+            else
+{$push}{$r-}{$q-}
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
+{$pop}
+          end
+        else
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getresflags(unsigned);
+      end;
+
+
+    procedure taarch64addnode.second_addordinal;
+      const
+        multops: array[boolean] of TAsmOp = (A_SMULL,A_UMULL);
+      var
+        unsigned: boolean;
+      begin
+        { 32x32->64 multiplication }
+        if (nodetype=muln) and
+           is_32bit(left.resultdef) and
+           is_32bit(right.resultdef) and
+           is_64bit(resultdef) then
+          begin
+            unsigned:=not(is_signed(left.resultdef)) or
+                      not(is_signed(right.resultdef));
+            pass_left_right;
+            force_reg_left_right(true,true);
+            location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+            location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+            current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg_reg(multops[unsigned],location.register,left.location.register,right.location.register));
+          end
+        else
+          inherited second_addordinal;
+      end;
+
+
+    procedure taarch64addnode.second_add64bit;
+      begin
+        second_addordinal;
+      end;
+
+
+    procedure taarch64addnode.second_cmp64bit;
+      begin
+        second_cmpordinal;
+      end;
+
+
+    function taarch64addnode.use_generic_mul32to64: boolean;
+      begin
+        result:=false;
+      end;
+
+
+begin
+  caddnode:=taarch64addnode;
+end.

+ 201 - 0
compiler/aarch64/ncpucnv.pas

@@ -0,0 +1,201 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    Generate AArch64 assembler for type converting nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit ncpucnv;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ncnv,ncgcnv;
+
+    type
+      taarch64typeconvnode = class(TCgTypeConvNode)
+        protected
+         function typecheck_int_to_real: tnode; override;
+         function first_int_to_real: tnode; override;
+
+        { procedure second_int_to_int;override; }
+        { procedure second_string_to_string;override; }
+        { procedure second_cstring_to_pchar;override; }
+        { procedure second_string_to_chararray;override; }
+        { procedure second_array_to_pointer;override; }
+        { procedure second_pointer_to_array;override; }
+        { procedure second_chararray_to_string;override; }
+        { procedure second_char_to_string;override; }
+         procedure second_int_to_real;override;
+        { procedure second_real_to_real;override; }
+        { procedure second_cord_to_pointer;override; }
+        { procedure second_proc_to_procvar;override; }
+        { procedure second_bool_to_int;override; }
+         procedure second_int_to_bool;override;
+        { procedure second_load_smallset;override;  }
+        { procedure second_ansistring_to_pchar;override; }
+        { procedure second_pchar_to_string;override; }
+        { procedure second_class_to_intf;override; }
+        { procedure second_char_to_char;override; }
+      end;
+
+implementation
+
+  uses
+    verbose,globals,
+    symdef,aasmdata,aasmbase,
+    defutil,
+    cgbase,cgutils,procinfo,
+    cpubase,aasmcpu,
+    pass_2,cgobj,
+    hlcgobj;
+
+
+{*****************************************************************************
+                             FirstTypeConv
+*****************************************************************************}
+
+  function taarch64typeconvnode.typecheck_int_to_real: tnode;
+    begin
+      { aarch64 supports converting everything to floating point, even fixed
+        point! Unfortunately, it only supports fixed point with a power-of-2
+        fraction, which is not the case for currency.
+
+        Generate the division by 10000 via nodes so the 10000.0 constant can
+        be reused. }
+      if is_currency(resultdef) and
+         not(nf_is_currency in flags) then
+        begin
+          { convert the equivalent int64 value to double without conversion
+            (internal typecast -> will set nf_is_currency flag) }
+          result:=ctypeconvnode.create_internal(left,s64floattype);
+          { turn into currency with conversion, which will divide by 10000
+            (regular typecast) }
+          result:=ctypeconvnode.create(result,s64currencytype);
+          exit;
+        end;
+      { The only other thing we have to take care of: convert values < 32 bit
+        to 32 bit }
+      if left.resultdef.size<4 then
+        begin
+          if is_signed(left.resultdef) then
+            inserttypeconv(left,s32inttype)
+          else
+            inserttypeconv(left,u32inttype)
+        end;
+      result:=inherited;
+    end;
+
+
+  function taarch64typeconvnode.first_int_to_real: tnode;
+    begin
+      result:=nil;
+      expectloc:=LOC_MMREGISTER;
+    end;
+
+
+{*****************************************************************************
+                             SecondTypeConv
+*****************************************************************************}
+
+  procedure taarch64typeconvnode.second_int_to_real;
+    var
+      op: tasmop;
+    begin
+      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
+        internalerror(2014120401);
+      case left.location.size of
+        OS_32,
+        OS_64:
+          op:=A_UCVTF;
+        OS_S32,
+        OS_S64,
+        { for currency and comp }
+        OS_F64:
+          op:=A_SCVTF;
+        else
+          internalerror(2014120402);
+      end;
+      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
+      { no scaling for currency, that's handled in pass_typecheck }
+    end;
+
+
+  procedure taarch64typeconvnode.second_int_to_bool;
+    var
+      resflags: tresflags;
+      hlabel,oldTrueLabel,oldFalseLabel : tasmlabel;
+    begin
+      if (nf_explicit in flags) and
+         not(left.expectloc in [LOC_FLAGS,LOC_JUMP]) then
+        begin
+          inherited;
+          exit;
+        end;
+
+      { can't use the generic code, as it assumes that OP_OR automatically sets
+        the flags. We can also do things more efficiently directly }
+
+      oldTrueLabel:=current_procinfo.CurrTrueLabel;
+      oldFalseLabel:=current_procinfo.CurrFalseLabel;
+      current_asmdata.getjumplabel(current_procinfo.CurrTrueLabel);
+      current_asmdata.getjumplabel(current_procinfo.CurrFalseLabel);
+      secondpass(left);
+      if codegenerror then
+       exit;
+
+      case left.location.loc of
+        LOC_CREFERENCE,
+        LOC_REFERENCE,
+        LOC_REGISTER,
+        LOC_CREGISTER,
+        LOC_JUMP:
+          begin
+             hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+             current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
+             resflags:=F_NE;
+          end;
+        LOC_FLAGS :
+          resflags:=left.location.resflags;
+        else
+          internalerror(2014122902);
+      end;
+      { load flags to register }
+      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+      if is_cbool(resultdef) then
+        begin
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_cond(A_CSETM,location.register,flags_to_cond(resflags)));
+            { truncate? (in case cbools are ever made unsigned) }
+            if resultdef.size<4 then
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,location.size,location.register,location.register);
+        end
+      else
+        cg.g_flags2reg(current_asmdata.CurrAsmList,location.size,resflags,location.register);
+      cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+      current_procinfo.CurrTrueLabel:=oldTrueLabel;
+      current_procinfo.CurrFalseLabel:=oldFalseLabel;
+    end;
+
+
+begin
+   ctypeconvnode:=taarch64typeconvnode;
+end.

+ 184 - 0
compiler/aarch64/ncpuinl.pas

@@ -0,0 +1,184 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    Generates ARM inline nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuinl;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ninl,ncginl;
+
+    type
+      taarch64inlinenode = class(tcgInlineNode)
+        function first_abs_real: tnode; override;
+        function first_sqr_real: tnode; override;
+        function first_sqrt_real: tnode; override;
+        function first_round_real: tnode; override;
+        function first_trunc_real: tnode; override;
+        procedure second_abs_real; override;
+        procedure second_sqr_real; override;
+        procedure second_sqrt_real; override;
+        procedure second_abs_long; override;
+        procedure second_round_real; override;
+        procedure second_trunc_real; override;
+        procedure second_get_frame; override;
+      private
+        procedure load_fpu_location;
+      end;
+
+
+implementation
+
+    uses
+      globtype,verbose,globals,
+      cpuinfo, defutil,symdef,aasmdata,aasmcpu,
+      cgbase,cgutils,pass_1,pass_2,
+      cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
+
+{*****************************************************************************
+                              taarch64inlinenode
+*****************************************************************************}
+
+    procedure taarch64inlinenode.load_fpu_location;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_copy(location,left.location);
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+        location.loc:=LOC_MMREGISTER;
+      end;
+
+
+    function taarch64inlinenode.first_abs_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_sqr_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_sqrt_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_round_real: tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_trunc_real: tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    procedure taarch64inlinenode.second_abs_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FABS,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_sqr_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_sqrt_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_abs_long;
+      var
+        opsize : tcgsize;
+        hp : taicpu;
+      begin
+        secondpass(left);
+        opsize:=def_cgsize(left.resultdef);
+        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+        location:=left.location;
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
+
+        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,location.register,left.location.register),PF_S));
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_CSEL,location.register,location.register,left.location.register,C_GE));
+      end;
+
+
+    procedure taarch64inlinenode.second_round_real;
+      var
+        hreg: tregister;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+        hreg:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
+        { round as floating point using current rounding mode }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTX,hreg,left.location.register));
+        { convert to signed integer rounding towards zero (there's no "round to
+          integer using current rounding mode") }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,hreg));
+      end;
+
+
+    procedure taarch64inlinenode.second_trunc_real;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+        { convert to signed integer rounding towards zero }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_get_frame;
+      begin
+        location_reset(location,LOC_CREGISTER,OS_ADDR);
+        { this routine is used to get the frame pointer for backtracing
+          purposes. current_procinfo.framepointer is set to SP because that one
+          is used to access temps. On most platforms these two frame pointers
+          are the same, but not on AArch64. }
+        location.register:=NR_FRAME_POINTER_REG;
+      end;
+
+begin
+  cinlinenode:=taarch64inlinenode;
+end.

+ 187 - 0
compiler/aarch64/ncpumat.pas

@@ -0,0 +1,187 @@
+{
+    Copyright (c) 1998-2002, 2014 by Florian Klaempfl and Jonas Maebe
+
+    Generate AArch64 assembler for math nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpumat;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,nmat,ncgmat;
+
+    type
+      taarch64moddivnode = class(tmoddivnode)
+         procedure pass_generate_code;override;
+      end;
+
+      taarch64notnode = class(tcgnotnode)
+         procedure second_boolean;override;
+      end;
+
+      taarch64unaryminusnode = class(tcgunaryminusnode)
+         procedure second_float; override;
+      end;
+
+implementation
+
+    uses
+      globtype,systems,constexp,
+      cutils,verbose,globals,
+      symconst,symdef,
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      defutil,
+      cgbase,cgobj,hlcgobj,pass_2,procinfo,
+      ncon,
+      cpubase,
+      ncgutil,cgcpu,cgutils;
+
+{*****************************************************************************
+                             taarch64moddivnode
+*****************************************************************************}
+
+    procedure taarch64moddivnode.pass_generate_code;
+      var
+         op         : tasmop;
+         tmpreg,
+         numerator,
+         divider,
+         resultreg  : tregister;
+         hl : tasmlabel;
+         overflowloc: tlocation;
+      begin
+       secondpass(left);
+       secondpass(right);
+
+       { set result location }
+       location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+       location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+       resultreg:=location.register;
+
+       { put numerator in register }
+       hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+       numerator:=left.location.register;
+
+       { load divider in a register }
+       hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+       divider:=right.location.register;
+
+       { start division }
+       if is_signed(left.resultdef) then
+         op:=A_SDIV
+       else
+         op:=A_UDIV;
+       current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+
+       { no divide-by-zero detection available in hardware, emulate (if it's a
+         constant, this will have been detected earlier already) }
+       if (right.nodetype<>ordconstn) then
+         begin
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
+             right.location.register,0));
+
+           current_asmdata.getjumplabel(hl);
+           current_asmdata.CurrAsmList.concat(taicpu.op_cond_sym(A_B,C_NE,hl));
+           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DIVBYZERO',false);
+           cg.a_label(current_asmdata.CurrAsmList,hl);
+         end;
+
+       { in case of overflow checking, also check for low(int64) div (-1)
+         (no hardware support for this either) }
+       if (cs_check_overflow in current_settings.localswitches) and
+          is_signed(left.resultdef) and
+          ((right.nodetype<>ordconstn) or
+           (tordconstnode(right).value=-1)) then
+         begin
+           { num=ffff... and div=8000... <=>
+             num xor not(div xor 8000...) = 0
+             (and we have the "eon" operation, which performs "xor not(...)" }
+           tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,left.resultdef);
+           hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.resultdef,low(int64),left.location.register,tmpreg);
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_EON,
+             tmpreg,left.location.register,tmpreg));
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,0));
+           { now the zero/equal flag is set in case we divided low(int64) by
+             (-1) }
+           location_reset(overflowloc,LOC_FLAGS,OS_NO);
+           overflowloc.resflags:=F_EQ;
+           cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,location,resultdef,overflowloc);
+         end;
+
+       { in case of modulo, multiply result again by the divider and subtract
+         from the numerator }
+       if nodetype=modn then
+         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(A_MSUB,resultreg,
+           resultreg,divider,numerator));
+    end;
+
+
+{*****************************************************************************
+                               taarch64notnode
+*****************************************************************************}
+
+    procedure taarch64notnode.second_boolean;
+      begin
+        if not handle_locjump then
+          begin
+            secondpass(left);
+            case left.location.loc of
+              LOC_FLAGS :
+                begin
+                  location_copy(location,left.location);
+                  inverse_flags(location.resflags);
+                end;
+              LOC_REGISTER, LOC_CREGISTER,
+              LOC_REFERENCE, LOC_CREFERENCE,
+              LOC_SUBSETREG, LOC_CSUBSETREG,
+              LOC_SUBSETREF, LOC_CSUBSETREF:
+                begin
+                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
+                    left.location.register,0));
+                  location_reset(location,LOC_FLAGS,OS_NO);
+                  location.resflags:=F_EQ;
+               end;
+              else
+                internalerror(2003042401);
+            end;
+          end;
+      end;
+
+
+{*****************************************************************************
+                                   taarch64unaryminusnode
+*****************************************************************************}
+
+    procedure taarch64unaryminusnode.second_float;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
+      end;
+
+begin
+   cmoddivnode:=taarch64moddivnode;
+   cnotnode:=taarch64notnode;
+   cunaryminusnode:=taarch64unaryminusnode;
+end.

+ 142 - 0
compiler/aarch64/ncpumem.pas

@@ -0,0 +1,142 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    Generate AArch64 code for in memory related nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpumem;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    globtype,
+    cgbase,
+    symtype,
+    node,nmem,ncgmem;
+
+  type
+    taarch64loadparentfpnode = class(tcgloadparentfpnode)
+      procedure pass_generate_code; override;
+    end;
+
+    taarch64vecnode = class(tcgvecnode)
+     protected
+      function valid_index_size(size: tcgsize): boolean; override;
+     public
+       procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint); override;
+    end;
+
+implementation
+
+  uses
+    cutils,verbose,
+    defutil,
+    aasmdata,cpubase,
+    cgutils,
+    cgobj;
+
+  { taarch64loadparentfpnode }
+
+  procedure taarch64loadparentfpnode.pass_generate_code;
+    begin
+      inherited pass_generate_code;
+      { see the comments in tcgaarch64.g_proc_entry }
+      if (location.loc in [LOC_REGISTER,LOC_CREGISTER]) and
+         (location.register=NR_STACK_POINTER_REG) then
+        if (kind=lpf_forpara) then
+          location.register:=NR_FRAME_POINTER_REG
+        else
+          begin
+            { load stack pointer in a different register, as many instructions
+              cannot directly work with the stack pointer. The register
+              allocator can merge them if possible }
+            location.register:=cg.getaddressregister(current_asmdata.CurrAsmList);
+            cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_STACK_POINTER_REG,location.register);
+            location.loc:=LOC_REGISTER;
+          end;
+    end;
+
+
+  { taarch64vecnode }
+
+  function taarch64vecnode.valid_index_size(size: tcgsize): boolean;
+    begin
+      { all sizes are ok if we handle the "reference reg mul", because
+         a) we use a 64 bit register for 64 bit values, and a 32 bit one (that
+            we can sign/zero-extend inside the reference) for smaller values
+         b) for values < 32 bit, the entire 32 bit register always contains the
+            sign/zero-extended version of the value }
+      result:=
+        not is_packed_array(left.resultdef) and
+        (get_mul_size in [1,2,4,8,16]);
+    end;
+
+
+  procedure taarch64vecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
+    var
+      base: tregister;
+      oldoffset: asizeint;
+      shift: byte;
+    begin
+      { we can only scale the index by shl 0..4 }
+      if not(l in [1,2,4,8,16]) then
+        begin
+          inherited;
+          exit;
+        end;
+      { we need a base set and an index available }
+      if (location.reference.base=NR_NO) or
+         (location.reference.index<>NR_NO) then
+        begin
+          { don't integrate the offset yet, make_simple_ref() may be able to
+            handle it more efficiently later (unless an offset is all we have
+            -> optimization for someone that wants to add support for AArch64
+            embedded targets) }
+          oldoffset:=location.reference.offset;
+          location.reference.offset:=0;
+          base:=cg.getaddressregister(current_asmdata.CurrAsmList);
+          cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,base);
+          reference_reset_base(location.reference,base,oldoffset,location.reference.alignment);
+        end;
+      shift:=BsfDWord(l);
+      location.reference.index:=maybe_const_reg;
+      { sign/zero-extend? }
+      if regsize.size=8 then
+        if shift<>0 then
+          location.reference.shiftmode:=SM_LSL
+        else
+          location.reference.shiftmode:=SM_NONE
+      else if is_signed(regsize) then
+        location.reference.shiftmode:=SM_SXTW
+      else if shift<>0 then
+        location.reference.shiftmode:=SM_UXTW
+      else
+        { the upper 32 bits are always already zero-extended -> just use 64 bit
+          register }
+        location.reference.index:=cg.makeregsize(current_asmdata.CurrAsmList,location.reference.index,OS_64);
+      location.reference.shiftimm:=shift;
+      location.reference.alignment:=newalignment(location.reference.alignment,l);
+    end;
+
+
+begin
+  cloadparentfpnode:=taarch64loadparentfpnode;
+  cvecnode:=taarch64vecnode;
+end.

+ 175 - 0
compiler/aarch64/ncpuset.pas

@@ -0,0 +1,175 @@
+{
+    Copyright (c) 2015 by Jonas Maebe
+
+    Generate AArch64 assembler for in set/case nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuset;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       node,nset,ncgset,cpubase,cgbase,cgobj,aasmbase,aasmtai,aasmdata,globtype;
+
+    type
+       taarch64casenode = class(tcgcasenode)
+         protected
+           procedure optimizevalues(var max_linear_list: aint; var max_dist: aword);override;
+           function  has_jumptable: boolean;override;
+           procedure genjumptable(hp: pcaselabel ;min_, max_: aint);override;
+       end;
+
+
+implementation
+
+    uses
+      systems,
+      verbose,globals,constexp,
+      symconst,symdef,defutil,
+      paramgr,
+      cpuinfo,
+      pass_2,cgcpu,
+      ncon,
+      tgobj,ncgutil,regvars,rgobj,aasmcpu,
+      procinfo,
+      cgutils;
+
+{*****************************************************************************
+                            TCGCASENODE
+*****************************************************************************}
+
+
+    procedure taarch64casenode.optimizevalues(var max_linear_list: aint; var max_dist: aword);
+      begin
+        max_linear_list:=10;
+      end;
+    
+
+    function taarch64casenode.has_jumptable: boolean;
+      begin
+        has_jumptable:=true;
+      end;
+
+
+    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: aint);
+      var
+        last: TConstExprInt;
+        tablelabel: TAsmLabel;
+        basereg,indexreg,jumpreg: TRegister;
+        href: TReference;
+        opcgsize: tcgsize;
+        sectype: TAsmSectiontype;
+        jtitemconsttype: taiconst_type;
+
+      procedure genitem(list:TAsmList;t : pcaselabel);
+        var
+          i : aint;
+        begin
+          if assigned(t^.less) then
+            genitem(list,t^.less);
+          { fill possible hole }
+          i:=last.svalue+1;
+          while i<=t^._low.svalue-1 do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,elselabel));
+              inc(i);
+            end;
+          i:=t^._low.svalue;
+          while i<=t^._high.svalue do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,blocklabel(t^.blockid)));
+              inc(i);
+            end;
+          last:=t^._high;
+          if assigned(t^.greater) then
+            genitem(list,t^.greater);
+        end;
+
+      begin
+        if not(target_info.system in systems_darwin) then
+          jtitemconsttype:=aitconst_32bit
+        else
+          { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
+          jtitemconsttype:=aitconst_darwin_dwarf_delta32;
+
+        last:=min_;
+        opcgsize:=def_cgsize(opsize);
+        { a <= x <= b <-> unsigned(x-a) <= (b-a) }
+        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opcgsize,aint(min_),hregister);
+        if not(jumptable_no_range) then
+          begin
+             { case expr greater than max_ => goto elselabel }
+             cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,OC_A,aint(max_)-aint(min_),hregister,elselabel);
+             min_:=0;
+          end;
+        { local label in order to avoid using GOT }
+        current_asmdata.getlabel(tablelabel,alt_data);
+        indexreg:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,opcgsize,OS_ADDR,hregister,indexreg);
+        { load table address }
+        reference_reset_symbol(href,tablelabel,0,4);
+        basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
+        { load table slot, 32-bit sign extended }
+        reference_reset_base(href,basereg,0,4);
+        href.index:=indexreg;
+        href.shiftmode:=SM_LSL;
+        href.shiftimm:=2;
+        jumpreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,href,jumpreg);
+        { add table address }
+        cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,basereg,jumpreg);
+        { and finally jump }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,jumpreg));
+        { generate jump table }
+        if not(target_info.system in systems_darwin) then
+          sectype:=sec_rodata
+        else
+          begin
+            { on Mac OS X, dead code stripping ("smart linking") happens based on
+              global symbols: every global/static symbol (symbols that do not
+              start with "L") marks the start of a new "subsection" that is
+              discarded by the linker if there are no references to this symbol.
+              This means that if you put the jump table in the rodata section, it
+              will become part of the block of data associated with the previous
+              non-L-label in the rodata section and stay or be thrown away
+              depending on whether that block of data is referenced. Therefore,
+              jump tables must be added in the code section and since aktlocaldata
+              is inserted right after the routine, it will become part of the
+              same subsection that contains the routine's code }
+            sectype:=sec_code;
+          end;
+        new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
+        if target_info.system in systems_darwin then
+          begin
+            { additionally, these tables are now marked via ".data_region jt32"
+              and ".end_data_region" }
+            current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_data_region,'jt32'));
+          end;
+        current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel));
+        genitem(current_procinfo.aktlocaldata,hp);
+        if target_info.system in systems_darwin then
+          current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_end_data_region,''));
+      end;
+
+
+begin
+   ccasenode:=taarch64casenode;
+end.

+ 5 - 0
compiler/aarch64/ra64con.inc

@@ -64,6 +64,8 @@ NR_W30 = tregister($0104001E);
 NR_X30 = tregister($0105001E);
 NR_WZR = tregister($0104001F);
 NR_XZR = tregister($0105001F);
+NR_WSP = tregister($01040020);
+NR_SP = tregister($01050020);
 NR_B0 = tregister($04010000);
 NR_H0 = tregister($04030000);
 NR_S0 = tregister($04090000);
@@ -225,3 +227,6 @@ NR_S31 = tregister($0409001F);
 NR_D31 = tregister($040a001F);
 NR_Q31 = tregister($0405001F);
 NR_NZCV = tregister($05000000);
+NR_FPCR = tregister($05000001);
+NR_FPSR = tregister($05000002);
+NR_TPIDR_EL0 = tregister($05000003);

+ 162 - 157
compiler/aarch64/ra64dwa.inc

@@ -64,164 +64,169 @@
 30,
 31,
 31,
+31,
+31,
+64,
+64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+73,
+73,
+73,
+73,
+73,
+74,
+74,
+74,
+74,
+74,
+75,
+75,
+75,
+75,
+75,
+76,
+76,
+76,
+76,
+76,
+77,
+77,
+77,
+77,
+77,
+78,
+78,
+78,
+78,
+78,
+79,
+79,
+79,
+79,
+79,
+80,
+80,
+80,
+80,
+80,
+81,
+81,
+81,
+81,
+81,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+84,
+84,
+84,
+84,
+84,
+85,
+85,
+85,
+85,
+85,
+86,
+86,
+86,
+86,
+86,
+87,
+87,
+87,
+87,
+87,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+95,
+95,
+95,
+95,
+95,
 0,
 0,
 0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-7,
-8,
-8,
-8,
-8,
-8,
-9,
-9,
-9,
-9,
-9,
-10,
-10,
-10,
-10,
-10,
-11,
-11,
-11,
-11,
-11,
-12,
-12,
-12,
-12,
-12,
-13,
-13,
-13,
-13,
-13,
-14,
-14,
-14,
-14,
-14,
-15,
-15,
-15,
-15,
-15,
-16,
-16,
-16,
-16,
-16,
-17,
-17,
-17,
-17,
-17,
-18,
-18,
-18,
-18,
-18,
-19,
-19,
-19,
-19,
-19,
-20,
-20,
-20,
-20,
-20,
-21,
-21,
-21,
-21,
-21,
-22,
-22,
-22,
-22,
-22,
-23,
-23,
-23,
-23,
-23,
-24,
-24,
-24,
-24,
-24,
-25,
-25,
-25,
-25,
-25,
-26,
-26,
-26,
-26,
-26,
-27,
-27,
-27,
-27,
-27,
-28,
-28,
-28,
-28,
-28,
-29,
-29,
-29,
-29,
-29,
-30,
-30,
-30,
-30,
-30,
-31,
-31,
-31,
-31,
-31,
 0

+ 1 - 1
compiler/aarch64/ra64nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from a64reg.dat }
-226
+231

+ 6 - 1
compiler/aarch64/ra64num.inc

@@ -64,6 +64,8 @@ tregister($0104001E),
 tregister($0105001E),
 tregister($0104001F),
 tregister($0105001F),
+tregister($01040020),
+tregister($01050020),
 tregister($04010000),
 tregister($04030000),
 tregister($04090000),
@@ -224,4 +226,7 @@ tregister($0403001F),
 tregister($0409001F),
 tregister($040a001F),
 tregister($0405001F),
-tregister($05000000)
+tregister($05000000),
+tregister($05000001),
+tregister($05000002),
+tregister($05000003)

+ 101 - 96
compiler/aarch64/ra64rni.inc

@@ -32,6 +32,7 @@
 59,
 61,
 63,
+65,
 2,
 4,
 6,
@@ -64,102 +65,7 @@
 60,
 62,
 64,
-65,
-70,
-75,
-80,
-85,
-90,
-95,
-100,
-105,
-110,
-115,
-120,
-125,
-130,
-135,
-140,
-145,
-150,
-155,
-160,
-165,
-170,
-175,
-180,
-185,
-190,
-195,
-200,
-205,
-210,
-215,
-220,
 66,
-71,
-76,
-81,
-86,
-91,
-96,
-101,
-106,
-111,
-116,
-121,
-126,
-131,
-136,
-141,
-146,
-151,
-156,
-161,
-166,
-171,
-176,
-181,
-186,
-191,
-196,
-201,
-206,
-211,
-216,
-221,
-69,
-74,
-79,
-84,
-89,
-94,
-99,
-104,
-109,
-114,
-119,
-124,
-129,
-134,
-139,
-144,
-149,
-154,
-159,
-164,
-169,
-174,
-179,
-184,
-189,
-194,
-199,
-204,
-209,
-214,
-219,
-224,
 67,
 72,
 77,
@@ -224,4 +130,103 @@
 213,
 218,
 223,
-225
+71,
+76,
+81,
+86,
+91,
+96,
+101,
+106,
+111,
+116,
+121,
+126,
+131,
+136,
+141,
+146,
+151,
+156,
+161,
+166,
+171,
+176,
+181,
+186,
+191,
+196,
+201,
+206,
+211,
+216,
+221,
+226,
+69,
+74,
+79,
+84,
+89,
+94,
+99,
+104,
+109,
+114,
+119,
+124,
+129,
+134,
+139,
+144,
+149,
+154,
+159,
+164,
+169,
+174,
+179,
+184,
+189,
+194,
+199,
+204,
+209,
+214,
+219,
+224,
+70,
+75,
+80,
+85,
+90,
+95,
+100,
+105,
+110,
+115,
+120,
+125,
+130,
+135,
+140,
+145,
+150,
+155,
+160,
+165,
+170,
+175,
+180,
+185,
+190,
+195,
+200,
+205,
+210,
+215,
+220,
+225,
+227,
+228,
+229,
+230

+ 48 - 43
compiler/aarch64/ra64sri.inc

@@ -1,8 +1,39 @@
 { don't edit, this file is generated from a64reg.dat }
 0,
-65,
+67,
+72,
+117,
+122,
+127,
+132,
+137,
+142,
+147,
+152,
+157,
+162,
+77,
+167,
+172,
+177,
+182,
+187,
+192,
+197,
+202,
+207,
+212,
+82,
+217,
+222,
+87,
+92,
+97,
+102,
+107,
+112,
 70,
-115,
+75,
 120,
 125,
 130,
@@ -12,8 +43,8 @@
 150,
 155,
 160,
-75,
 165,
+80,
 170,
 175,
 180,
@@ -23,15 +54,18 @@
 200,
 205,
 210,
-80,
 215,
-220,
 85,
+220,
+225,
 90,
 95,
 100,
 105,
 110,
+115,
+228,
+229,
 68,
 73,
 118,
@@ -64,9 +98,9 @@
 103,
 108,
 113,
-66,
+227,
 71,
-116,
+76,
 121,
 126,
 131,
@@ -76,8 +110,8 @@
 151,
 156,
 161,
-76,
 166,
+81,
 171,
 176,
 181,
@@ -87,16 +121,16 @@
 201,
 206,
 211,
-81,
 216,
-221,
 86,
+221,
+226,
 91,
 96,
 101,
 106,
 111,
-225,
+116,
 69,
 74,
 119,
@@ -129,38 +163,8 @@
 104,
 109,
 114,
-67,
-72,
-117,
-122,
-127,
-132,
-137,
-142,
-147,
-152,
-157,
-162,
-77,
-167,
-172,
-177,
-182,
-187,
-192,
-197,
-202,
-207,
-212,
-82,
-217,
-222,
-87,
-92,
-97,
-102,
-107,
-112,
+66,
+230,
 1,
 3,
 21,
@@ -192,6 +196,7 @@
 15,
 17,
 19,
+65,
 63,
 2,
 4,

+ 162 - 157
compiler/aarch64/ra64sta.inc

@@ -64,164 +64,169 @@
 30,
 31,
 31,
+31,
+31,
+64,
+64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+73,
+73,
+73,
+73,
+73,
+74,
+74,
+74,
+74,
+74,
+75,
+75,
+75,
+75,
+75,
+76,
+76,
+76,
+76,
+76,
+77,
+77,
+77,
+77,
+77,
+78,
+78,
+78,
+78,
+78,
+79,
+79,
+79,
+79,
+79,
+80,
+80,
+80,
+80,
+80,
+81,
+81,
+81,
+81,
+81,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+84,
+84,
+84,
+84,
+84,
+85,
+85,
+85,
+85,
+85,
+86,
+86,
+86,
+86,
+86,
+87,
+87,
+87,
+87,
+87,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+95,
+95,
+95,
+95,
+95,
 0,
 0,
 0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-7,
-8,
-8,
-8,
-8,
-8,
-9,
-9,
-9,
-9,
-9,
-10,
-10,
-10,
-10,
-10,
-11,
-11,
-11,
-11,
-11,
-12,
-12,
-12,
-12,
-12,
-13,
-13,
-13,
-13,
-13,
-14,
-14,
-14,
-14,
-14,
-15,
-15,
-15,
-15,
-15,
-16,
-16,
-16,
-16,
-16,
-17,
-17,
-17,
-17,
-17,
-18,
-18,
-18,
-18,
-18,
-19,
-19,
-19,
-19,
-19,
-20,
-20,
-20,
-20,
-20,
-21,
-21,
-21,
-21,
-21,
-22,
-22,
-22,
-22,
-22,
-23,
-23,
-23,
-23,
-23,
-24,
-24,
-24,
-24,
-24,
-25,
-25,
-25,
-25,
-25,
-26,
-26,
-26,
-26,
-26,
-27,
-27,
-27,
-27,
-27,
-28,
-28,
-28,
-28,
-28,
-29,
-29,
-29,
-29,
-29,
-30,
-30,
-30,
-30,
-30,
-31,
-31,
-31,
-31,
-31,
 0

+ 6 - 1
compiler/aarch64/ra64std.inc

@@ -64,6 +64,8 @@
 'x30',
 'wzr',
 'xzr',
+'wsp',
+'sp',
 'b0',
 'h0',
 's0',
@@ -224,4 +226,7 @@
 's31',
 'd31',
 'q31',
-'nzcv'
+'nzcv',
+'fpcr',
+'fpsr',
+'tpidr_el0'

+ 5 - 0
compiler/aarch64/ra64sup.inc

@@ -64,6 +64,8 @@ RS_W30 = $1E;
 RS_X30 = $1E;
 RS_WZR = $1F;
 RS_XZR = $1F;
+RS_WSP = $20;
+RS_SP = $20;
 RS_B0 = $00;
 RS_H0 = $00;
 RS_S0 = $00;
@@ -225,3 +227,6 @@ RS_S31 = $1F;
 RS_D31 = $1F;
 RS_Q31 = $1F;
 RS_NZCV = $00;
+RS_FPCR = $01;
+RS_FPSR = $02;
+RS_TPIDR_EL0 = $03;

+ 88 - 0
compiler/aarch64/racpu.pas

@@ -0,0 +1,88 @@
+{
+    Copyright (c) 1998-2003 by Carl Eric Codere and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe
+
+    Handles the common AArch64 assembler reader routines
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit racpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      cgbase,
+      cpubase,
+      aasmtai,aasmdata,
+      rautils;
+
+    type
+      TAArch64Operand=class(TOperand)
+      end;
+
+      TAArch64Instruction=class(TInstruction)
+        oppostfix : toppostfix;
+        function ConcatInstruction(p:TAsmList) : tai;override;
+        function Is64bit: boolean;
+        function cgsize: tcgsize;
+      end;
+
+  implementation
+
+    uses
+      verbose,
+      aasmcpu;
+
+    function TAArch64Instruction.ConcatInstruction(p:TAsmList) : tai;
+      begin
+        result:=inherited ConcatInstruction(p);
+        taicpu(result).oppostfix:=oppostfix;
+      end;
+
+
+    function TAArch64Instruction.Is64bit: boolean;
+      begin
+        result:=
+          (operands[1].opr.typ=OPR_REGISTER) and
+          (getsubreg(operands[1].opr.reg)=R_SUBQ);
+      end;
+
+    function TAArch64Instruction.cgsize: tcgsize;
+      begin
+        if ops<1 then
+          internalerror(2014122001);
+        if operands[1].opr.typ<>OPR_REGISTER then
+          internalerror(2014122002);
+        result:=reg_cgsize(operands[1].opr.reg);
+        { a 32 bit integer register could actually be 16 or 8 bit }
+        if result=OS_32 then
+          case oppostfix of
+            PF_B:
+              result:=OS_8;
+            PF_SB:
+              result:=OS_S8;
+            PF_H:
+              result:=OS_16;
+            PF_SH:
+              result:=OS_S16;
+          end;
+      end;
+
+
+end.

+ 1053 - 0
compiler/aarch64/racpugas.pas

@@ -0,0 +1,1053 @@
+{
+    Copyright (c) 1998-2002 by Carl Eric Codere and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe
+
+    Does the parsing for the AArch64 GNU AS styled inline assembler.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit racpugas;
+
+{$i fpcdefs.inc}
+
+  Interface
+
+    uses
+      raatt,racpu,
+      cpubase;
+
+    type
+      taarch64attreader = class(tattreader)
+        actoppostfix : TOpPostfix;
+        function is_asmopcode(const s: string):boolean;override;
+        function is_register(const s:string):boolean;override;
+        procedure handleopcode;override;
+        procedure BuildReference(oper: taarch64operand; is64bit: boolean);
+        procedure BuildOperand(oper: taarch64operand; is64bit: boolean);
+        function TryBuildShifterOp(instr: taarch64instruction; opnr: longint) : boolean;
+        procedure BuildOpCode(instr: taarch64instruction);
+        procedure ReadSym(oper: taarch64operand; is64bit: boolean);
+        procedure ConvertCalljmp(instr: taarch64instruction);
+        function ToConditionCode(const hs: string; is_operand: boolean): tasmcond;
+      end;
+
+
+  Implementation
+
+    uses
+      { helpers }
+      cutils,
+      { global }
+      globtype,verbose,
+      systems,aasmbase,aasmtai,aasmdata,aasmcpu,
+      { symtable }
+      symconst,symsym,
+      procinfo,
+      rabase,rautils,
+      cgbase,cgutils;
+
+
+    function taarch64attreader.is_register(const s:string):boolean;
+      type
+        treg2str = record
+          name : string[3];
+          reg : tregister;
+        end;
+
+      const
+        extraregs : array[0..3] of treg2str = (
+          (name: 'FP' ; reg: NR_FP),
+          (name: 'LR' ; reg: NR_LR),
+          (name: 'IP0'; reg: NR_IP0),
+          (name: 'IP1'; reg: NR_IP1));
+
+      var
+        i : longint;
+
+      begin
+        result:=inherited is_register(s);
+        { reg found?
+          possible aliases are always 2 or 3 chars
+        }
+        if result or not(length(s) in [2,3]) then
+          exit;
+        for i:=low(extraregs) to high(extraregs) do
+          begin
+            if s=extraregs[i].name then
+              begin
+                actasmregister:=extraregs[i].reg;
+                result:=true;
+                actasmtoken:=AS_REGISTER;
+                exit;
+              end;
+          end;
+      end;
+
+
+    procedure taarch64attreader.ReadSym(oper: taarch64operand; is64bit: boolean);
+      var
+         tempstr, mangledname : string;
+         typesize,l,k: aint;
+      begin
+        tempstr:=actasmpattern;
+        Consume(AS_ID);
+        { typecasting? }
+        if (actasmtoken=AS_LPAREN) and
+           SearchType(tempstr,typesize) then
+          begin
+            oper.hastype:=true;
+            Consume(AS_LPAREN);
+            BuildOperand(oper,is64bit);
+            Consume(AS_RPAREN);
+            if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+              oper.SetSize(typesize,true);
+          end
+        else
+          if not oper.SetupVar(tempstr,false) then
+            Message1(sym_e_unknown_id,tempstr);
+        { record.field ? }
+        if actasmtoken=AS_DOT then
+          begin
+            BuildRecordOffsetSize(tempstr,l,k,mangledname,false);
+            if (mangledname<>'') then
+              Message(asmr_e_invalid_reference_syntax);
+            inc(oper.opr.ref.offset,l);
+          end;
+      end;
+
+
+    Procedure taarch64attreader.BuildReference(oper: taarch64operand; is64bit: boolean);
+
+      procedure do_error;
+        begin
+          Message(asmr_e_invalid_reference_syntax);
+          RecoverConsume(false);
+        end;
+
+
+      procedure test_end(require_rbracket : boolean);
+        begin
+          if require_rbracket then begin
+            if not(actasmtoken=AS_RBRACKET) then
+              begin
+                do_error;
+                exit;
+              end
+            else
+              Consume(AS_RBRACKET);
+            if (actasmtoken=AS_NOT) then
+              begin
+                oper.opr.ref.addressmode:=AM_PREINDEXED;
+                Consume(AS_NOT);
+              end;
+          end;
+          if not(actasmtoken in [AS_SEPARATOR,AS_end]) then
+            do_error
+          else
+            begin
+{$IFDEF debugasmreader}
+              writeln('TEST_end_FINAL_OK. Created the following ref:');
+              writeln('oper.opr.ref.shiftimm=',oper.opr.ref.shiftimm);
+              writeln('oper.opr.ref.shiftmode=',ord(oper.opr.ref.shiftmode));
+              writeln('oper.opr.ref.index=',ord(oper.opr.ref.index));
+              writeln('oper.opr.ref.base=',ord(oper.opr.ref.base));
+              writeln('oper.opr.ref.signindex=',ord(oper.opr.ref.signindex));
+              writeln('oper.opr.ref.addressmode=',ord(oper.opr.ref.addressmode));
+              writeln;
+{$endIF debugasmreader}
+            end;
+        end;
+
+
+      function is_shifter_ref_operation(var a : tshiftmode) : boolean;
+        begin
+          a:=SM_NONE;
+          if (actasmpattern='LSL') then
+            a:=SM_LSL
+          else if (actasmpattern='UXTW') then
+            a:=SM_UXTW
+          else if (actasmpattern='SXTW') then
+            a:=SM_SXTW
+          else if (actasmpattern='SXTX') then
+            a:=SM_SXTX;
+          is_shifter_ref_operation:=not(a=SM_NONE);
+        end;
+
+
+      procedure read_index_shift(require_rbracket : boolean);
+        var
+          shift: aint;
+        begin
+          case actasmtoken of
+            AS_COMMA :
+              begin
+                Consume(AS_COMMA);
+                if not(actasmtoken=AS_ID) then
+                  do_error;
+                if is_shifter_ref_operation(oper.opr.ref.shiftmode) then
+                  begin
+                    Consume(actasmtoken);
+                    if actasmtoken=AS_HASH then
+                      begin
+                        Consume(AS_HASH);
+                        shift:=BuildConstExpression(false,true);
+                        if not(shift in [0,2+ord(is64bit)]) then
+                          do_error;
+                        oper.opr.ref.shiftimm:=shift;
+                        test_end(require_rbracket);
+                      end;
+                   end
+                 else
+                   begin
+                     do_error;
+                     exit;
+                   end;
+              end;
+            AS_RBRACKET :
+              if require_rbracket then
+                test_end(require_rbracket)
+              else
+                begin
+                  do_error;
+                  exit;
+                end;
+            AS_SEPARATOR,AS_END :
+              if not require_rbracket then
+                test_end(false)
+               else
+                 do_error;
+            else
+              begin
+                do_error;
+                exit;
+              end;
+          end;
+        end;
+
+
+      procedure read_index(require_rbracket : boolean);
+        var
+          recname : string;
+          o_int,s_int : aint;
+        begin
+          case actasmtoken of
+            AS_REGISTER :
+              begin
+                if getsupreg(actasmregister)=RS_XZR then
+                  Message1(asmr_e_invalid_ref_register,actasmpattern);
+                oper.opr.ref.index:=actasmregister;
+                Consume(AS_REGISTER);
+                read_index_shift(require_rbracket);
+                exit;
+              end;
+            AS_HASH : // constant
+              begin
+                Consume(AS_HASH);
+(*
+                if actasmtoken=AS_COLON then
+                  begin
+                    consume(AS_COLON);
+                    { GNU-style lower 12 bits of address of non-GOT-based
+                      access }
+                    if (actasmpattern='LO12') then
+                      begin
+                        consume(actasmtoken);
+                        consume(AS_COLON);
+                        if not oper.SetupVar(actasmpattern,false) then
+                          begin
+                            do_error;
+                            exit
+                          end;
+                        consume(AS_ID);
+                        oper.opr.ref.refaddr:=addr_??? (not gotpageoffset);
+                      end
+                    else
+                      begin
+                        do_error;
+                        exit
+                      end;
+                  end
+                else
+*)
+                  begin
+                    o_int:=BuildConstExpression(false,true);
+                    inc(oper.opr.ref.offset,o_int);
+                  end;
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_ID :
+              begin
+                recname:=actasmpattern;
+                Consume(AS_ID);
+                { Apple-style got page offset }
+                if actasmtoken=AS_AT then
+                  begin
+                    if not oper.SetupVar(recname,false) then
+                      begin
+                        do_error;
+                        exit
+                      end;
+                    consume(AS_AT);
+                    if actasmpattern='GOTPAGEOFF' then
+                      begin
+                        consume(actasmtoken);
+                        oper.opr.ref.refaddr:=addr_gotpageoffset;
+                      end
+                    else if actasmpattern='PAGEOFF' then
+                      begin
+                        consume(actasmtoken);
+                        oper.opr.ref.refaddr:=addr_pageoffset;
+                      end
+                    else
+                      begin
+                        do_error;
+                        exit
+                      end;
+                  end
+                else
+                  begin
+                    BuildRecordOffsetSize(recname,o_int,s_int,recname,false);
+                    inc(oper.opr.ref.offset,o_int);
+                  end;
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_AT:
+              begin
+                do_error;
+                exit;
+              end;
+            AS_RBRACKET :
+              begin
+                if require_rbracket then
+                  begin
+                    test_end(require_rbracket);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error; // unexpected rbracket
+                    exit;
+                  end;
+              end;
+            AS_SEPARATOR,AS_end :
+              begin
+                if not require_rbracket then
+                  begin
+                    test_end(false);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error;
+                    exit;
+                  end;
+              end;
+            else
+              begin
+                // unexpected token
+                do_error;
+                exit;
+              end;
+          end; // case
+        end;
+
+
+      procedure try_prepostindexed;
+        begin
+          Consume(AS_RBRACKET);
+          case actasmtoken of
+            AS_COMMA :
+              begin // post-indexed
+                Consume(AS_COMMA);
+                oper.opr.ref.addressmode:=AM_POSTINDEXED;
+                read_index(false);
+                exit;
+              end;
+            AS_NOT :
+              begin   // pre-indexed
+                Consume(AS_NOT);
+                oper.opr.ref.addressmode:=AM_PREINDEXED;
+                test_end(false);
+                exit;
+              end;
+            else
+              begin
+                test_end(false);
+                exit;
+              end;
+          end; // case
+        end;
+
+      begin
+        Consume(AS_LBRACKET);
+        oper.opr.ref.addressmode:=AM_OFFSET; // assume "neither PRE nor POST inc"
+        if actasmtoken=AS_REGISTER then
+          begin
+            if getsupreg(actasmregister)=RS_XZR then
+              Message1(asmr_e_invalid_ref_register,actasmpattern);
+            oper.opr.ref.base:=actasmregister;
+            Consume(AS_REGISTER);
+            case actasmtoken of
+              AS_RBRACKET :
+                begin
+                  try_prepostindexed;
+                  exit;
+                end;
+              AS_COMMA :
+                begin
+                  Consume(AS_COMMA);
+                  read_index(true);
+                  exit;
+                end;
+              else
+                begin
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                end;
+            end;
+          end
+        else
+          Begin
+            case actasmtoken of
+              AS_ID :
+                begin
+                  { TODO: local variables and parameters }
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                  exit;
+                end;
+              else
+                begin // elsecase
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                  exit;
+                end;
+            end;
+          end;
+      end;
+
+
+    function taarch64attreader.TryBuildShifterOp(instr: taarch64instruction; opnr: longint): boolean;
+
+      procedure handlepara(sm : tshiftmode);
+        begin
+          consume(AS_ID);
+          fillchar(instr.operands[opnr].opr,sizeof(instr.operands[opnr].opr),0);
+          instr.operands[opnr].opr.typ:=OPR_SHIFTEROP;
+          instr.operands[opnr].opr.shifterop.shiftmode:=sm;
+          if (sm=SM_LSL) or
+             (actasmtoken=AS_HASH) then
+            begin
+              consume(AS_HASH);
+              instr.operands[opnr].opr.shifterop.shiftimm:=BuildConstExpression(false,false);
+            end;
+        end;
+
+      const
+        shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
+          ('LSL','LSR','ASR',
+           'UXTB','UXTH','UXTW','UXTX',
+           'SXTB','SXTH','SXTW','SXTX');
+      var
+        sm: tshiftmode;
+        i: longint;
+        usessp,
+        useszr: boolean;
+      begin
+        result:=false;
+        if (actasmtoken=AS_ID) then
+          begin
+            for sm:=low(shiftmode2str) to high(shiftmode2str) do
+              if actasmpattern=shiftmode2str[sm] then
+                begin
+                  handlepara(sm);
+                  if instr.operands[1].opr.typ=OPR_REGISTER then
+                    begin
+                      { the possible shifter ops depend on whether this
+                        instruction uses sp and/or zr }
+                      usessp:=false;
+                      useszr:=false;
+                      for i:=low(instr.operands) to pred(opnr) do
+                        begin
+                          if (instr.operands[1].opr.typ=OPR_REGISTER) then
+                            case getsupreg(instr.operands[1].opr.reg) of
+                              RS_XZR:
+                                useszr:=true;
+                              RS_SP:
+                                usessp:=true;
+                            end;
+                        end;
+                      result:=valid_shifter_operand(instr.opcode,useszr,usessp,instr.Is64bit,sm,instr.operands[opnr].opr.shifterop.shiftimm);
+                    end
+                end;
+          end;
+      end;
+
+
+    function taarch64attreader.ToConditionCode(const hs: string; is_operand: boolean): tasmcond;
+      begin
+        case actopcode of
+          A_CSEL,A_CSINC,A_CSINV,A_CSNEG,A_CSET,A_CSETM,
+          A_CINC,A_CINV,A_CNEG,A_CCMN,A_CCMP,
+          A_B:
+            begin
+              { search for condition, conditions are always 2 chars }
+              if (is_operand<>(actopcode=A_B)) and
+                 (length(hs)>1) then
+                begin
+                  { workaround for DFA bug }
+                  result:=low(tasmcond);
+                  for result:=low(tasmcond) to high(tasmcond) do
+                    begin
+                      if hs=uppercond2str[result] then
+                        exit;
+                    end;
+                end;
+            end;
+        end;
+        result:=C_None;;
+      end;
+
+
+    Procedure taarch64attreader.BuildOperand(oper: taarch64operand; is64bit: boolean);
+      var
+        expr: string;
+        typesize, l: aint;
+
+        procedure MaybeAddGotAddrMode;
+          begin
+            if actasmtoken=AS_AT then
+              begin
+                consume(AS_AT);
+                if actasmpattern='GOTPAGE' then
+                  oper.opr.ref.refaddr:=addr_gotpage
+                else if actasmpattern='GOTPAGEOFF' then
+                  oper.opr.ref.refaddr:=addr_gotpageoffset
+                else if actasmpattern='PAGE' then
+                  oper.opr.ref.refaddr:=addr_page
+                else if actasmpattern='PAGEOFF' then
+                  oper.opr.ref.refaddr:=addr_pageoffset
+                else
+                  Message(asmr_e_expr_illegal);
+                consume(actasmtoken);
+              end
+            else
+              oper.opr.ref.refaddr:=addr_pic;
+          end;
+
+        procedure AddLabelOperand(hl:tasmlabel);
+          begin
+            if not(actasmtoken in [AS_PLUS,AS_MINUS,AS_LPAREN]) and
+               is_calljmp(actopcode) then
+             begin
+               oper.opr.typ:=OPR_SYMBOL;
+               oper.opr.symbol:=hl;
+             end
+            else if (actopcode=A_ADR) or
+               (actopcode=A_ADRP) then
+              begin
+                oper.InitRef;
+                MaybeAddGotAddrMode;
+                oper.opr.ref.symbol:=hl;
+                if (actasmtoken in [AS_PLUS, AS_MINUS]) then
+                  begin
+                    l:=BuildConstExpression(true,false);
+                    oper.opr.ref.offset:=l;
+                  end;
+              end;
+          end;
+
+
+        procedure MaybeRecordOffset;
+          var
+            mangledname: string;
+            hasdot  : boolean;
+            l,
+            toffset,
+            tsize   : aint;
+          begin
+            if not(actasmtoken in [AS_DOT,AS_PLUS,AS_MINUS]) then
+              exit;
+            l:=0;
+            hasdot:=(actasmtoken=AS_DOT);
+            if hasdot then
+              begin
+                if expr<>'' then
+                  begin
+                    BuildRecordOffsetSize(expr,toffset,tsize,mangledname,false);
+                    if (oper.opr.typ<>OPR_CONSTANT) and
+                       (mangledname<>'') then
+                      Message(asmr_e_wrong_sym_type);
+                    inc(l,toffset);
+                    oper.SetSize(tsize,true);
+                  end;
+              end;
+            if actasmtoken in [AS_PLUS,AS_MINUS] then
+              inc(l,BuildConstExpression(true,false));
+            case oper.opr.typ of
+              OPR_LOCAL :
+                begin
+                  { don't allow direct access to fields of parameters, because that
+                    will generate buggy code. Allow it only for explicit typecasting }
+                  if hasdot and
+                     (not oper.hastype) and
+                     (tabstractnormalvarsym(oper.opr.localsym).owner.symtabletype=parasymtable) and
+                     (current_procinfo.procdef.proccalloption<>pocall_register) then
+                    Message(asmr_e_cannot_access_field_directly_for_parameters);
+                  inc(oper.opr.localsymofs,l)
+                end;
+              OPR_CONSTANT :
+                inc(oper.opr.val,l);
+              OPR_REFERENCE :
+                if (mangledname<>'') then
+                  begin
+                    if (oper.opr.val<>0) then
+                      Message(asmr_e_wrong_sym_type);
+                    oper.opr.typ:=OPR_SYMBOL;
+                    oper.opr.symbol:=current_asmdata.RefAsmSymbol(mangledname);
+                  end
+                else
+                  inc(oper.opr.val,l);
+              OPR_SYMBOL:
+                Message(asmr_e_invalid_symbol_ref);
+              else
+                internalerror(200309221);
+            end;
+          end;
+
+
+        function MaybeBuildReference(is64bit: boolean):boolean;
+          { Try to create a reference, if not a reference is found then false
+            is returned }
+          begin
+            MaybeBuildReference:=true;
+            case actasmtoken of
+              AS_INTNUM,
+              AS_MINUS,
+              AS_PLUS:
+                Begin
+                  oper.opr.ref.offset:=BuildConstExpression(True,False);
+                  if actasmtoken<>AS_LPAREN then
+                    Message(asmr_e_invalid_reference_syntax)
+                  else
+                    BuildReference(oper,is64bit);
+                end;
+              AS_LPAREN:
+                BuildReference(oper,is64bit);
+              AS_ID: { only a variable is allowed ... }
+                Begin
+                  ReadSym(oper,is64bit);
+                  case actasmtoken of
+                    AS_end,
+                    AS_SEPARATOR,
+                    AS_COMMA: ;
+                    AS_LPAREN:
+                      BuildReference(oper,is64bit);
+                  else
+                    Begin
+                      Message(asmr_e_invalid_reference_syntax);
+                      Consume(actasmtoken);
+                    end;
+                  end; {end case }
+                end;
+              else
+               MaybeBuildReference:=false;
+            end; { end case }
+          end;
+
+
+      var
+        tempreg: tregister;
+        hl: tasmlabel;
+        icond: tasmcond;
+      Begin
+        expr:='';
+        case actasmtoken of
+          AS_LBRACKET: { Memory reference or constant expression }
+            Begin
+              oper.InitRef;
+              BuildReference(oper,is64bit);
+            end;
+
+          AS_HASH: { Constant expression  }
+            Begin
+              Consume(AS_HASH);
+              BuildConstantOperand(oper);
+            end;
+
+          (*
+          AS_INTNUM,
+          AS_MINUS,
+          AS_PLUS:
+            Begin
+              { Constant memory offset }
+              { This must absolutely be followed by (  }
+              oper.InitRef;
+              oper.opr.ref.offset:=BuildConstExpression(True,False);
+              if actasmtoken<>AS_LPAREN then
+                begin
+                  ofs:=oper.opr.ref.offset;
+                  BuildConstantOperand(oper);
+                  inc(oper.opr.val,ofs);
+                end
+              else
+                BuildReference(oper,is64bit);
+            end;
+          *)
+          AS_ID: { A constant expression, or a Variable ref.  }
+            Begin
+              { Condition code? }
+              icond:=ToConditionCode(actasmpattern,true);
+              if icond<>C_None then
+                begin
+                  oper.opr.typ:=OPR_COND;
+                  oper.opr.cc:=icond;
+                  consume(AS_ID);
+                end
+              else
+              { Local Label ? }
+              if is_locallabel(actasmpattern) then
+               begin
+                 CreateLocalLabel(actasmpattern,hl,false);
+                 Consume(AS_ID);
+                 AddLabelOperand(hl);
+               end
+              else
+               { Check for label }
+               if SearchLabel(actasmpattern,hl,false) then
+                 begin
+                   Consume(AS_ID);
+                   AddLabelOperand(hl);
+                 end
+              else
+               { probably a variable or normal expression }
+               { or a procedure (such as in CALL ID)      }
+               begin
+                 { is it a constant ? }
+                 if SearchIConstant(actasmpattern,l) then
+                  begin
+                    if not (oper.opr.typ in [OPR_NONE,OPR_CONSTANT]) then
+                      Message(asmr_e_invalid_operand_type);
+                    BuildConstantOperand(oper);
+                  end
+                 else
+                  begin
+                    expr:=actasmpattern;
+                    Consume(AS_ID);
+                    { typecasting? }
+                    if (actasmtoken=AS_LPAREN) and
+                       SearchType(expr,typesize) then
+                     begin
+                       oper.hastype:=true;
+                       Consume(AS_LPAREN);
+                       BuildOperand(oper,is64bit);
+                       Consume(AS_RPAREN);
+                       if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+                         oper.SetSize(typesize,true);
+                     end
+                    else
+                     begin
+                       if not(oper.SetupVar(expr,false)) then
+                        Begin
+                          { look for special symbols ... }
+                          if expr= '__HIGH' then
+                            begin
+                              consume(AS_LPAREN);
+                              if not oper.setupvar('high'+actasmpattern,false) then
+                                Message1(sym_e_unknown_id,'high'+actasmpattern);
+                              consume(AS_ID);
+                              consume(AS_RPAREN);
+                            end
+                          else
+                           if expr = '__RESULT' then
+                            oper.SetUpResult
+                          else
+                           if expr = '__SELF' then
+                            oper.SetupSelf
+                          else
+                           if expr = '__OLDEBP' then
+                            oper.SetupOldEBP
+                          else
+                            Message1(sym_e_unknown_id,expr);
+                        end
+                       else
+                         MaybeAddGotAddrMode;
+                     end;
+                  end;
+                  if actasmtoken=AS_DOT then
+                    MaybeRecordOffset;
+                  { add a constant expression? }
+                  if (actasmtoken=AS_PLUS) then
+                   begin
+                     l:=BuildConstExpression(true,false);
+                     case oper.opr.typ of
+                       OPR_CONSTANT :
+                         inc(oper.opr.val,l);
+                       OPR_LOCAL :
+                         inc(oper.opr.localsymofs,l);
+                       OPR_REFERENCE :
+                         inc(oper.opr.ref.offset,l);
+                       else
+                         internalerror(200309202);
+                     end;
+                   end
+               end;
+              { Do we have a indexing reference, then parse it also }
+              if actasmtoken=AS_LPAREN then
+                BuildReference(oper,is64bit);
+            end;
+
+          { Register, a variable reference or a constant reference  }
+          AS_REGISTER:
+            Begin
+              { save the type of register used. }
+              tempreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (actasmtoken in [AS_end,AS_SEPARATOR,AS_COMMA]) then
+                Begin
+                  if not (oper.opr.typ in [OPR_NONE,OPR_REGISTER]) then
+                    Message(asmr_e_invalid_operand_type);
+                  oper.opr.typ:=OPR_REGISTER;
+                  oper.opr.reg:=tempreg;
+                end
+              else
+                Message(asmr_e_syn_operand);
+            end;
+
+          AS_end,
+          AS_SEPARATOR,
+          AS_COMMA: ;
+        else
+          Begin
+            Message(asmr_e_syn_operand);
+            Consume(actasmtoken);
+          end;
+        end; { end case }
+      end;
+
+{*****************************************************************************
+                                taarch64attreader
+*****************************************************************************}
+
+    procedure taarch64attreader.BuildOpCode(instr: taarch64instruction);
+      var
+        operandnum : longint;
+      Begin
+        { opcode }
+        if (actasmtoken<>AS_OPCODE) then
+         Begin
+           Message(asmr_e_invalid_or_missing_opcode);
+           RecoverConsume(true);
+           exit;
+         end;
+        { Fill the instr object with the current state }
+        with instr do
+          begin
+            Opcode:=ActOpcode;
+            condition:=ActCondition;
+            oppostfix:=actoppostfix;
+          end;
+        Consume(AS_OPCODE);
+
+        { We are reading operands, so opcode will be an AS_ID }
+        operandnum:=1;
+        { Zero operand opcode ?  }
+        if actasmtoken in [AS_SEPARATOR,AS_end] then
+         begin
+           instr.Ops:=0;
+           exit;
+         end;
+        { Read the operands }
+        repeat
+          case actasmtoken of
+            AS_COMMA: { Operand delimiter }
+              Begin
+                { operandnum and not operandnum+1, because tinstruction is
+                  one-based and taicpu is zero-based)
+                }
+                if can_be_shifter_operand(instr.opcode,operandnum) then
+                  begin
+                    Consume(AS_COMMA);
+                    if not TryBuildShifterOp(instr,operandnum+1) then
+                      Message(asmr_e_illegal_shifterop_syntax);
+                    Inc(operandnum);
+                  end
+                else
+                  begin
+                    if operandnum>Max_Operands then
+                      Message(asmr_e_too_many_operands)
+                    else
+                      Inc(operandnum);
+                    Consume(AS_COMMA);
+                  end;
+              end;
+            AS_SEPARATOR,
+            AS_end : { End of asm operands for this opcode  }
+              begin
+                break;
+              end;
+          else
+            begin
+              BuildOperand(taarch64operand(instr.operands[operandnum]),instr.Is64bit);
+              instr.Ops:=operandnum;
+              if instr.operands[operandnum].opr.typ=OPR_REFERENCE then
+                if simple_ref_type(instr.opcode,instr.cgsize,instr.oppostfix,instr.operands[operandnum].opr.ref)<>sr_simple then
+                  Message(asmr_e_invalid_reference_syntax);
+                ;
+            end;
+          end; { end case }
+        until false;
+      end;
+
+
+    function taarch64attreader.is_asmopcode(const s: string):boolean;
+
+      const
+        { sorted by length so longer postfixes will match first }
+        postfix2strsorted : array[1..7] of string[3] = (
+          'SB','SH','SW',
+          'B','H','W',
+          'S');
+
+        postfixsorted : array[1..7] of TOpPostfix = (
+          PF_SB,PF_SH,PF_SW,
+          PF_B,PF_H,PF_W,
+          PF_S);
+
+      var
+        j  : longint;
+        hs : string;
+        maxlen : longint;
+        icond : tasmcond;
+      Begin
+        { making s a value parameter would break other assembler readers }
+        hs:=s;
+        is_asmopcode:=false;
+
+        { clear opcode }
+        actopcode:=A_None;
+        actcondition:=C_None;
+
+        { b.cond ? }
+        if (length(hs)=4) and
+           (hs[1]='B') and
+           (hs[2]='.') then
+          begin
+            actopcode:=A_B;
+            actasmtoken:=AS_OPCODE;
+            actcondition:=ToConditionCode(copy(hs,3,length(actasmpattern)-2),false);
+            if actcondition<>C_None then
+              is_asmopcode:=true;
+            exit;
+          end;
+
+        maxlen:=max(length(hs),7);
+        actopcode:=A_NONE;
+        for j:=maxlen downto 1 do
+          begin
+            actopcode:=tasmop(PtrUInt(iasmops.Find(copy(hs,1,j))));
+            if actopcode<>A_NONE then
+              begin
+                actasmtoken:=AS_OPCODE;
+                { strip op code }
+                delete(hs,1,j);
+                break;
+              end;
+          end;
+        if actopcode=A_NONE then
+          exit;
+
+        { check for postfix }
+        if length(hs)>0 then
+          begin
+            for j:=low(postfixsorted) to high(postfixsorted) do
+              begin
+                if copy(hs,1,length(postfix2strsorted[j]))=postfix2strsorted[j] then
+                  begin
+                    actoppostfix:=postfixsorted[j];
+                    { strip postfix }
+                    delete(hs,1,length(postfix2strsorted[j]));
+                    break;
+                  end;
+              end;
+          end;
+        { if we stripped all postfixes, it's a valid opcode }
+        is_asmopcode:=length(hs)=0;
+      end;
+
+
+    procedure taarch64attreader.ConvertCalljmp(instr: taarch64instruction);
+      var
+        newopr : toprrec;
+      begin
+        if instr.Operands[1].opr.typ=OPR_REFERENCE then
+          begin
+            newopr.typ:=OPR_SYMBOL;
+            newopr.symbol:=instr.Operands[1].opr.ref.symbol;
+            newopr.symofs:=instr.Operands[1].opr.ref.offset;
+            if (instr.Operands[1].opr.ref.base<>NR_NO) or
+              (instr.Operands[1].opr.ref.index<>NR_NO) or
+              (instr.Operands[1].opr.ref.refaddr<>addr_pic) then
+              Message(asmr_e_syn_operand);
+            instr.Operands[1].opr:=newopr;
+          end;
+      end;
+
+    procedure taarch64attreader.handleopcode;
+      var
+        instr: taarch64instruction;
+      begin
+        instr:=taarch64instruction.Create(taarch64operand);
+        BuildOpcode(instr);
+        if is_calljmp(instr.opcode) then
+          ConvertCalljmp(instr);
+        {
+        instr.AddReferenceSizes;
+        instr.SetInstructionOpsize;
+        instr.CheckOperandSizes;
+        }
+        instr.ConcatInstruction(curlist);
+        instr.Free;
+        actoppostfix:=PF_None;
+      end;
+
+
+{*****************************************************************************
+                                     Initialize
+*****************************************************************************}
+
+const
+  asmmode_arm_att_info : tasmmodeinfo =
+          (
+            id    : asmmode_arm_gas;
+            idtxt : 'GAS';
+            casmreader : taarch64attreader;
+          );
+
+  asmmode_arm_standard_info : tasmmodeinfo =
+          (
+            id    : asmmode_standard;
+            idtxt : 'STANDARD';
+            casmreader : taarch64attreader;
+          );
+
+initialization
+  RegisterAsmMode(asmmode_arm_att_info);
+  RegisterAsmMode(asmmode_arm_standard_info);
+end.

+ 171 - 0
compiler/aarch64/rgcpu.pas

@@ -0,0 +1,171 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    This unit implements the SPARC specific class for the register
+    allocator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit rgcpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      cgbase,cgutils,
+      cpubase,
+      globtype,
+      rgobj;
+
+    type
+      trgcpu=class(trgobj)
+        procedure do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
+        procedure do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
+       protected
+        procedure do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister);
+      end;
+
+      trgintcpu=class(trgcpu)
+        procedure add_cpu_interferences(p: tai); override;
+      end;
+
+
+implementation
+
+    uses
+      verbose,cutils,
+      cgobj;
+
+    procedure trgcpu.do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
+      begin
+        do_spill_op(list,A_LDR,pos,spilltemp,tempreg);
+      end;
+
+
+    procedure trgcpu.do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
+      begin
+        do_spill_op(list,A_STR,pos,spilltemp,tempreg);
+      end;
+
+
+    procedure trgcpu.do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister);
+      var
+        helpins  : tai;
+        tmpref   : treference;
+        helplist : TAsmList;
+        hreg     : tregister;
+        isload   : boolean;
+      begin
+        isload:=op=A_LDR;
+        { offset out of range for regular load/store? }
+        if simple_ref_type(op,reg_cgsize(tempreg),PF_None,spilltemp)<>sr_simple then
+          begin
+            helplist:=TAsmList.create;
+
+            if getregtype(tempreg)=R_INTREGISTER then
+              hreg:=tempreg
+            else
+              hreg:=cg.getaddressregister(helplist);
+
+            cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
+            reference_reset_base(tmpref,spilltemp.base,0,sizeof(pint));
+            tmpref.index:=hreg;
+            if isload then
+              helpins:=spilling_create_load(tmpref,tempreg)
+            else
+              helpins:=spilling_create_store(tempreg,tmpref);
+            helplist.concat(helpins);
+            add_cpu_interferences(helpins);
+            list.insertlistafter(pos,helplist);
+            helplist.free;
+          end
+        else if isload then
+          inherited do_spill_read(list,pos,spilltemp,tempreg)
+        else
+          inherited do_spill_written(list,pos,spilltemp,tempreg)
+      end;
+
+
+    procedure trgintcpu.add_cpu_interferences(p: tai);
+     var
+       i, j: longint;
+     begin
+       if p.typ=ait_instruction then
+         begin
+           { add interferences for instructions that can have SP as a register
+             operand }
+           case taicpu(p).opcode of
+             A_MOV:
+               { all operands can be SP }
+               exit;
+             A_ADD,
+             A_SUB,
+             A_CMP,
+             A_CMN:
+               { ok as destination or first source in immediate or extended
+                 register form }
+               if (taicpu(p).oper[taicpu(p).ops-1]^.typ<>top_shifterop) or
+                  valid_shifter_operand(taicpu(p).opcode,false,true,
+                    reg_cgsize(taicpu(p).oper[0]^.reg) in [OS_64,OS_S64],
+                    taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftmode,
+                    taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftimm) then
+                 begin
+                   if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_shifterop then
+                     i:=taicpu(p).ops-2
+                   else
+                     i:=taicpu(p).ops-1;
+                   if (taicpu(p).oper[i]^.typ=top_reg) then
+                     add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
+                   exit;
+                 end;
+             A_AND,
+             A_EOR,
+             A_ORR,
+             A_TST:
+               { ok in immediate form }
+               if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_const then
+                 exit;
+           end;
+           { add interferences for other registers }
+           for i:=0 to taicpu(p).ops-1 do
+             begin
+               case taicpu(p).oper[i]^.typ of
+                 top_reg:
+                   if getregtype(taicpu(p).oper[i]^.reg)=R_INTREGISTER then
+                     add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
+                 top_ref:
+                   begin
+                     { sp can always be base, never be index }
+                     if taicpu(p).oper[i]^.ref^.index<>NR_NO then
+                       add_edge(getsupreg(taicpu(p).oper[i]^.ref^.index),RS_SP);
+                     { in case of write back, the base register must be
+                       different from the loaded/stored register }
+                     if (taicpu(p).oper[i]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+                        (taicpu(p).oper[i]^.ref^.base<>NR_NO) then
+                       begin
+                         for j:=pred(i) downto 0 do
+                           if taicpu(p).oper[j]^.typ=TOP_REG then
+                             add_edge(getsupreg(taicpu(p).oper[j]^.reg),getsupreg(taicpu(p).oper[i]^.ref^.base));
+                       end;
+                   end;
+               end;
+             end;
+         end;
+     end;
+
+end.

+ 4 - 2
compiler/aasmbase.pas

@@ -62,6 +62,8 @@ interface
 
     const
        asmlabeltypeprefix : array[TAsmLabeltype] of char = ('j','a','d','l','f','t','c');
+       asmsymbindname : array[TAsmsymbind] of string = ('none', 'external','common',
+       'local','global','weak external','private external','lazy','import');
 
     type
        TAsmSectiontype=(sec_none,
@@ -197,7 +199,7 @@ interface
     function EncodeUleb128(a: qword;out buf) : byte;
     function EncodeSleb128(a: int64;out buf) : byte;
 
-    function ReplaceForbiddenAsmSymbolChars(const s: string): string;
+    function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
 
     { dummy default noop callback }
     procedure default_global_used;
@@ -348,7 +350,7 @@ implementation
       end;
 
 
-    function ReplaceForbiddenAsmSymbolChars(const s: string): string;
+    function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
       var
         i : longint;
         rchar: char;

+ 15 - 16
compiler/aasmdata.pas

@@ -122,8 +122,6 @@ interface
     type
       TAsmList = class(tlinkedlist)
          constructor create;
-         constructor create_without_marker;
-         function  empty : boolean;
          function  getlasttaifilepos : pfileposinfo;
       end;
 
@@ -284,20 +282,6 @@ implementation
     constructor TAsmList.create;
       begin
         inherited create;
-        { make sure the optimizer won't remove the first tai of this list}
-        insert(tai_marker.create(mark_BlockStart));
-      end;
-
-    constructor TAsmList.create_without_marker;
-      begin
-        inherited create;
-      end;
-
-    function TAsmList.empty : boolean;
-      begin
-        { there is always a mark_BlockStart available,
-          see TAsmList.create }
-        result:=(count<=1);
       end;
 
 
@@ -422,6 +406,21 @@ implementation
                  internalerror(200603261);
              end;
            hp.typ:=_typ;
+           { Changing bind from AB_GLOBAL to AB_LOCAL is wrong
+             if bind is already AB_GLOBAL or AB_EXTERNAL,
+             GOT might have been used, so change might be harmful. }
+           if (_bind<>hp.bind) and (hp.getrefs>0) then
+             begin
+{$ifdef extdebug}
+               { the changes that matter must become internalerrors, the rest
+                 should be ignored; a used cannot change anything about this,
+                 so printing a warning/hint is not useful }
+               if (_bind=AB_LOCAL) then
+                 Message3(asmw_w_changing_bind_type,s,asmsymbindname[hp.bind],asmsymbindname[_bind])
+               else
+                 Message3(asmw_h_changing_bind_type,s,asmsymbindname[hp.bind],asmsymbindname[_bind]);
+{$endif extdebug}
+             end;
            hp.bind:=_bind;
          end
         else

+ 34 - 9
compiler/aasmtai.pas

@@ -221,11 +221,11 @@ interface
 {$ifdef arm}
        { ARM only }
        ,top_regset
-       ,top_conditioncode
        ,top_modeflags
        ,top_specialreg
 {$endif arm}
 {$if defined(arm) or defined(aarch64)}
+       ,top_conditioncode
        ,top_shifterop
 {$endif defined(arm) or defined(aarch64)}
 {$ifdef m68k}
@@ -268,15 +268,15 @@ interface
           top_local  : (localoper:plocaloper);
       {$ifdef arm}
           top_regset : (regset:^tcpuregisterset; regtyp: tregistertype; subreg: tsubregister; usermode: boolean);
-          top_conditioncode : (cc : TAsmCond);
           top_modeflags : (modeflags : tcpumodeflags);
           top_specialreg : (specialreg:tregister; specialflags:tspecialregflags);
       {$endif arm}
       {$if defined(arm) or defined(aarch64)}
           top_shifterop : (shifterop : pshifterop);
+          top_conditioncode : (cc : TAsmCond);
       {$endif defined(arm) or defined(aarch64)}
       {$ifdef m68k}
-          top_regset : (dataregset,addrregset:^tcpuregisterset);
+          top_regset : (dataregset,addrregset,fpuregset:^tcpuregisterset);
       {$endif m68k}
       {$ifdef jvm}
           top_single : (sval:single);
@@ -356,7 +356,9 @@ interface
         { for Jasmin }
         asd_jclass,asd_jinterface,asd_jsuper,asd_jfield,asd_jlimit,asd_jline,
         { .ent/.end for MIPS and Alpha }
-        asd_ent,asd_ent_end
+        asd_ent,asd_ent_end,
+        { supported by recent clang-based assemblers for data-in-code  }
+        asd_data_region, asd_end_data_region
       );
 
       TAsmSehDirective=(
@@ -385,7 +387,9 @@ interface
         { for Jasmin }
         'class','interface','super','field','limit','line',
         { .ent/.end for MIPS and Alpha }
-        'ent','end'
+        'ent','end',
+        { supported by recent clang-based assemblers for data-in-code }
+        'data_region','end_data_region'
       );
       sehdirectivestr : array[TAsmSehDirective] of string[16]=(
         '.seh_proc','.seh_endproc',
@@ -579,6 +583,8 @@ interface
           constructor Create_type_name(_typ:taiconst_type;const name:string;_symtyp:Tasmsymtype;ofs:aint);
           constructor Create_nil_codeptr;
           constructor Create_nil_dataptr;
+          constructor Create_int_codeptr(_value: int64);
+          constructor Create_int_dataptr(_value: int64);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure derefimpl;override;
@@ -1747,6 +1753,18 @@ implementation
 
 
     constructor tai_const.Create_nil_codeptr;
+      begin
+        self.Create_int_codeptr(0);
+      end;
+
+
+    constructor tai_const.Create_nil_dataptr;
+      begin
+        self.Create_int_dataptr(0);
+      end;
+
+
+    constructor tai_const.Create_int_codeptr(_value: int64);
       begin
         inherited Create;
         typ:=ait_const;
@@ -1759,11 +1777,11 @@ implementation
         sym:=nil;
         endsym:=nil;
         symofs:=0;
-        value:=0;
+        value:=_value;
       end;
 
 
-    constructor tai_const.Create_nil_dataptr;
+    constructor tai_const.Create_int_dataptr(_value: int64);
       begin
         inherited Create;
         typ:=ait_const;
@@ -1776,7 +1794,7 @@ implementation
         sym:=nil;
         endsym:=nil;
         symofs:=0;
-        value:=0;
+        value:=_value;
       end;
 
 
@@ -2173,7 +2191,10 @@ implementation
          typ:=ait_stab;
          stabtype:=_stabtype;
          getmem(str,length(s)+1);
-         move(s[1],str^,length(s)+1);
+         if length(s)>0 then
+           move(s[1],str^,length(s)+1)
+         else
+           str^:=#0;
       end;
 
     destructor tai_stab.destroy;
@@ -2535,6 +2556,9 @@ implementation
 {$ifdef ARM}
               and not(r.base=NR_R15)
 {$endif ARM}
+{$ifdef aarch64}
+              and not(r.refaddr in [addr_full,addr_gotpageoffset,addr_gotpage])
+{$endif aarch64}
               then
               internalerror(200502052);
             typ:=top_ref;
@@ -2636,6 +2660,7 @@ implementation
                 begin
                   dispose(dataregset);
                   dispose(addrregset);
+                  dispose(fpuregset);
                 end;
 {$endif m68k}
 {$ifdef jvm}

+ 16 - 3
compiler/aggas.pas

@@ -532,6 +532,8 @@ implementation
          system_powerpc64_darwin,
          system_x86_64_darwin,
          system_arm_darwin,
+         system_aarch64_darwin,
+         system_x86_64_iphonesim,
          system_powerpc_aix,
          system_powerpc64_aix:
            begin
@@ -567,7 +569,8 @@ implementation
                     AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
                   else
                     AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
-                { darwin/x86-64 uses RIP-based GOT addressing, no symbol stubs }
+                { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
+                  explicit symbol stubs }
                 else
                   internalerror(2006031101);
               end;
@@ -1244,6 +1247,11 @@ implementation
                     end;
                   if tai_label(hp).labsym.bind in [AB_GLOBAL,AB_PRIVATE_EXTERN] then
                    begin
+{$ifdef arm}
+                     { do no change arm mode accidently, .globl seems to reset the mode }
+                     if GenerateThumbCode or GenerateThumb2Code then
+                       AsmWriteln(#9'.thumb_func'#9);
+{$endif arm}
                      AsmWrite('.globl'#9);
                      if replaceforbidden then
                        AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_label(hp).labsym.name))
@@ -1447,7 +1455,12 @@ implementation
              begin
                WriteDirectiveName(tai_directive(hp).directive);
                if tai_directive(hp).name <>'' then
-                 AsmWrite(tai_directive(hp).name);
+                 begin
+                   if replaceforbidden then
+                     AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_directive(hp).name))
+                   else
+                     AsmWrite(tai_directive(hp).name);
+                 end;
                AsmLn;
              end;
 
@@ -1713,7 +1726,7 @@ implementation
 
       { "no executable stack" marker }
       { TODO: used by OpenBSD/NetBSD as well? }
-      if (target_info.system in (systems_linux + systems_android + systems_freebsd)) and
+      if (target_info.system in (systems_linux + systems_android + systems_freebsd + systems_dragonfly)) and
          not(cs_executable_stack in current_settings.moduleswitches) then
         begin
           AsmWriteLn('.section .note.GNU-stack,"",%progbits');

+ 4 - 0
compiler/agjasmin.pas

@@ -526,6 +526,8 @@ implementation
         i: longint;
         toplevelowner: tsymtable;
       begin
+        superclass:=nil;
+
         { JVM 1.5+ }
         AsmWriteLn('.bytecode 49.0');
         // include files are not support by Java, and the directory of the main
@@ -763,6 +765,8 @@ implementation
               2:result:=tostr(smallint(csym.value.valueord.svalue));
               4:result:=tostr(longint(csym.value.valueord.svalue));
               8:result:=tostr(csym.value.valueord.svalue);
+              else
+                internalerror(2014082050);
             end;
           conststring:
             result:=constastr(pchar(csym.value.valueptr),csym.value.len);

+ 23 - 0
compiler/alpha/cpuinfo.pas

@@ -21,6 +21,9 @@ Unit CPUInfo;
 
 Interface
 
+uses
+ globtype;
+
 Type
    { Natural integer register type and size for the target machine }
 {$ifdef FPC}
@@ -38,6 +41,9 @@ Type
    TConstPtrUInt = qword;
 
    bestreal = extended;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TExtended80Rec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts64real = double;
    ts80real = extended;
@@ -52,7 +58,15 @@ Type
        ClassEV8
       );
 
+   tcontrollertype =
+     (ct_none
+     );
+
+
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = false;
    { Size of native extended type }
    extended_size = 16;
    {# Size of a pointer                           }
@@ -60,6 +74,15 @@ Const
    {# Size of a multimedia register               }
    mmreg_size = 8;
 
+   { We know that there are fields after sramsize
+     but we don't care about this warning }
+   {$PUSH}
+    {$WARN 3177 OFF}
+   embedded_controllers : array [tcontrollertype] of tcontrollerdatatype =
+   (
+      (controllertypestr:''; controllerunitstr:''; flashbase:0; flashsize:0; srambase:0; sramsize:0));
+   {$POP}
+
    { target cpu string (used by compiler options) }
    target_cpu_string = 'alpha';
 

+ 22 - 5
compiler/aoptobj.pas

@@ -361,7 +361,7 @@ Unit AoptObj;
 
     function JumpTargetOp(ai: taicpu): poper; inline;
       begin
-{$ifdef MIPS}
+{$if defined(MIPS)}
         { MIPS branches can have 1,2 or 3 operands, target label is the last one. }
         result:=ai.oper[ai.ops-1];
 {$else MIPS}
@@ -1041,6 +1041,9 @@ Unit AoptObj;
         Repeat
           While Assigned(StartPai) And
                 ((StartPai.typ in (SkipInstr - [ait_regAlloc])) Or
+{$if defined(MIPS) or defined(SPARC)}
+                ((startpai.typ=ait_instruction) and (taicpu(startpai).opcode=A_NOP)) or
+{$endif MIPS or SPARC}
                  ((StartPai.typ = ait_label) and
                   Not(Tai_Label(StartPai).labsym.Is_Used))) Do
             StartPai := Tai(StartPai.Next);
@@ -1176,9 +1179,9 @@ Unit AoptObj;
     function IsJumpToLabel(hp: taicpu): boolean;
       begin
         result:=(hp.opcode=aopt_uncondjmp) and
-{$ifdef arm}
+{$if defined(arm) or defined(aarch64)}
           (hp.condition=c_None) and
-{$endif arm}
+{$endif arm or aarch64}
           (JumpTargetOp(hp)^.typ = top_ref) and
           (JumpTargetOp(hp)^.ref^.symbol is TAsmLabel);
       end;
@@ -1257,6 +1260,14 @@ Unit AoptObj;
                     exit;
                   if not GetFinalDestination(taicpu(p1),succ(level)) then
                     exit;
+{$if defined(aarch64)}
+                  { can't have conditional branches to
+                    global labels on AArch64, because the
+                    offset may become too big }
+                  if not(taicpu(hp).condition in [C_None,C_AL,C_NV]) and
+                     (tasmlabel(JumpTargetOp(taicpu(p1))^.ref^.symbol).bind<>AB_LOCAL) then
+                    exit;
+{$endif aarch64}
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).decrefs;
                   JumpTargetOp(hp)^.ref^.symbol:=JumpTargetOp(taicpu(p1))^.ref^.symbol;
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).increfs;
@@ -1395,9 +1406,15 @@ Unit AoptObj;
                                     FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp2) then
                                   begin
                                     if (taicpu(p).opcode=aopt_condjmp)
-  {$ifdef arm}
+  {$if defined(arm) or defined(aarch64)}
                                       and (taicpu(p).condition<>C_None)
-  {$endif arm}
+  {$endif arm or aarch64}
+  {$if defined(aarch64)}
+                                      { can't have conditional branches to
+                                        global labels on AArch64, because the
+                                        offset may become too big }
+                                      and (tasmlabel(JumpTargetOp(taicpu(hp1))^.ref^.symbol).bind=AB_LOCAL)
+  {$endif aarch64}
                                     then
                                       begin
                                         taicpu(p).condition:=inverse_cond(taicpu(p).condition);

+ 294 - 21
compiler/arm/aasmcpu.pas

@@ -212,7 +212,7 @@ uses
          function is_same_reg_move(regtype: Tregistertype):boolean; override;
 
          function spilling_get_operation_type(opnr: longint): topertype;override;
-
+         function spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;override;
          { assembler }
       public
          { the next will reset all instructions that can change in pass 2 }
@@ -777,6 +777,15 @@ implementation
       end;
 
 
+    function taicpu.spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;
+      begin
+        result := operand_read;
+        if (oper[opnr]^.ref^.base = reg) and
+          (oper[opnr]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) then
+           result := operand_readwrite;
+      end;
+
+
     procedure BuildInsTabCache;
       var
         i : longint;
@@ -1055,15 +1064,16 @@ implementation
               (tai(hp).typ=ait_instruction) then
               begin
                 case taicpu(hp).opcode of
-                  A_BX,
+                  A_MOV,
                   A_LDR,
                   A_ADD:
                     { approximation if we hit a case jump table }
                     if ((taicpu(hp).opcode in [A_ADD,A_LDR]) and not(GenerateThumbCode or GenerateThumb2Code) and
                        (taicpu(hp).oper[0]^.typ=top_reg) and
                       (taicpu(hp).oper[0]^.reg=NR_PC)) or
-                      ((taicpu(hp).opcode=A_BX) and (GenerateThumbCode) and
-                       (taicpu(hp).oper[0]^.typ=top_reg))
+                      ((taicpu(hp).opcode=A_MOV) and (GenerateThumbCode) and
+                       (taicpu(hp).oper[0]^.typ=top_reg) and
+                       (taicpu(hp).oper[0]^.reg=NR_PC))
                        then
                       begin
                         penalty:=multiplier;
@@ -1586,6 +1596,12 @@ implementation
                      s:=s+' am2 ';
                  end
                else
+                 if (ot and OT_SHIFTEROP)=OT_SHIFTEROP then
+                  begin
+                    s:=s+'shifterop';
+                    addsize:=false;
+                  end
+                else
                  s:=s+'???';
                { size }
                if addsize then
@@ -1857,8 +1873,10 @@ implementation
                 begin
                   ot:=OT_SHIFTEROP;
                 end;
+              top_conditioncode:
+                ot:=OT_CONDITION;
               else
-                internalerror(200402261);
+                internalerror(2004022623);
             end;
           end;
       end;
@@ -1890,7 +1908,6 @@ implementation
         {siz : array[0..3] of longint;}
       begin
         Matches:=100;
-        writeln(getstring,'---');
 
         { Check the opcode and operands }
         if (p^.opcode<>opcode) or (p^.ops<>ops) then
@@ -1932,7 +1949,7 @@ implementation
         { update condition flags
           or floating point single }
       if (oppostfix=PF_S) and
-        not(p^.code[0] in [#$04]) then
+        not(p^.code[0] in [#$04..#$0B]) then
         begin
           Matches:=0;
           exit;
@@ -2103,61 +2120,317 @@ implementation
 
 
     procedure taicpu.gencode(objdata:TObjData);
+      const
+        CondVal : array[TAsmCond] of byte=(
+         $E, $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $A,
+         $B, $C, $D, $E, 0);
       var
         bytes : dword;
         i_field : byte;
+        currsym : TObjSymbol;
+        offset : longint;
 
       procedure setshifterop(op : byte);
+        var
+          r : byte;
+          imm : dword;
         begin
           case oper[op]^.typ of
             top_const:
               begin
                 i_field:=1;
-                bytes:=bytes or dword(oper[op]^.val and $fff);
+                if oper[op]^.val and $ff=oper[op]^.val then
+                  bytes:=bytes or dword(oper[op]^.val)
+                else
+                  begin
+                    { calc rotate and adjust imm }
+                    r:=0;
+                    imm:=dword(oper[op]^.val);
+                    repeat
+                      imm:=RolDWord(imm, 2);
+                      inc(r)
+                    until imm and $ff=imm;
+                    bytes:=bytes or (r shl 8) or imm;
+                  end;
               end;
             top_reg:
               begin
                 i_field:=0;
-                bytes:=bytes or (getsupreg(oper[op]^.reg) shl 16);
+                bytes:=bytes or getsupreg(oper[op]^.reg);
 
                 { does a real shifter op follow? }
-                if (op+1<=op) and (oper[op+1]^.typ=top_shifterop) then
-                  begin
-                  end;
+                if (op+1<opercnt) and (oper[op+1]^.typ=top_shifterop) then
+                  with oper[op+1]^.shifterop^ do
+                    begin
+                      bytes:=bytes or (shiftimm shl 7);
+                      if shiftmode<>SM_RRX then
+                        bytes:=bytes or (ord(shiftmode) - ord(SM_LSL)) shl 5
+                      else
+                        bytes:=bytes or (3 shl 5);
+                      if getregtype(rs) <> R_INVALIDREGISTER then
+                        begin
+                          bytes:=bytes or (1 shl 4);
+                          bytes:=bytes or (getsupreg(rs) shl 8);
+                        end
+                    end;
               end;
           else
             internalerror(2005091103);
           end;
         end;
 
+      function MakeRegList(reglist: tcpuregisterset): word;
+        var
+          i, w: word;
+        begin
+          result:=0;
+          w:=1;
+          for i:=RS_R0 to RS_R15 do
+            begin
+              if i in reglist then
+                result:=result or w;
+              w:=w shl 1
+            end;
+        end;
+
       begin
         bytes:=$0;
         i_field:=0;
         { evaluate and set condition code }
+        bytes:=bytes or (CondVal[condition] shl 28);
 
         { condition code allowed? }
 
         { setup rest of the instruction }
         case insentry^.code[0] of
-          #$08:
+          #$01: // B/BL
             begin
               { set instruction code }
-              bytes:=bytes or (ord(insentry^.code[1]) shl 26);
-              bytes:=bytes or (ord(insentry^.code[2]) shl 21);
-
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              { set offset }
+              currsym:=objdata.symbolref(oper[0]^.ref^.symbol);
+              if (currsym.bind<>AB_LOCAL) and (currsym.objsection<>objdata.CurrObjSec) then
+                objdata.writereloc(oper[0]^.ref^.offset,0,currsym,RELOC_RELATIVE_24)
+              else
+                bytes:=bytes or (((currsym.offset-insoffset-8) shr 2) and $ffffff);
+            end;
+          #$04..#$07: // SUB
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 16);
+              { set destination }
+              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
+              { set Rn }
+              bytes:=bytes or (getsupreg(oper[1]^.reg) shl 16);
+              { create shifter op }
+              setshifterop(2);
+              { set I field }
+              bytes:=bytes or (i_field shl 25);
+              { set S if necessary }
+              if oppostfix=PF_S then
+                bytes:=bytes or (1 shl 20);
+            end;
+          #$08,#$0A,#$0B: // MOV
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 16);
               { set destination }
               bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
-
               { create shifter op }
               setshifterop(1);
-
-              { set i field }
+              { set I field }
               bytes:=bytes or (i_field shl 25);
-
-              { set s if necessary }
+              { set S if necessary }
               if oppostfix=PF_S then
                 bytes:=bytes or (1 shl 20);
             end;
+          #$0C,#$0E,#$0F: // CMP
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 16);
+              { set destination }
+              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 16);
+              { create shifter op }
+              setshifterop(1);
+              { set I field }
+              bytes:=bytes or (i_field shl 25);
+              { always set S bit }
+              bytes:=bytes or (1 shl 20);
+            end;
+          #$14: // MUL/MLA r1,r2,r3
+            begin
+              { set instruction code }
+              bytes:=bytes or ord(insentry^.code[1]) shl 24;
+              bytes:=bytes or ord(insentry^.code[2]) shl 16;
+              bytes:=bytes or ord(insentry^.code[3]);
+              { set regs }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 16;
+              bytes:=bytes or getsupreg(oper[1]^.reg);
+              bytes:=bytes or getsupreg(oper[2]^.reg) shl 8;
+            end;
+          #$15: // MUL/MLA r1,r2,r3,r4
+            begin
+              { set instruction code }
+              bytes:=bytes or ord(insentry^.code[1]) shl 24;
+              bytes:=bytes or ord(insentry^.code[2]) shl 16;
+              bytes:=bytes or ord(insentry^.code[3]);
+              { set regs }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 16;
+              bytes:=bytes or getsupreg(oper[1]^.reg);
+              bytes:=bytes or getsupreg(oper[2]^.reg) shl 8;
+              bytes:=bytes or getsupreg(oper[3]^.reg) shl 12;
+            end;
+          #$16: // MULL r1,r2,r3,r4
+            begin
+              { set instruction code }
+              bytes:=bytes or ord(insentry^.code[1]) shl 24;
+              bytes:=bytes or ord(insentry^.code[2]) shl 16;
+              bytes:=bytes or ord(insentry^.code[3]);
+              { set regs }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 12;
+              bytes:=bytes or getsupreg(oper[1]^.reg) shl 16;
+              bytes:=bytes or getsupreg(oper[2]^.reg);
+              bytes:=bytes or getsupreg(oper[3]^.reg) shl 8;
+            end;
+          #$17: // LDR/STR
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 24);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 16);
+              { set Rn and Rd }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 12;
+              bytes:=bytes or getsupreg(oper[1]^.ref^.base) shl 16;
+              if getregtype(oper[1]^.ref^.index)=R_INVALIDREGISTER then
+                begin
+                  { set offset }
+                  offset:=0;
+                  currsym:=objdata.symbolref(oper[1]^.ref^.symbol);
+                  if assigned(currsym) then
+                    offset:=currsym.offset-insoffset-8;
+                  offset:=offset+oper[1]^.ref^.offset;
+                  if offset>=0 then
+                    begin
+                      { set U flag }
+                      bytes:=bytes or (1 shl 23);
+                      bytes:=bytes or offset
+                    end
+                  else
+                    begin
+                      offset:=-offset;
+                      bytes:=bytes or offset
+                    end;
+                end
+              else
+                begin
+                  { set U flag }
+                  if oper[1]^.ref^.signindex>0 then
+                    bytes:=bytes or (1 shl 23);
+                  { set I flag }
+                  bytes:=bytes or (1 shl 25);
+                  bytes:=bytes or getsupreg(oper[1]^.ref^.index);
+                  { set shift }
+                  with oper[1]^.ref^ do
+                    if shiftmode<>SM_None then
+                      begin
+                        bytes:=bytes or (shiftimm shl 7);
+                        if shiftmode<>SM_RRX then
+                          bytes:=bytes or (ord(shiftmode) - ord(SM_LSL)) shl 5
+                        else
+                          bytes:=bytes or (3 shl 5);
+                      end
+                end;
+              { set W bit }
+              if oper[1]^.ref^.addressmode=AM_PREINDEXED then
+                bytes:=bytes or (1 shl 21);
+              { set P bit if necessary }
+              if oper[1]^.ref^.addressmode<>AM_POSTINDEXED then
+                bytes:=bytes or (1 shl 24);
+            end;
+          #$22: // LDRH/STRH
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 16);
+              bytes:=bytes or ord(insentry^.code[2]);
+              { src/dest register (Rd) }
+              bytes:=bytes or getsupreg(oper[0]^.reg) shl 12;
+              { base register (Rn) }
+              bytes:=bytes or getsupreg(oper[1]^.ref^.base) shl 16;
+              if getregtype(oper[1]^.ref^.index)=R_INVALIDREGISTER then
+                begin
+                  bytes:=bytes or (1 shl 22); // with immediate offset
+                  if oper[1]^.ref^.offset < 0 then
+                    begin
+                      bytes:=bytes or ((-oper[1]^.ref^.offset) and $f0 shl 4);
+                      bytes:=bytes or ((-oper[1]^.ref^.offset) and $f);
+                    end
+                  else
+                    begin
+                      { set U bit }
+                      bytes:=bytes or (1 shl 23);
+                      bytes:=bytes or (oper[1]^.ref^.offset and $f0 shl 4);
+                      bytes:=bytes or (oper[1]^.ref^.offset and $f);
+                    end;
+                end
+              else
+                begin
+                  { set U flag }
+                  bytes:=bytes or (1 shl 23);
+                  bytes:=bytes or getsupreg(oper[1]^.ref^.index);
+                end;
+              { set W bit }
+              if oper[1]^.ref^.addressmode=AM_PREINDEXED then
+                bytes:=bytes or (1 shl 21);
+              { set P bit if necessary }
+              if oper[1]^.ref^.addressmode<>AM_POSTINDEXED then
+                bytes:=bytes or (1 shl 24);
+            end;
+          #$26: // LDM/STM
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 20);
+              if oper[0]^.typ=top_ref then
+                begin
+                  { set W bit }
+                  if oper[0]^.ref^.addressmode=AM_PREINDEXED then
+                    bytes:=bytes or (1 shl 21);
+                  { set Rn }
+                  bytes:=bytes or (getsupreg(oper[0]^.ref^.index) shl 16);
+                end
+              else { typ=top_reg }
+                begin
+                  { set Rn }
+                  bytes:=bytes or (getsupreg(oper[0]^.reg) shl 16);
+                end;
+              { reglist }
+              bytes:=bytes or MakeRegList(oper[1]^.regset^);
+              { set P bit }
+              if (opcode=A_LDM) and (oppostfix in [PF_ED,PF_EA,PF_IB,PF_DB])
+              or (opcode=A_STM) and (oppostfix in [PF_FA,PF_FD,PF_IB,PF_DB]) then
+                bytes:=bytes or (1 shl 24);
+              { set U bit }
+              if (opcode=A_LDM) and (oppostfix in [PF_ED,PF_FD,PF_IB,PF_IA])
+              or (opcode=A_STM) and (oppostfix in [PF_FA,PF_EA,PF_IB,PF_IA]) then
+                bytes:=bytes or (1 shl 23);
+            end;
+          #$27: // SWP/SWPB
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 20);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 4);
+              { set regs }
+              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
+              bytes:=bytes or getsupreg(oper[1]^.reg);
+              bytes:=bytes or (getsupreg(oper[2]^.ref^.base) shl 16);
+            end;
+          #$03:  // BX
+            begin
+              writeln(objdata.CurrObjSec.fullname);
+              Comment(v_warning,'BX instruction');
+              // TBD
+            end;
           #$ff:
             internalerror(2005091101);
           else

+ 1 - 2
compiler/arm/agarmgas.pas

@@ -114,8 +114,7 @@ unit agarmgas;
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
         else if GenerateThumbCode then
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
-        // EDSP instructions in RTL require armv5te at least to not generate error
-        else if current_settings.cputype >= cpu_armv5te then
+        else
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' '+result;
 
         if target_info.abi = abi_eabihf then

+ 26 - 6
compiler/arm/aoptcpu.pas

@@ -482,7 +482,8 @@ Implementation
       hp1 : tai;
     begin
       Result:=false;
-      if (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
+      if (p.oper[1]^.typ = top_ref) and
+        (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
         (p.oper[1]^.ref^.index=NR_NO) and
         (p.oper[1]^.ref^.offset=0) and
         GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
@@ -538,6 +539,7 @@ Implementation
       TmpUsedRegs: TAllUsedRegs;
       tempop: tasmop;
       oldreg: tregister;
+      dealloc: tai_regalloc;
 
     function IsPowerOf2(const value: DWord): boolean; inline;
       begin
@@ -607,7 +609,8 @@ Implementation
                       str reg1,ref
                       mov reg2,reg1
                     }
-                    if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
+                    if (taicpu(p).oper[1]^.typ = top_ref) and
+                       (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
                        (taicpu(p).oppostfix=PF_None) and
                        GetNextInstruction(p,hp1) and
                        MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [PF_None]) and
@@ -633,7 +636,7 @@ Implementation
                       str reg1,ref
                       str reg2,ref
                       into
-                      strd reg1,ref
+                      strd reg1,reg2,ref
                     }
                     else if (GenerateARMCode or GenerateThumb2Code) and
                        (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
@@ -654,6 +657,9 @@ Implementation
                       begin
                         DebugMsg('Peephole StrStr2Strd done', p);
                         taicpu(p).oppostfix:=PF_D;
+                        taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
+                        taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
+                        taicpu(p).ops:=3;
                         asml.remove(hp1);
                         hp1.free;
                         result:=true;
@@ -667,7 +673,8 @@ Implementation
                       ldr reg2,ref
                       into ...
                     }
-                    if (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
+                    if (taicpu(p).oper[1]^.typ = top_ref) and
+                       (taicpu(p).oper[1]^.ref^.addressmode=AM_OFFSET) and
                        GetNextInstruction(p,hp1) and
                        { ldrd is not allowed here }
                        MatchInstruction(hp1, A_LDR, [taicpu(p).condition, C_None], [taicpu(p).oppostfix,PF_None]-[PF_D]) then
@@ -700,7 +707,7 @@ Implementation
                           end
                         {
                            ...
-                           ldrd reg1,ref
+                           ldrd reg1,reg1+1,ref
                         }
                         else if (GenerateARMCode or GenerateThumb2Code) and
                           (CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype]) and
@@ -718,6 +725,9 @@ Implementation
                           AlignedToQWord(taicpu(p).oper[1]^.ref^) then
                           begin
                             DebugMsg('Peephole LdrLdr2Ldrd done', p);
+                            taicpu(p).loadref(2,taicpu(p).oper[1]^.ref^);
+                            taicpu(p).loadreg(1, taicpu(hp1).oper[0]^.reg);
+                            taicpu(p).ops:=3;
                             taicpu(p).oppostfix:=PF_D;
                             asml.remove(hp1);
                             hp1.free;
@@ -1200,6 +1210,7 @@ Implementation
                        (taicpu(p).oppostfix = PF_NONE) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                        MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
+                       (taicpu(hp1).oper[1]^.typ = top_ref) and
                        { We can change the base register only when the instruction uses AM_OFFSET }
                        ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
                          ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
@@ -1222,6 +1233,13 @@ Implementation
                         if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                           taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
 
+                        dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, taicpu(p.Next));
+                        if Assigned(dealloc) then
+                          begin
+                            asml.remove(dealloc);
+                            asml.InsertAfter(dealloc,hp1);
+                          end;
+
                         GetNextInstruction(p, hp1);
                         asml.remove(p);
                         p.free;
@@ -1616,6 +1634,7 @@ Implementation
                         while GetNextInstructionUsingReg(hp1, hp1, taicpu(p).oper[0]^.reg) and
                           { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
                           MatchInstruction(hp1, [A_LDR, A_STR], [C_None], []) and
+                          (taicpu(hp1).oper[1]^.typ = top_ref) and
                           (taicpu(hp1).oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
                           { don't optimize if the register is stored/overwritten }
                           (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[1]^.reg) and
@@ -2389,7 +2408,7 @@ Implementation
     begin
       result:=true;
 
-      list:=TAsmList.create_without_marker;
+      list:=TAsmList.create;
       p:=BlockStart;
       while p<>BlockEnd Do
         begin
@@ -2410,6 +2429,7 @@ Implementation
                ) or
                { try to prove that the memory accesses don't overlapp }
                ((taicpu(p).opcode in [A_STRB,A_STRH,A_STR]) and
+                (taicpu(p).oper[1]^.typ = top_ref) and
                 (taicpu(p).oper[1]^.ref^.base=taicpu(hp1).oper[1]^.ref^.base) and
                 (taicpu(p).oppostfix=PF_None) and
                 (taicpu(hp1).oppostfix=PF_None) and

+ 8 - 5
compiler/arm/aoptcpub.pas

@@ -124,11 +124,14 @@ Implementation
     begin
       result:=false;
       for i:=0 to taicpu(p1).ops-1 do
-        if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
-          begin
-            result:=true;
-            exit;
-          end;
+        case taicpu(p1).oper[i]^.typ of
+          top_reg:
+            if (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
+              exit(true);
+          top_ref:
+            if (taicpu(p1).spilling_get_operation_type_ref(i,Reg)<>operand_read) then
+              exit(true);
+        end;
     end;
 
 End.

+ 73 - 67
compiler/arm/armins.dat

@@ -97,33 +97,34 @@ void                  void                            none
 reg32,reg32,reg32        \4\x0\xA0                     ARM7
 reg32,reg32,reg32,reg32  \5\x0\xA0                     ARM7
 reg32,reg32,reg32,imm    \6\x0\xA0                     ARM7
-reg32,reg32,imm          \7\x2\xA0                     ARM7
+reg32,reg32,immshifter   \7\x2\xA0                     ARM7
 
 [ADDcc]
-reg32,reg32,reg32        \4\x0\x80                     ARM7
-reg32,reg32,reg32,reg32  \5\x0\x80                     ARM7
-reg32,reg32,reg32,imm    \6\x0\x80                     ARM7
-reg32,reg32,imm          \7\x2\x80                     ARM7
+reg32,reg32,reg32           \4\x0\x80                  ARM7
+reg32,reg32,reg32,reg32     \5\x0\x80                  ARM7
+reg32,reg32,reg32,shifterop \6\x0\x80                  ARM7
+reg32,reg32,immshifter      \7\x2\x80                  ARM7
 
 [ADFcc]
 
 [ADRcc]
 
 [ANDcc]
-reg32,reg32,reg32        \4\x0\x00                     ARM7
-reg32,reg32,reg32,reg32  \5\x0\x00                     ARM7
-reg32,reg32,reg32,imm    \6\x0\x00                     ARM7
-reg32,reg32,imm          \7\x2\x00                     ARM7
+reg32,reg32,reg32           \4\x0\x00                  ARM7
+;reg32,reg32,reg32,reg32    \5\x0\x00                  ARM7
+;reg32,reg32,reg32,imm      \6\x0\x00                  ARM7
+reg32,reg32,reg32,shifterop \6\x0\x00                  ARM7
+reg32,reg32,immshifter      \7\x2\x00                  ARM7
 
 [Bcc]
 mem32                    \1\x0A                        ARM7
 imm24                    \1\x0A                        ARM7
 
 [BICcc]
-reg32,reg32,reg32        \4\x1\xC0                     ARM7
-reg32,reg32,reg32,reg32  \5\x1\xC0                     ARM7
-reg32,reg32,reg32,imm    \6\x1\xC0                     ARM7
-reg32,reg32,imm          \7\x3\xC0                     ARM7
+;reg32,reg32,reg32        \4\x1\xC0                     ARM7
+;reg32,reg32,reg32,reg32  \5\x1\xC0                     ARM7
+;reg32,reg32,reg32,imm    \6\x1\xC0                     ARM7
+reg32,reg32,immshifter    \7\x3\xC0                     ARM7
 
 [BLcc]
 mem32                    \1\x0B                        ARM7
@@ -149,13 +150,13 @@ reg8,reg8           \300\1\x10\101                ARM7
 reg32,reg32              \xC\x1\x60                     ARM7
 reg32,reg32,reg32        \xD\x1\x60                     ARM7
 reg32,reg32,imm          \xE\x1\x60                     ARM7
-reg32,imm                \xF\x3\x60                     ARM7
+reg32,immshifter         \xF\x1\x60                     ARM7
 
 [CMPcc]
 reg32,reg32              \xC\x1\x40                     ARM7
 reg32,reg32,reg32        \xD\x1\x40                     ARM7
-reg32,reg32,imm          \xE\x1\x40                     ARM7
-reg32,imm                \xF\x3\x40                     ARM7
+reg32,reg32,shifterop    \xE\x1\x40                     ARM7
+reg32,immshifter         \xF\x3\x40                     ARM7
 
 [CLZcc]
 reg32,reg32              \x27\x01\x01                   ARM7
@@ -171,10 +172,11 @@ reg32,reg32              \x27\x01\x01                   ARM7
 [DVFcc]
 
 [EORcc]
-reg32,reg32,reg32        \4\x0\x20                     ARM7
-reg32,reg32,reg32,reg32  \5\x0\x20                     ARM7
-reg32,reg32,reg32,imm    \6\x0\x20                     ARM7
-reg32,reg32,imm          \7\x2\x20                     ARM7
+reg32,reg32,reg32           \4\x0\x20                     ARM7
+;reg32,reg32,reg32,reg32     \5\x0\x20                     ARM7
+;reg32,reg32,reg32,imm       \6\x0\x20                     ARM7
+reg32,reg32,reg32,shifterop \6\x0\x20                     ARM7
+reg32,reg32,immshifter      \7\x2\x20                     ARM7
 
 [EXPcc]
 
@@ -193,14 +195,15 @@ reg32,reg32         \321\300\1\x11\101            ARM7
 
 [LDMcc]
 memam4,reglist		   \x26\x81			ARM7
+reg32,reglist		   \x26\x81			ARM7
 
 [LDRBTcc]
 
 [LDRBcc]
-reg32,memam2              \x17\x07\x10                            ARM7
+reg32,memam2              \x17\x04\x50                            ARM7
 
 [LDRcc]
-reg32,memam2              \x17\x05\x10                   ARM7
+reg32,memam2              \x17\x04\x10                   ARM7
 ; reg32,imm32              \x17\x05\x10                   ARM7
 ; reg32,reg32              \x18\x04\x10                   ARM7
 ; reg32,reg32,imm32        \x19\x04\x10                   ARM7
@@ -208,22 +211,24 @@ reg32,memam2              \x17\x05\x10                   ARM7
 ; reg32,reg32,reg32,imm32  \x21\x06\x10                   ARM7
 
 [LDRHcc]
-reg32,imm32              \x22\x50\xB0               ARM7
-reg32,reg32              \x23\x50\xB0               ARM7
-reg32,reg32,imm32        \x24\x50\xB0                   ARM7
-reg32,reg32,reg32        \x25\x10\xB0                   ARM7
+reg32,memam2              \x22\x10\xB0               ARM7
+;reg32,imm32              \x22\x50\xB0               ARM7
+;reg32,reg32              \x23\x50\xB0               ARM7
+;reg32,reg32,imm32        \x24\x50\xB0                   ARM7
+;reg32,reg32,reg32        \x25\x10\xB0                   ARM7
 
 [LDRSBcc]
-reg32,imm32              \x22\x50\xD0               ARM7
+reg32,memam2             \x22\x10\xD0               ARM7
 reg32,reg32              \x23\x50\xD0               ARM7
 reg32,reg32,imm32        \x24\x50\xD0                   ARM7
 reg32,reg32,reg32        \x25\x10\xD0                   ARM7
 
 [LDRSHcc]
-reg32,imm32              \x22\x50\xF0               ARM7
-reg32,reg32              \x23\x50\xF0               ARM7
-reg32,reg32,imm32        \x24\x50\xF0                   ARM7
-reg32,reg32,reg32        \x25\x10\xF0                   ARM7
+reg32,memam2              \x22\x10\xF0               ARM7
+;reg32,imm32              \x22\x50\xF0               ARM7
+;reg32,reg32              \x23\x50\xF0               ARM7
+;reg32,reg32,imm32        \x24\x50\xF0                   ARM7
+;reg32,reg32,reg32        \x25\x10\xF0                   ARM7
 
 [LDRTcc]
 
@@ -243,11 +248,10 @@ reg32,imm8,fpureg        \xF0\x02\x01                   FPA
 reg32,reg32,reg32,reg32  \x15\x00\x20\x90               ARM7
 
 [MOVcc]
-; reg32,shifterop              \x8\x0\0xd                   ARM7
-; reg32,immshifter             \x8\x0\0xd                  ARM7
-; reg32,reg32,reg32        \x9\x1\xA0                     ARM7
-; reg32,reg32,imm          \xA\x1\xA0                     ARM7
-; reg32,imm                \xB\x3\xA0                     ARM7
+reg32,shifterop        \x8\x1\xA0                       ARM7
+; reg32,reg32,reg32    \x9\x1\xA0                       ARM7
+reg32,reg32,shifterop  \xA\x1\xA0                       ARM7
+reg32,immshifter       \xB\x1\xA0                       ARM7
 
 [MRC]
 ; reg32,reg32         \321\301\1\x13\110                  ARM7
@@ -272,18 +276,18 @@ fpureg,fpureg              \xF2                      FPA
 fpureg,immfpu              \xF2                      FPA
 
 [MVNcc]
-; reg32,reg32         \x8\x0\0xf                     ARM7
-; reg32,reg32,reg32   \x9\x1\xE0                     ARM7
-; reg32,reg32,imm     \xA\x1\xE0                     ARM7
-; reg32,imm           \xB\x3\xE0                     ARM7
+reg32,reg32            \x8\x1\xE0                       ARM7
+; reg32,reg32,reg32    \x9\x1\xE0                       ARM7
+reg32,reg32,shifterop  \xA\x1\xE0                       ARM7
+reg32,immshifter       \xB\x1\xE0                       ARM7
 
 [NOP]
 
 [ORRcc]
-reg32,reg32,reg32        \4\x1\x80                     ARM7
-reg32,reg32,reg32,reg32  \5\x1\x80                     ARM7
-reg32,reg32,reg32,imm    \6\x1\x80                     ARM7
-reg32,reg32,imm          \7\x3\x80                     ARM7
+reg32,reg32,reg32            \4\x1\x80               ARM7
+reg32,reg32,reg32,reg32      \5\x1\x80               ARM7
+reg32,reg32,reg32,shifterop  \6\x1\x80               ARM7
+reg32,reg32,immshifter       \7\x3\x80               ARM7
 
 [RDFcc]
 
@@ -296,16 +300,16 @@ reg32,reg32,imm          \7\x3\x80                     ARM7
 [RPWcc]
 
 [RSBcc]
-reg32,reg32,reg32        \4\x0\x60                     ARM7
-reg32,reg32,reg32,reg32  \5\x0\x60                     ARM7
-reg32,reg32,reg32,imm    \6\x0\x60                     ARM7
-reg32,reg32,imm          \7\x2\x60                     ARM7
+;reg32,reg32,reg32            \4\x0\x60                    ARM7
+;reg32,reg32,reg32,reg32      \5\x0\x60                    ARM7
+reg32,reg32,reg32,shifterop  \6\x0\x60                     ARM7
+reg32,reg32,immshifter       \7\x0\x60                     ARM7
 
 [RSCcc]
 reg32,reg32,reg32        \4\x0\xE0                     ARM7
 reg32,reg32,reg32,reg32  \5\x0\xE0                     ARM7
 reg32,reg32,reg32,imm    \6\x0\xE0                     ARM7
-reg32,reg32,imm          \7\x2\xE0                     ARM7
+reg32,reg32,immshifter   \7\x2\xE0                     ARM7
 
 [RSFcc]
 
@@ -317,7 +321,7 @@ reg32,reg32,imm          \7\x2\xE0                     ARM7
 reg32,reg32,reg32        \4\x0\xC0                     ARM7
 reg32,reg32,reg32,reg32  \5\x0\xC0                     ARM7
 reg32,reg32,reg32,imm    \6\x0\xC0                     ARM7
-reg32,reg32,imm          \7\x2\xC0                     ARM7
+reg32,reg32,immshifter   \7\x2\xC0                     ARM7
 
 [SFMcc]
 reg32,imm8,fpureg        \xF0\x02\x00                   FPA
@@ -338,6 +342,7 @@ reg32,reg32,reg32,reg32  \x16\x00\xC0\x90		 ARM7
 
 [STMcc]
 memam4,reglist		   \x26\x80			ARM7
+reg32,reglist		   \x26\x80			ARM7
 
 [STRcc]
 reg32,memam2              \x17\x04\x00                   ARM7
@@ -348,35 +353,36 @@ reg32,memam2              \x17\x04\x00                   ARM7
 ; reg32,reg32,reg32,imm32  \x21\x06\x00                   ARM7
 
 [STRBcc]
-reg32,memam2              \x17\x06\x00                           ARM7
+reg32,memam2              \x17\x04\x40                           ARM7
 
 [STRBTcc]
 
 ; A dummy since it is parsed as STR{cond}H
 [STRHcc]
-reg32,imm32              \x22\x40\xB0              ARM7
-reg32,reg32              \x23\x40\xB0               ARM7
-reg32,reg32,imm32        \x24\x40\xB0                   ARM7
-reg32,reg32,reg32        \x25\x00\xB0                   ARM7
+reg32,memam2              \x22\x00\xB0              ARM7
+;reg32,imm32              \x22\x40\xB0              ARM7
+;reg32,reg32              \x23\x40\xB0               ARM7
+;reg32,reg32,imm32        \x24\x40\xB0                   ARM7
+;reg32,reg32,reg32        \x25\x00\xB0                   ARM7
 
 [STRTcc]
 
 [SUBcc]
-reg32,reg32,shifterop     \4\x0\x40                     ARM7
-reg32,reg32,immshifter    \4\x0\x40                     ARM7
-reg32,reg32,reg32        \4\x0\x40                     ARM7
-; reg32,reg32,reg32,reg32  \5\x0\x40                     ARM7
-; reg32,reg32,reg32,imm    \6\x0\x40                     ARM7
-; reg32,reg32,imm          \7\x2\x40                     ARM7
+reg32,reg32,shifterop       \4\x0\x40                     ARM7
+reg32,reg32,immshifter      \4\x0\x40                     ARM7
+reg32,reg32,reg32           \4\x0\x40                     ARM7
+; reg32,reg32,reg32,reg32     \5\x0\x40                     ARM7
+reg32,reg32,reg32,shifterop \6\x0\x40                     ARM7
+; reg32,reg32,imm           \7\x2\x40                     ARM7
 
 [SWIcc]
 imm                 \2\x0F                        ARM7
 
 [SWPcc]
-reg32,reg32,reg32   \x27\x01\x90                   ARM7
+reg32,reg32,memam2   \x27\x10\x09                   ARM7
 
 [SWPBcc]
-reg32,reg32,reg32   \x27\x01\x90                   ARM7
+reg32,reg32,reg32    \x27\x14\x09                   ARM7
 
 [TANcc]
 
@@ -387,10 +393,10 @@ reg32,reg32,imm     \xE\x1\x20                     ARM7
 reg32,imm           \xF\x3\x20                     ARM7
 
 [TSTcc]
-reg32,reg32         \xC\x1\x00                     ARM7
-reg32,reg32,reg32   \xD\x1\x00                     ARM7
-reg32,reg32,imm     \xE\x1\x00                     ARM7
-reg32,imm           \xF\x3\x00                     ARM7
+reg32,reg32           \xC\x1\x00                     ARM7
+reg32,reg32,reg32     \xD\x1\x00                     ARM7
+reg32,reg32,shifterop \xE\x1\x00                     ARM7
+reg32,immshifter      \xF\x3\x00                     ARM7
 
 [UMLALcc]
 reg32,reg32,reg32,reg32  \x16\x00\xA0\x90		 ARM7

+ 1 - 1
compiler/arm/armnop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from armins.dat }
-105;
+98;

+ 97 - 146
compiler/arm/armtab.inc

@@ -31,7 +31,7 @@
   (
     opcode  : A_ADC;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#160;
     flags   : if_arm7
   ),
@@ -52,14 +52,14 @@
   (
     opcode  : A_ADD;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#0#128;
     flags   : if_arm7
   ),
   (
     opcode  : A_ADD;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#128;
     flags   : if_arm7
   ),
@@ -73,21 +73,14 @@
   (
     opcode  : A_AND;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_reg32);
-    code    : #5#0#0;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_AND;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#0#0;
     flags   : if_arm7
   ),
   (
     opcode  : A_AND;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#0;
     flags   : if_arm7
   ),
@@ -108,28 +101,7 @@
   (
     opcode  : A_BIC;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #4#1#192;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_BIC;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_reg32);
-    code    : #5#1#192;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_BIC;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
-    code    : #6#1#192;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_BIC;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#3#192;
     flags   : if_arm7
   ),
@@ -199,8 +171,8 @@
   (
     opcode  : A_CMN;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate,ot_none,ot_none);
-    code    : #15#3#96;
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
+    code    : #15#1#96;
     flags   : if_arm7
   ),
   (
@@ -220,14 +192,14 @@
   (
     opcode  : A_CMP;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_shifterop,ot_none);
     code    : #14#1#64;
     flags   : if_arm7
   ),
   (
     opcode  : A_CMP;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate,ot_none,ot_none);
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
     code    : #15#3#64;
     flags   : if_arm7
   ),
@@ -248,21 +220,14 @@
   (
     opcode  : A_EOR;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_reg32);
-    code    : #5#0#32;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_EOR;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#0#32;
     flags   : if_arm7
   ),
   (
     opcode  : A_EOR;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#32;
     flags   : if_arm7
   ),
@@ -281,52 +246,38 @@
     flags   : if_arm7
   ),
   (
-    opcode  : A_LDRB;
+    opcode  : A_LDM;
     ops     : 2;
-    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
-    code    : #23#7#16;
+    optypes : (ot_reg32,ot_reglist,ot_none,ot_none);
+    code    : #38#129;
     flags   : if_arm7
   ),
   (
-    opcode  : A_LDR;
+    opcode  : A_LDRB;
     ops     : 2;
     optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
-    code    : #23#5#16;
+    code    : #23#4#80;
     flags   : if_arm7
   ),
   (
-    opcode  : A_LDRH;
+    opcode  : A_LDR;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate or ot_bits32,ot_none,ot_none);
-    code    : #34#80#176;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #23#4#16;
     flags   : if_arm7
   ),
   (
     opcode  : A_LDRH;
     ops     : 2;
-    optypes : (ot_reg32,ot_reg32,ot_none,ot_none);
-    code    : #35#80#176;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate or ot_bits32,ot_none);
-    code    : #36#80#176;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #37#16#176;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #34#16#176;
     flags   : if_arm7
   ),
   (
     opcode  : A_LDRSB;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate or ot_bits32,ot_none,ot_none);
-    code    : #34#80#208;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #34#16#208;
     flags   : if_arm7
   ),
   (
@@ -353,29 +304,8 @@
   (
     opcode  : A_LDRSH;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate or ot_bits32,ot_none,ot_none);
-    code    : #34#80#240;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRSH;
-    ops     : 2;
-    optypes : (ot_reg32,ot_reg32,ot_none,ot_none);
-    code    : #35#80#240;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRSH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate or ot_bits32,ot_none);
-    code    : #36#80#240;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_LDRSH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #37#16#240;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #34#16#240;
     flags   : if_arm7
   ),
   (
@@ -392,6 +322,27 @@
     code    : #21#0#32#144;
     flags   : if_arm7
   ),
+  (
+    opcode  : A_MOV;
+    ops     : 2;
+    optypes : (ot_reg32,ot_shifterop,ot_none,ot_none);
+    code    : #8#1#160;
+    flags   : if_arm7
+  ),
+  (
+    opcode  : A_MOV;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_shifterop,ot_none);
+    code    : #10#1#160;
+    flags   : if_arm7
+  ),
+  (
+    opcode  : A_MOV;
+    ops     : 2;
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
+    code    : #11#1#160;
+    flags   : if_arm7
+  ),
   (
     opcode  : A_MRS;
     ops     : 2;
@@ -441,6 +392,27 @@
     code    : #242;
     flags   : if_fpa
   ),
+  (
+    opcode  : A_MVN;
+    ops     : 2;
+    optypes : (ot_reg32,ot_reg32,ot_none,ot_none);
+    code    : #8#1#224;
+    flags   : if_arm7
+  ),
+  (
+    opcode  : A_MVN;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_shifterop,ot_none);
+    code    : #10#1#224;
+    flags   : if_arm7
+  ),
+  (
+    opcode  : A_MVN;
+    ops     : 2;
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
+    code    : #11#1#224;
+    flags   : if_arm7
+  ),
   (
     opcode  : A_ORR;
     ops     : 3;
@@ -458,43 +430,29 @@
   (
     opcode  : A_ORR;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#1#128;
     flags   : if_arm7
   ),
   (
     opcode  : A_ORR;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#3#128;
     flags   : if_arm7
   ),
-  (
-    opcode  : A_RSB;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #4#0#96;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_RSB;
-    ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_reg32);
-    code    : #5#0#96;
-    flags   : if_arm7
-  ),
   (
     opcode  : A_RSB;
     ops     : 4;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_immediate);
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
     code    : #6#0#96;
     flags   : if_arm7
   ),
   (
     opcode  : A_RSB;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
-    code    : #7#2#96;
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
+    code    : #7#0#96;
     flags   : if_arm7
   ),
   (
@@ -521,7 +479,7 @@
   (
     opcode  : A_RSC;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#224;
     flags   : if_arm7
   ),
@@ -549,7 +507,7 @@
   (
     opcode  : A_SBC;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_immediateshifter,ot_none);
     code    : #7#2#192;
     flags   : if_arm7
   ),
@@ -582,45 +540,31 @@
     flags   : if_arm7
   ),
   (
-    opcode  : A_STR;
+    opcode  : A_STM;
     ops     : 2;
-    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
-    code    : #23#4#0;
+    optypes : (ot_reg32,ot_reglist,ot_none,ot_none);
+    code    : #38#128;
     flags   : if_arm7
   ),
   (
-    opcode  : A_STRB;
+    opcode  : A_STR;
     ops     : 2;
     optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
-    code    : #23#6#0;
+    code    : #23#4#0;
     flags   : if_arm7
   ),
   (
-    opcode  : A_STRH;
+    opcode  : A_STRB;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate or ot_bits32,ot_none,ot_none);
-    code    : #34#64#176;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #23#4#64;
     flags   : if_arm7
   ),
   (
     opcode  : A_STRH;
     ops     : 2;
-    optypes : (ot_reg32,ot_reg32,ot_none,ot_none);
-    code    : #35#64#176;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_STRH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate or ot_bits32,ot_none);
-    code    : #36#64#176;
-    flags   : if_arm7
-  ),
-  (
-    opcode  : A_STRH;
-    ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #37#0#176;
+    optypes : (ot_reg32,ot_memoryam2,ot_none,ot_none);
+    code    : #34#0#176;
     flags   : if_arm7
   ),
   (
@@ -644,6 +588,13 @@
     code    : #4#0#64;
     flags   : if_arm7
   ),
+  (
+    opcode  : A_SUB;
+    ops     : 4;
+    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_shifterop);
+    code    : #6#0#64;
+    flags   : if_arm7
+  ),
   (
     opcode  : A_SWI;
     ops     : 1;
@@ -654,15 +605,15 @@
   (
     opcode  : A_SWP;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #39#1#144;
+    optypes : (ot_reg32,ot_reg32,ot_memoryam2,ot_none);
+    code    : #39#16#9;
     flags   : if_arm7
   ),
   (
     opcode  : A_SWPB;
     ops     : 3;
     optypes : (ot_reg32,ot_reg32,ot_reg32,ot_none);
-    code    : #39#1#144;
+    code    : #39#20#9;
     flags   : if_arm7
   ),
   (
@@ -710,14 +661,14 @@
   (
     opcode  : A_TST;
     ops     : 3;
-    optypes : (ot_reg32,ot_reg32,ot_immediate,ot_none);
+    optypes : (ot_reg32,ot_reg32,ot_shifterop,ot_none);
     code    : #14#1#0;
     flags   : if_arm7
   ),
   (
     opcode  : A_TST;
     ops     : 2;
-    optypes : (ot_reg32,ot_immediate,ot_none,ot_none);
+    optypes : (ot_reg32,ot_immediateshifter,ot_none,ot_none);
     code    : #15#3#0;
     flags   : if_arm7
   ),

+ 86 - 18
compiler/arm/cgcpu.pas

@@ -71,6 +71,8 @@ unit cgcpu;
 
         procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: TResFlags; reg: TRegister); override;
 
+        procedure g_profilecode(list : TAsmList); override;
+
         procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
         procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
         procedure g_maybe_got_init(list : TAsmList); override;
@@ -102,7 +104,7 @@ unit cgcpu;
 
         procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
         { Transform unsupported methods into Internal errors }
-        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
 
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
@@ -182,6 +184,8 @@ unit cgcpu;
         procedure g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint); override;
 
         function handle_load_store(list: TAsmList; op: tasmop; oppostfix: toppostfix; reg: tregister; ref: treference): treference; override;
+
+        procedure g_external_wrapper(list : TAsmList; procdef : tprocdef; const externalname : string); override;
       end;
 
       tthumbcg64farm = class(tbasecg64farm)
@@ -636,7 +640,9 @@ unit cgcpu;
         sym : TAsmSymbol;
       begin
         { check not really correct: should only be used for non-Thumb cpus }
-        if CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype] then
+        if (CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype]) and
+          { WinCE GNU AS (not sure if this applies in general) does not support BLX imm }
+          (target_info.system<>system_arm_wince) then
           branchopcode:=A_BLX
         else
           branchopcode:=A_BL;
@@ -1700,7 +1706,7 @@ unit cgcpu;
       end;
 
 
-    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
+    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
       begin
         if reverse then
           begin
@@ -1794,6 +1800,16 @@ unit cgcpu;
         list.concat(setcondition(taicpu.op_reg_const(A_MOV,reg,0),inverse_cond(flags_to_cond(f))));
       end;
 
+    procedure tbasecgarm.g_profilecode(list : TAsmList);
+      begin
+        if target_info.system = system_arm_linux then
+          begin
+            list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R14]));
+            a_call_name(list,'__gnu_mcount_nc',false);
+          end
+        else
+          internalerror(2014091201);
+      end;
 
     procedure tbasecgarm.g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);
       var
@@ -3201,6 +3217,7 @@ unit cgcpu;
               if (href.offset in [0..124]) and ((href.offset mod 4)=0) then
                 begin
                   list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
+                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R0,NR_R12));
                   cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R0);
                   list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
                   list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
@@ -3218,20 +3235,24 @@ unit cgcpu;
                   tmpref.symbol:=l;
                   tmpref.base:=NR_PC;
                   list.concat(taicpu.op_reg_ref(A_LDR,NR_R1,tmpref));
+                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R0,NR_R12));
                   href.offset:=0;
+                  href.base:=NR_R0;
                   href.index:=NR_R1;
                   cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R0);
                   list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
                   list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0,RS_R1]));
                 end;
-              list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R12));
             end
           else
             begin
               reference_reset_base(href,NR_R12,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
               cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_R12);
-              list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R12));
             end;
+          if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
+            list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R12))
+          else
+            list.concat(taicpu.op_reg(A_BX,NR_R12));
         end;
 
       var
@@ -3248,6 +3269,9 @@ unit cgcpu;
         if procdef.owner.symtabletype<>ObjectSymtable then
           Internalerror(200109191);
 
+          if GenerateThumbCode or GenerateThumb2Code then
+            list.concat(tai_thumb_func.create);
+
         make_global:=false;
         if (not current_module.is_unit) or
            create_smartlink or
@@ -3293,7 +3317,7 @@ unit cgcpu;
             cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,tmpref,NR_R0);
             list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
             list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
-            list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R12));
+            list.concat(taicpu.op_reg(A_BX,NR_R12));
           end
         else
           list.concat(taicpu.op_sym(A_B,current_asmdata.RefAsmSymbol(procdef.mangledname)));
@@ -4050,18 +4074,36 @@ unit cgcpu;
         tmpreg : TRegister;
       begin
         href:=ref;
-        if (op in [A_STR,A_STRB,A_STRH]) and
-           (abs(ref.offset)>124) then
-          begin
-            tmpreg:=getintregister(list,OS_ADDR);
-            a_loadaddr_ref_reg(list,ref,tmpreg);
-
-            reference_reset_base(href,tmpreg,0,ref.alignment);
-          end
-        else if (op=A_LDR) and
-           (oppostfix in [PF_None]) and
-           (ref.base<>NR_STACK_POINTER_REG)  and
-           (abs(ref.offset)>124) then
+        if { LDR/STR limitations }
+           (
+            (((op=A_LDR) and (oppostfix=PF_None)) or
+             ((op=A_STR) and (oppostfix=PF_None))) and
+            (ref.base<>NR_STACK_POINTER_REG) and
+            (abs(ref.offset)>124)
+           ) or
+           { LDRB/STRB limitations }
+           (
+           (((op=A_LDR) and (oppostfix=PF_B)) or
+            ((op=A_LDRB) and (oppostfix=PF_None)) or
+            ((op=A_STR) and (oppostfix=PF_B)) or
+            ((op=A_STRB) and (oppostfix=PF_None))) and
+            ((ref.base=NR_STACK_POINTER_REG) or
+             (ref.index=NR_STACK_POINTER_REG) or
+             (abs(ref.offset)>31)
+            )
+           ) or
+           { LDRH/STRH limitations }
+           (
+            (((op=A_LDR) and (oppostfix=PF_H)) or
+             ((op=A_LDRH) and (oppostfix=PF_None)) or
+             ((op=A_STR) and (oppostfix=PF_H)) or
+             ((op=A_STRH) and (oppostfix=PF_None))) and
+            ((ref.base=NR_STACK_POINTER_REG) or
+             (ref.index=NR_STACK_POINTER_REG) or
+             (abs(ref.offset)>62) or
+             ((abs(ref.offset) mod 2)<>0)
+            )
+           ) then
           begin
             tmpreg:=getintregister(list,OS_ADDR);
             a_loadaddr_ref_reg(list,ref,tmpreg);
@@ -4274,6 +4316,32 @@ unit cgcpu;
       end;
 
 
+    procedure tthumbcgarm.g_external_wrapper(list: TAsmList; procdef: tprocdef; const externalname: string);
+      var
+        tmpref : treference;
+        l : tasmlabel;
+      begin
+        { there is no branch instruction on thumb which allows big distances and which leaves LR as it is
+          and which allows to switch the instruction set }
+
+        { create const entry }
+        reference_reset(tmpref,4);
+        current_asmdata.getjumplabel(l);
+        tmpref.symbol:=l;
+        tmpref.base:=NR_PC;
+        list.concat(taicpu.op_regset(A_PUSH,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
+        list.concat(taicpu.op_reg_ref(A_LDR,NR_R0,tmpref));
+        list.concat(taicpu.op_reg_reg(A_MOV,NR_R12,NR_R0));
+        list.concat(taicpu.op_regset(A_POP,R_INTREGISTER,R_SUBWHOLE,[RS_R0]));
+        list.concat(taicpu.op_reg(A_BX,NR_R12));
+
+        { append const entry }
+        list.Concat(tai_align.Create(4));
+        list.Concat(tai_label.create(l));
+        list.concat(tai_const.Create_sym(current_asmdata.RefAsmSymbol(externalname)));
+      end;
+
+
     procedure tthumb2cgarm.init_register_allocators;
       begin
         inherited init_register_allocators;

+ 43 - 1
compiler/arm/cpuelf.pas

@@ -561,6 +561,7 @@ implementation
     rotation:longint;
     residual,g_n:longword;
     curloc: aword;
+    bit_S,bit_I1,bit_I2: aint;
   begin
     data:=objsec.data;
     for i:=0 to objsec.ObjRelocations.Count-1 do
@@ -660,7 +661,16 @@ implementation
                   2) when target is unresolved weak symbol, CALL must be changed to NOP,
                   while JUMP24 behavior is unspecified. }
                 tmp:=sarlongint((address and $00FFFFFF) shl 8,6);
-                tmp:=tmp+relocval-curloc;
+                tmp:=tmp+relocval;
+                if odd(tmp) then    { dest is Thumb? }
+                  begin
+                    if (reltyp=R_ARM_CALL) then
+                      { change BL to BLX, dest bit 1 goes to instruction bit 24 }
+                      address:=(address and $FE000000) or (((tmp-curloc) and 2) shl 23) or $10000000
+                    else
+                      InternalError(2014092001);
+                  end;
+                tmp:=tmp-curloc;
                 // TODO: check overflow
                 address:=(address and $FF000000) or ((tmp and $3FFFFFE) shr 2);
               end;
@@ -829,6 +839,38 @@ implementation
                   address:=address or (1 shl 23);
               end;
 
+            R_ARM_THM_CALL:
+              begin
+                if (not ElfTarget.relocs_use_addend) then
+                  begin
+                    address:=((address and $ffff) shl 16) or word(address shr 16);
+                    bit_S:=(address shr 26) and 1;
+                    bit_I1:=(bit_S xor ((address shr 13) and 1)) xor 1;
+                    bit_I2:=(bit_S xor ((address shr 11) and 1)) xor 1;
+                    tmp:=((-bit_S) shl 24) or (bit_I1 shl 23) or (bit_I2 shl 22) or (((address shr 16) and $3ff) shl 12) or ((address and $7ff) shl 1);
+                  end
+                else  { TODO: must read the instruction anyway }
+                  tmp:=address;
+                tmp:=tmp+relocval;       { dest address }
+                if odd(tmp) then         { if it's Thumb code, change possible BLX to BL }
+                  address:=address or $1800;
+                tmp:=tmp-curloc;         { now take PC-relative }
+                { TODO: overflow check, different limit for Thumb and Thumb-2 }
+
+                { now encode this mess back }
+                if (address and $5000)=$4000 then
+                  tmp:=(tmp+2) and (not 3);
+
+                bit_S:=(tmp shr 31) and 1;
+                address:=(address and $F800D000) or
+                  (bit_S shl 26) or
+                  (((tmp shr 12) and $3ff) shl 16) or
+                  ((tmp shr 1) and $7FF) or
+                  ((((tmp shr 23) and 1) xor 1 xor bit_S) shl 13) or
+                  ((((tmp shr 22) and 1) xor 1 xor bit_S) shl 11);
+                address:=((address and $ffff) shl 16) or word(address shr 16);
+              end;
+
             R_ARM_TLS_IE32:
               begin
                 relocval:=relocval-tlsseg.mempos+align_aword(TCB_SIZE,tlsseg.align);

+ 6 - 1
compiler/arm/cpuinfo.pas

@@ -21,6 +21,9 @@ Interface
 
 Type
    bestreal = double;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TDoubleRec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts64real = double;
    ts80real = type extended;
@@ -338,8 +341,10 @@ Type
       ct_thumb2bare
      );
 
-
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = true;
    {# Size of native extended floating point type }
    extended_size = 12;
    {# Size of a multimedia register               }

+ 20 - 4
compiler/arm/narmadd.pas

@@ -85,6 +85,8 @@ interface
                       GetResFlags:=F_LT;
                     gten:
                       GetResFlags:=F_LE;
+                    else
+                      internalerror(201408203);
                   end
                 else
                   case NodeType of
@@ -96,6 +98,8 @@ interface
                       GetResFlags:=F_GT;
                     gten:
                       GetResFlags:=F_GE;
+                    else
+                      internalerror(201408204);
                   end;
               end
             else
@@ -110,6 +114,8 @@ interface
                       GetResFlags:=F_CC;
                     gten:
                       GetResFlags:=F_LS;
+                    else
+                      internalerror(201408205);
                   end
                 else
                   case NodeType of
@@ -121,6 +127,8 @@ interface
                       GetResFlags:=F_HI;
                     gten:
                       GetResFlags:=F_CS;
+                    else
+                      internalerror(201408206);
                   end;
               end;
         end;
@@ -144,6 +152,8 @@ interface
             result:=F_GT;
           gten:
             result:=F_GE;
+          else
+            internalerror(201408207);
         end;
       end;
 
@@ -379,13 +389,13 @@ interface
               tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
               if right.location.loc = LOC_CONSTANT then
                 begin
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_AND,tmpreg,left.location.register,right.location.value));
+                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,right.location.value,left.location.register,tmpreg);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,right.location.value));
                 end
               else
                 begin
-                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,tmpreg,left.location.register,right.location.register));
+                  cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_AND,OS_32,left.location.register,right.location.register,tmpreg);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,tmpreg,right.location.register));
                 end;
@@ -421,8 +431,14 @@ interface
             dummyreg:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
             cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
 
-            if GenerateThumbCode then
-              cg.a_op_reg_reg_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.register64.reglo,left.location.register64.reghi,dummyreg)
+            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+            { Optimize for the common case of int64 < 0 }
+            if nodetype in [ltn, gtn] then
+              begin
+                {Just check for the MSB in reghi to be set or not, this is independed from nf_swapped}
+                location.resflags:=F_NE;
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_TST,left.location.register64.reghi, aint($80000000)));
+              end
             else
               current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ORR,dummyreg,left.location.register64.reglo,left.location.register64.reghi),PF_S));
           end

+ 4 - 3
compiler/arm/narmmem.pas

@@ -27,6 +27,7 @@ interface
 
     uses
       globtype,
+      symtype,
       cgbase,cpubase,nmem,ncgmem;
 
     type
@@ -36,7 +37,7 @@ interface
 
 
       tarmvecnode = class(tcgvecnode)
-        procedure update_reference_reg_mul(maybe_const_reg: tregister; l: aint);override;
+        procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);override;
       end;
 
 implementation
@@ -70,7 +71,7 @@ implementation
                              TARMVECNODE
 *****************************************************************************}
 
-     procedure tarmvecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
+     procedure tarmvecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
        var
          hreg: tregister;
          hl : longint;
@@ -79,7 +80,7 @@ implementation
             (GenerateThumbCode) or
             { simple constant? }
             (l=1) or ispowerof2(l,hl) or ispowerof2(l+1,hl) or ispowerof2(l-1,hl) then
-           inherited update_reference_reg_mul(maybe_const_reg,l)
+           inherited update_reference_reg_mul(maybe_const_reg,regsize,l)
          else if (location.reference.base<>NR_NO) then
            begin
              hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);

+ 11 - 4
compiler/arm/narmset.pas

@@ -141,6 +141,7 @@ implementation
     procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : aint);
       var
         last : TConstExprInt;
+        tmpreg,
         basereg,
         indexreg : tregister;
         href : treference;
@@ -222,7 +223,7 @@ implementation
           begin
             if cs_create_pic in current_settings.moduleswitches then
               internalerror(2013082102);
-            cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SUB,OS_ADDR,min_+1,indexreg,indexreg);
+            cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SUB,OS_ADDR,min_,indexreg,indexreg);
             current_asmdata.getaddrlabel(tablelabel);
 
             cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,2,indexreg);
@@ -231,9 +232,15 @@ implementation
             reference_reset_symbol(href,tablelabel,0,4);
             cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList, href, basereg);
 
-            cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_ADDr, indexreg, basereg);
-
-            current_asmdata.CurrAsmList.Concat(taicpu.op_reg(A_BX, basereg));
+            reference_reset(href,0);
+            href.base:=basereg;
+            href.index:=indexreg;
+            
+            tmpreg:=cg.getintregister(current_asmdata.CurrAsmList, OS_ADDR);
+            cg.a_load_ref_reg(current_asmdata.CurrAsmList, OS_ADDR, OS_ADDR, href, tmpreg);
+            
+            { do not use BX here to avoid switching into arm mode }
+            current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_MOV, NR_PC, tmpreg));
 
             cg.a_label(current_asmdata.CurrAsmList,tablelabel);
             { generate jump table }

+ 2 - 1
compiler/arm/raarmgas.pas

@@ -1043,6 +1043,7 @@ Unit raarmgas;
         hreg : tregister;
         flags : tspecialregflags;
       begin
+        hreg:=NR_NO;
         case actasmtoken of
           AS_REGISTER:
             begin
@@ -1219,7 +1220,7 @@ Unit raarmgas;
                   end;
               end;
           end;
-        maxlen:=max(length(hs),5);
+        maxlen:=min(length(hs),5);
         actopcode:=A_NONE;
         for j:=maxlen downto 1 do
           begin

+ 10 - 0
compiler/arm/rgcpu.pas

@@ -304,6 +304,11 @@ unit rgcpu;
                    (get_alias(getsupreg(oper[0]^.reg))=orgreg) and
                    (get_alias(getsupreg(oper[1]^.reg))<>orgreg) then
                   begin
+                    { do not replace if we're on Thumb, ldr/str cannot be used with rX>r7 }
+                    if GenerateThumbCode and
+                       (getsupreg(oper[1]^.reg)>RS_R7) then
+                       exit;
+
                     { str expects the register in oper[0] }
                     instr.loadreg(0,oper[1]^.reg);
                     instr.loadref(1,spilltemp);
@@ -314,6 +319,11 @@ unit rgcpu;
                    (get_alias(getsupreg(oper[1]^.reg))=orgreg) and
                    (get_alias(getsupreg(oper[0]^.reg))<>orgreg) then
                   begin
+                    { do not replace if we're on Thumb, ldr/str cannot be used with rX>r7 }
+                    if GenerateThumbCode and
+                       (getsupreg(oper[0]^.reg)>RS_R7) then
+                       exit;
+
                     instr.loadref(1,spilltemp);
                     opcode:=A_LDR;
                     result:=true;

+ 4 - 2
compiler/assemble.pas

@@ -83,6 +83,7 @@ interface
         lastsectype : TAsmSectionType;
         procedure WriteSourceLine(hp: tailineinfo);
         procedure WriteTempalloc(hp: tai_tempalloc);
+        Function DoPipe:boolean;
       public
         {# Returns the complete path and executable name of the assembler
            program.
@@ -271,7 +272,7 @@ Implementation
                                  TExternalAssembler
 *****************************************************************************}
 
-    Function DoPipe:boolean;
+    Function TExternalAssembler.DoPipe:boolean;
       begin
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and
@@ -1736,7 +1737,8 @@ Implementation
         if not(tf_section_threadvars in target_info.flags) then
           exclude(to_do,al_threadvars);
         for i:=low(TasmlistType) to high(TasmlistType) do
-          if (i in to_do) and (current_asmdata.asmlists[i]<>nil) then
+          if (i in to_do) and (current_asmdata.asmlists[i]<>nil) and
+             (not current_asmdata.asmlists[i].empty) then
             addlist(current_asmdata.asmlists[i]);
 
         if SmartAsm then

+ 5 - 5
compiler/avr/aasmcpu.pas

@@ -246,11 +246,11 @@ implementation
       begin
         result:=operand_read;
         case opcode of
-          A_CLR,
-          A_MOV, A_MOVW:
-           if opnr=0 then
-             result:=operand_write;
-          A_CP,A_CPC,A_CPI,A_PUSH :
+          A_CLR,A_LDD,A_LD,A_LDI,A_LDS,
+          A_MOV,A_MOVW:
+            if opnr=0 then
+              result:=operand_write;
+          A_CP,A_CPC,A_CPI,A_PUSH,A_ST,A_STD,A_STS:
             ;
           else
             begin

+ 4 - 6
compiler/avr/agavrgas.pas

@@ -81,6 +81,7 @@ unit agavrgas;
         var
           s : string;
         begin
+           s:='';
            with ref do
             begin
   {$ifdef extdebug}
@@ -95,9 +96,8 @@ unit agavrgas;
               else if base<>NR_NO then
                 begin
                   if addressmode=AM_PREDRECEMENT then
-                    s:='-'
-                  else
-                    s:='';
+                    s:='-';
+
                   case base of
                     NR_R26:
                       s:=s+'X';
@@ -119,9 +119,7 @@ unit agavrgas;
               else if assigned(symbol) or (offset<>0) then
                 begin
                   if assigned(symbol) then
-                    s:=ReplaceForbiddenAsmSymbolChars(symbol.name)
-                  else
-                     s:='';
+                    s:=ReplaceForbiddenAsmSymbolChars(symbol.name);
 
                   if offset<0 then
                     s:=s+tostr(offset)

+ 6 - 0
compiler/avr/cpuinfo.pas

@@ -21,6 +21,9 @@ Interface
 
 Type
    bestreal = double;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TDoubleRec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts64real = double;
    ts80real = type extended;
@@ -61,6 +64,9 @@ Type
      );
 
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = true;
    {# Size of native extended floating point type }
    extended_size = 12;
    {# Size of a multimedia register               }

+ 4 - 4
compiler/avr/cpupara.pas

@@ -201,13 +201,13 @@ unit cpupara;
         begin
           { In case of po_delphi_nested_cc, the parent frame pointer
             is always passed on the stack. }
-           if (nextintreg>RS_R8) and
+           if (nextintreg>RS_R7) and
               (not(vo_is_parentfp in hp.varoptions) or
                not(po_delphi_nested_cc in p.procoptions)) then
              begin
                paraloc^.loc:=LOC_REGISTER;
                paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-               inc(nextintreg);
+               dec(nextintreg);
              end
            else
              begin
@@ -251,8 +251,8 @@ unit cpupara;
               begin
                 paradef:=getpointerdef(paradef);
                 loc:=LOC_REGISTER;
-                paracgsize := OS_ADDR;
-                paralen := tcgsize2size[OS_ADDR];
+                paracgsize:=OS_ADDR;
+                paralen:=tcgsize2size[OS_ADDR];
               end
             else
               begin

+ 10 - 2
compiler/avr/navradd.pas

@@ -77,6 +77,8 @@ interface
                       GetResFlags:=F_LT;
                     gten:
                       GetResFlags:=F_NotPossible;
+                    else
+                      internalerror(2014082020);
                   end
                 else
                   case NodeType of
@@ -88,6 +90,8 @@ interface
                       GetResFlags:=F_NotPossible;
                     gten:
                       GetResFlags:=F_GE;
+                    else
+                      internalerror(2014082021);
                   end;
               end
             else
@@ -102,6 +106,8 @@ interface
                       GetResFlags:=F_CC;
                     gten:
                       GetResFlags:=F_NotPossible;
+                    else
+                      internalerror(2014082022);
                   end
                 else
                   case NodeType of
@@ -113,6 +119,8 @@ interface
                       GetResFlags:=F_NotPossible;
                     gten:
                       GetResFlags:=F_CS;
+                    else
+                      internalerror(2014082023);
                   end;
               end;
         end;
@@ -220,7 +228,7 @@ interface
     function tavraddnode.pass_1 : tnode;
       begin
         result:=inherited pass_1;
-{
+{$ifdef dummy}
         if not(assigned(result)) then
           begin
             unsigned:=not(is_signed(left.resultdef)) or
@@ -240,7 +248,7 @@ interface
              is_dynamic_array(left.resultdef)
            ) then
           expectloc:=LOC_FLAGS;
-}
+{$endif dummy}
       end;
 
 

+ 2 - 2
compiler/avr/navrmat.pas

@@ -165,7 +165,7 @@ implementation
         secondpass(right);
         location_copy(location,left.location);
 
-{
+{$ifdef dummy}
         { put numerator in register }
         size:=def_cgsize(left.resultdef);
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,
@@ -201,7 +201,7 @@ implementation
         {  simple comparison with 0)                                             }
         if is_signed(right.resultdef) then
           cg.g_overflowcheck(current_asmdata.CurrAsmList,location,resultdef);
-}
+{$endif dummy}
       end;
 
 {*****************************************************************************

+ 1 - 1
compiler/avr/raavrgas.pas

@@ -617,7 +617,7 @@ Unit raavrgas;
         actopcode:=A_NONE;
         for j:=maxlen downto 1 do
           begin
-            actopcode:=tasmop(PtrInt(iasmops.Find(copy(hs,1,j))));
+            actopcode:=tasmop(PtrUInt(iasmops.Find(copy(hs,1,j))));
             if actopcode<>A_NONE then
               begin
                 actasmtoken:=AS_OPCODE;

+ 385 - 0
compiler/blockutl.pas

@@ -0,0 +1,385 @@
+{
+    Copyright (c) 2014 by Jonas Maebe, Member of the Free Pascal
+    development team.
+
+    This unit implements helper routines for "blocks" support
+    (http://en.wikipedia.org/wiki/Blocks_(C_language_extension) )
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit blockutl;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    node,nld,ncnv,
+    symtype,symdef;
+
+  { accepts a loadnode for a procdef
+
+    returns a node representing the converted code to implement this
+    conversion (this node is valid both for typed constant declarations and
+    in function bodies). The node is not reused }
+  function generate_block_for_procaddr(procloadnode: tloadnode): tnode;
+
+  { for a procdef, return a recorddef representing a block literal for this
+    procdef
+
+    for a procvardef, return a basic recorddef representing a block literal
+    with enough info to call this procvardef }
+  function get_block_literal_type_for_proc(pd: tabstractprocdef): trecorddef;
+
+implementation
+
+  uses
+    verbose,globtype,globals,cutils,constexp,
+    pass_1,pparautl,fmodule,
+    aasmdata,
+    nbas,ncon,nmem,nutils,
+    symbase,symconst,symtable,symsym,symcreat,objcutil,objcdef,defutil,
+    paramgr;
+
+
+  function get_block_literal_type_for_proc(pd: tabstractprocdef): trecorddef;
+    begin
+      if pd.typ=procvardef then
+        result:=trecorddef(search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_LITERAL_BASE',true).typedef)
+      else if pd.is_addressonly then
+        result:=trecorddef(search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_LITERAL_STATIC',true).typedef)
+      { todo: nested functions and Objective-C methods }
+      else if not is_nested_pd(pd) and
+              not is_objcclass(tdef(pd.owner.defowner)) then
+        result:=trecorddef(search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_LITERAL_COMPLEX_PROCVAR',true).typedef)
+      else
+        internalerror(2014071304);
+    end;
+
+
+  function get_block_literal_isa(orgpd: tprocdef): tstaticvarsym;
+    var
+      srsym: tsym;
+      srsymtable: tsymtable;
+      name: tidstring;
+    begin
+      if orgpd.is_addressonly then
+        name:='_NSCONCRETEGLOBALBLOCK'
+      else
+        name:='_NSCONCRETESTACKBLOCK';
+      if not searchsym_in_named_module('BLOCKRTL',name,srsym,srsymtable) or
+         (srsym.typ<>staticvarsym) then
+        internalerror(2014071501);
+      result:=tstaticvarsym(srsym);
+    end;
+
+
+  function get_block_literal_flags(orgpd, invokepd: tprocdef): longint;
+    { BlockLiteralFlags }
+    const
+      BLOCK_HAS_COPY_DISPOSE    = 1 shl 25;
+      BLOCK_HAS_CXX_OBJ         = 1 shl 26;
+      BLOCK_IS_GLOBAL           = 1 shl 28;
+      BLOCK_USE_STRET           = 1 shl 29;
+      BLOCK_HAS_SIGNATURE       = 1 shl 30;
+      BLOCK_HAS_EXTENDED_LAYOUT = 1 shl 31;
+    begin
+      result:=0;
+      { BLOCK_HAS_COPY_DISPOSE :
+          copy/dispose will be necessary once we support nested procedures, in
+          case they capture reference counted types, Objective-C class instances
+          or block-type variables
+      }
+
+      { BLOCK_HAS_CXX_OBJ:
+          we don't support C++ (stack-based) class instances yet
+      }
+
+      { BLOCK_IS_GLOBAL:
+          set in case the block does not capture any local state; used for
+          global functions and in theory also possible for nested functions that
+          do not access any variables from their parentfp context
+      }
+      if orgpd.is_addressonly then
+        result:=result or BLOCK_IS_GLOBAL;
+
+      { BLOCK_USE_STRET:
+          set in case the invoke function returns its result via a hidden
+          parameter
+      }
+      if paramanager.ret_in_param(invokepd.returndef,orgpd) then
+        result:=result or BLOCK_USE_STRET;
+      { BLOCK_HAS_SIGNATURE:
+          only if this bit is set, the above bit will actually be taken into
+          account (for backward compatibility). We need it so that our invoke
+          function isn't called as a variadic function, but on the downside this
+          requires Mac OS X 10.7 or later
+      }
+      result:=result or BLOCK_HAS_SIGNATURE;
+      { BLOCK_HAS_EXTENDED_LAYOUT:
+          no documentation about what this means or what it's good for (clang
+          adds it for Objective-C 1 platforms in case garbage collection is
+          switched off, but then you also have to actually generate this layout)
+      }
+    end;
+
+
+  function get_block_literal_descriptor(invokepd: tprocdef; block_literal_size: tcgint): tstaticvarsym;
+    var
+      descriptordef: tdef;
+      descriptor: tstaticvarsym;
+      name: tsymstr;
+      srsym: tsym;
+      srsymtable: tsymtable;
+    begin
+      (*
+        FPC_Block_descriptor_simple = record
+          reserved: culong;
+          Block_size: culong;
+          { signatures are only for the "ABI.2010.3.16" version, but that's all
+            we support because otherwise the callback has to be a C-style
+            variadic function, which we cannot (yet?) generate }
+          signature: pchar;
+        end;
+      *)
+
+      { must be a valid Pascal identifier, because we will reference it when
+        constructing the block initialiser }
+      { we don't have to include the moduleid in this mangledname, because
+        the invokepd is a local procedure in the current unit -> defid by
+        itself is unique }
+      name:='__FPC_BLOCK_DESCRIPTOR_SIMPLE_'+tostr(invokepd.defid);
+      { already exists -> return }
+      if searchsym(name,srsym,srsymtable) then
+        begin
+          if srsym.typ<>staticvarsym then
+            internalerror(2014071402);
+          result:=tstaticvarsym(srsym);
+          exit;
+        end;
+      { find the type of the descriptor structure }
+      descriptordef:=search_named_unit_globaltype('BLOCKRTL','FPC_BLOCK_DESCRIPTOR_SIMPLE',true).typedef;
+      { create new static variable }
+      descriptor:=cstaticvarsym.create(name,vs_value,descriptordef,[]);
+      symtablestack.top.insert(descriptor);
+      include(descriptor.symoptions,sp_internal);
+      { create typed constant for the descriptor }
+      str_parse_typedconst(current_asmdata.AsmLists[al_const],
+        '(reserved: 0; Block_size: '+tostr(block_literal_size)+
+        '; signature: '''+objcencodemethod(invokepd)+''');',descriptor);
+      result:=descriptor;
+    end;
+
+
+  { creates a wrapper function for pd with the C calling convention and an
+    extra first parameter pointing to the block "self" pointer. This wrapper is
+    what will be assigned to the "invoke" field of the block }
+  function get_invoke_wrapper(orgpd: tprocdef; orgpv: tprocvardef): tprocdef;
+    var
+      wrappername: TIDString;
+      srsym: tsym;
+      srsymtable: tsymtable;
+    begin
+      { the copy() is to ensure we don't overflow the maximum identifier length;
+        the combination of owner.moduleid and defid will make the name unique }
+      wrappername:='__FPC_BLOCK_INVOKE_'+upper(copy(orgpd.procsym.realname,1,60))+'_'+tostr(orgpd.owner.moduleid)+'_'+tostr(orgpd.defid);
+      { already an invoke wrapper for this procsym -> reuse }
+      if searchsym(wrappername,srsym,srsymtable) then
+        begin
+          if (srsym.typ<>procsym) or
+             (tprocsym(srsym).procdeflist.count<>1) then
+            internalerror(2014071503);
+          result:=tprocdef(tprocsym(srsym).procdeflist[0]);
+          exit;
+        end;
+      { bare copy, so that self etc are not inserted }
+      result:=tprocdef(orgpd.getcopyas(procdef,pc_bareproc));
+      { will be called accoding to the ABI conventions }
+      result.proccalloption:=pocall_cdecl;
+      { add po_is_block so that a block "self" pointer gets added (of the type
+        returned by get_block_literal_type_for_proc()) }
+      include(result.procoptions,po_is_block);
+      { now insert self/vmt/funcret according to the newly set calling
+        convention }
+      insert_self_and_vmt_para(result);
+      insert_funcret_para(result);
+      finish_copied_procdef(result,wrappername,current_module.localsymtable,nil);
+      if orgpd.is_addressonly then
+        begin
+          result.synthetickind:=tsk_callthrough;
+          result.skpara:=orgpd;
+        end
+      else
+        begin
+          { alias for the type to invoke the procvar, used in the symcreat
+            handling of tsk_block_invoke_procvar }
+          result.localst.insert(ctypesym.create('__FPC_BLOCK_INVOKE_PV_TYPE',orgpv));
+          result.synthetickind:=tsk_block_invoke_procvar;
+        end;
+    end;
+
+
+  { compose a block literal for a static block (one without context) }
+  function get_global_proc_literal_sym(blockliteraldef: tdef; blockisasym: tstaticvarsym; blockflags: longint; invokepd: tprocdef; descriptor: tstaticvarsym): tstaticvarsym;
+    var
+      literalname: TIDString;
+      srsym: tsym;
+      srsymtable: tsymtable;
+    begin
+      literalname:='block_literal_for_'+invokepd.procsym.realname;
+      { already exists -> return }
+      if searchsym(literalname,srsym,srsymtable) then
+        begin
+          if srsym.typ<>staticvarsym then
+            internalerror(2014071506);
+          result:=tstaticvarsym(srsym);
+          exit;
+        end;
+      { create new block literal symbol }
+      result:=cstaticvarsym.create(
+        '$'+literalname,
+        vs_value,
+        blockliteraldef,[]);
+      include(result.symoptions,sp_internal);
+      symtablestack.top.insert(result);
+      { initialise it }
+      str_parse_typedconst(current_asmdata.AsmLists[al_const],
+        '(base: (isa        : @'+blockisasym.realname+
+              '; flags     : '+tostr(blockflags)+
+              '; reserved  : 0'+
+              '; invoke    : @'+invokepd.procsym.realname+
+              '; descriptor: @'+descriptor.realname+
+              '));',
+        result);
+    end;
+
+
+  { compose an on-stack block literal for a "procedure of object" }
+  function get_pascal_method_literal(blockliteraldef: tdef; blockisasym: tstaticvarsym; blockflags: longint; procvarnode: tnode; invokepd: tprocdef; orgpv: tprocvardef; descriptor: tstaticvarsym): tnode;
+    var
+      statement: tstatementnode;
+      literaltemp: ttempcreatenode;
+    begin
+      result:=internalstatements(statement);
+      { create new block literal structure }
+      literaltemp:=ctempcreatenode.create(blockliteraldef,blockliteraldef.size,tt_persistent,false);
+      addstatement(statement,literaltemp);
+      { temp.base.isa:=@blockisasym }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'ISA'),
+        caddrnode.create(cloadnode.create(blockisasym,blockisasym.owner))));
+      { temp.base.flags:=blockflags }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'FLAGS'),
+        genintconstnode(blockflags)));
+      { temp.base.reserved:=0 }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'RESERVED'),
+        genintconstnode(0)));
+      { temp.base.invoke:=tmethod(@invokepd) }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'INVOKE'),
+        ctypeconvnode.create_proc_to_procvar(
+          cloadnode.create_procvar(invokepd.procsym,invokepd,invokepd.owner))));
+      { temp.base.descriptor:=@descriptor }
+      addstatement(statement,cassignmentnode.create(
+        genloadfield(genloadfield(ctemprefnode.create(literaltemp),'BASE'),'DESCRIPTOR'),
+        caddrnode.create(cloadnode.create(descriptor,descriptor.owner))));
+      { temp.pv:=tmethod(@orgpd) }
+      addstatement(statement,cassignmentnode.create(
+        ctypeconvnode.create_explicit(genloadfield(ctemprefnode.create(literaltemp),'PV'),orgpv),
+          procvarnode.getcopy));
+      { and return the address of the temp }
+      addstatement(statement,caddrnode.create(ctemprefnode.create(literaltemp)));
+      { typecheck this now, because the current source may be written in TP/
+        Delphi/MacPas mode and the above node tree has been constructed for
+        ObjFPC mode, which has been set by replace_scanner (in Delphi, the
+        assignment to invoke would be without the proc_to_procvar conversion) }
+      typecheckpass(result);
+    end;
+
+
+  function generate_block_for_procaddr(procloadnode: tloadnode): tnode;
+    var
+      procvarnode: tnode;
+      { procvardef representing the original function we want to invoke }
+      orgpv: tprocvardef;
+      { procdef of the original function we want to invoke }
+      orgpd,
+      { procdef for the invoke-wrapper that we generated to call the original
+        function via a procvar }
+      invokepd: tprocdef;
+      blockliteraldef: tdef;
+      descriptor,
+      blockisasym,
+      blockliteralsym: tstaticvarsym;
+      blockflags: longint;
+      old_symtablestack: tsymtablestack;
+      sstate: tscannerstate;
+    begin
+      result:=nil;
+      { supported? (should be caught earlier) }
+      if (procloadnode.resultdef.typ<>procdef) or
+         is_nested_pd(tprocdef(procloadnode.resultdef)) or
+         is_objcclass(tdef(procloadnode.resultdef.owner.defowner)) then
+        internalerror(2014071401);
+
+      { add every symbol that we create here to the unit-level symbol table }
+      old_symtablestack:=symtablestack;
+      symtablestack:=old_symtablestack.getcopyuntil(current_module.localsymtable);
+      { save scanner state }
+      replace_scanner('block literal creation',sstate);
+
+      { def representing the original function }
+      orgpd:=tprocdef(procloadnode.resultdef);
+      { def representing the corresponding procvar type }
+      procvarnode:=ctypeconvnode.create_proc_to_procvar(procloadnode.getcopy);
+      typecheckpass(procvarnode);
+      orgpv:=tprocvardef(procvarnode.resultdef);
+      { get blockdef for this kind of procdef }
+      blockliteraldef:=get_block_literal_type_for_proc(orgpd);
+      { get the invoke wrapper }
+      invokepd:=get_invoke_wrapper(orgpd,orgpv);
+      { get the descriptor }
+      descriptor:=get_block_literal_descriptor(invokepd,blockliteraldef.size);
+      { get the ISA pointer for the literal }
+      blockisasym:=get_block_literal_isa(orgpd);
+      { get the flags for the block }
+      blockflags:=get_block_literal_flags(orgpd,invokepd);
+      { global/simple procedure -> block literal is a typed constant }
+      if orgpd.is_addressonly then
+        begin
+          blockliteralsym:=get_global_proc_literal_sym(blockliteraldef,blockisasym,blockflags,invokepd,descriptor);
+          { result: address of the block literal }
+          result:=caddrnode.create(cloadnode.create(blockliteralsym,blockliteralsym.owner));
+        end
+      else
+        begin
+          result:=get_pascal_method_literal(blockliteraldef,blockisasym,blockflags,procvarnode,invokepd,orgpv,descriptor)
+        end;
+
+      procvarnode.free;
+
+      { restore scanner }
+      restore_scanner(sstate);
+      { restore symtable stack }
+      symtablestack.free;
+      symtablestack:=old_symtablestack;
+    end;
+
+end.
+

+ 7 - 4
compiler/cfileutl.pas

@@ -23,7 +23,9 @@ unit cfileutl;
 
 {$i fpcdefs.inc}
 
+{$ifndef DragonFly}
 {$define usedircache}
+{$endif DragonFly}
 
 interface
 
@@ -124,6 +126,7 @@ interface
     function  GetShortName(const n:TCmdStr):TCmdStr;
     function maybequoted(const s:string):string;
     function maybequoted(const s:ansistring):ansistring;
+    function maybequoted_for_script(const s:ansistring; quote_script: tscripttype):ansistring;
 
     procedure InitFileUtils;
     procedure DoneFileUtils;
@@ -140,7 +143,7 @@ interface
 { * Since native Amiga commands can't handle Unix-style relative paths used by the compiler,
     and some GNU tools, Unix2AmigaPath is needed to handle such situations (KB) * }
 
-{$IF DEFINED(MORPHOS) OR DEFINED(AMIGA)}
+{$IFDEF HASAMIGA}
 { * PATHCONV is implemented in the Amiga/MorphOS system unit * }
 {$NOTE TODO Amiga: implement PathConv() in System unit, which works with AnsiString}
 function Unix2AmigaPath(path: ShortString): ShortString; external name 'PATHCONV';
@@ -187,7 +190,7 @@ implementation
       DirCache : TDirectoryCache;
 
 
-{$IF NOT (DEFINED(MORPHOS) OR DEFINED(AMIGA))}
+{$IFNDEF HASAMIGA}
 { Stub function for Unix2Amiga Path conversion functionality, only available in
   Amiga/MorphOS RTL. I'm open for better solutions. (KB) }
 function Unix2AmigaPath(path: String): String;{$IFDEF USEINLINE}inline;{$ENDIF}
@@ -537,7 +540,7 @@ end;
 {$if defined(unix)}
         if (length(s)>0) and (s[1] in AllowDirectorySeparators) then
           result:=true;
-{$elseif defined(amiga) or defined(morphos)}
+{$elseif defined(hasamiga)}
         (* An Amiga path is absolute, if it has a volume/device name in it (contains ":"),
            otherwise it's always a relative path, no matter if it starts with a directory
            separator or not. (KB) *)
@@ -1077,7 +1080,7 @@ end;
             currPath:=FixPath(ExpandFileName(currpath),false);
             if (CurrentDir<>'') and (Copy(currPath,1,length(CurrentDir))=CurrentDir) then
              begin
-{$if defined(amiga) and defined(morphos)}
+{$ifdef hasamiga}
                currPath:= CurrentDir+Copy(currPath,length(CurrentDir)+1,length(currPath));
 {$else}
                currPath:= CurDirRelPath(source_info)+Copy(currPath,length(CurrentDir)+1,length(currPath));

+ 13 - 0
compiler/cgbase.pas

@@ -101,6 +101,12 @@ interface
          ,addr_dgroup      // the data segment group
          ,addr_seg         // used for getting the segment of an object, e.g. 'mov ax, SEG symbol'
          {$ENDIF}
+         {$IFDEF AARCH64}
+         ,addr_page
+         ,addr_pageoffset
+         ,addr_gotpage
+         ,addr_gotpageoffset
+         {$ENDIF AARCH64}
          );
 
 
@@ -329,6 +335,13 @@ interface
           OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
           OS_M64,OS_M128,OS_M256);
 
+       tcgsize2signed : array[tcgsize] of tcgsize = (OS_NO,
+          OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,
+          OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,
+          OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
+          OS_M64,OS_M128,OS_M256);
+
+
        tcgloc2str : array[TCGLoc] of string[12] = (
             'LOC_INVALID',
             'LOC_VOID',

+ 2 - 2
compiler/cghlcpu.pas

@@ -65,14 +65,14 @@ uses
       procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;
       procedure a_call_reg(list: TAsmList; reg: tregister); override;
       procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
-      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); override;
+      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
     end;
 
 implementation
 
    { thlbasecgcpu }
 
-    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
         internalerror(2012042801);
       end;

+ 29 - 10
compiler/cgobj.pas

@@ -247,7 +247,7 @@ unit cgobj;
           procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);virtual; abstract;
 
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); virtual;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); virtual;
 
           { Multiplication with doubling result size.
             dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. }
@@ -815,8 +815,15 @@ implementation
 
 
     procedure tcg.translate_register(var reg : tregister);
+      var
+        rt: tregistertype;
       begin
-        rg[getregtype(reg)].translate_register(reg);
+        { Getting here without assigned rg is possible for an "assembler nostackframe"
+          function returning x87 float, compiler tries to translate NR_ST which is used for
+          result.  }
+        rt:=getregtype(reg);
+        if assigned(rg[rt]) then
+          rg[rt].translate_register(reg);
       end;
 
 
@@ -828,7 +835,8 @@ implementation
 
     procedure tcg.a_reg_dealloc(list : TAsmList;r : tregister);
       begin
-         list.concat(tai_regalloc.dealloc(r,nil));
+        if (r<>NR_NO) then
+          list.concat(tai_regalloc.dealloc(r,nil));
       end;
 
 
@@ -857,7 +865,14 @@ implementation
          ref : treference;
          tmpreg : tregister;
       begin
-         cgpara.check_simple_location;
+         if assigned(cgpara.location^.next) then
+           begin
+             tg.gethltemp(list,cgpara.def,cgpara.def.size,tt_persistent,ref);
+             a_load_reg_ref(list,size,size,r,ref);
+             a_load_ref_cgpara(list,size,ref,cgpara);
+             tg.ungettemp(list,ref);
+             exit;
+           end;
          paramanager.alloccgpara(list,cgpara);
          if cgpara.location^.shiftval<0 then
            begin
@@ -1325,8 +1340,10 @@ implementation
                       dec(tmpref.offset)
                     else
                       inc(tmpref.offset);
-                    a_load_ref_reg(list,OS_8,OS_16,tmpref,register);
-                    a_op_reg_reg(list,OP_OR,OS_16,tmpreg,register);
+                    tmpreg2:=makeregsize(list,register,OS_16);
+                    a_load_ref_reg(list,OS_8,OS_16,tmpref,tmpreg2);
+                    a_op_reg_reg(list,OP_OR,OS_16,tmpreg,tmpreg2);
+                    a_load_reg_reg(list,OS_16,tosize,tmpreg2,register);
                   end;
               OS_32,OS_S32:
                 if ref.alignment=2 then
@@ -1340,8 +1357,10 @@ implementation
                       dec(tmpref.offset,2)
                     else
                       inc(tmpref.offset,2);
-                    a_load_ref_reg(list,OS_16,OS_32,tmpref,register);
-                    a_op_reg_reg(list,OP_OR,OS_32,tmpreg,register);
+                    tmpreg2:=makeregsize(list,register,OS_32);
+                    a_load_ref_reg(list,OS_16,OS_32,tmpref,tmpreg2);
+                    a_op_reg_reg(list,OP_OR,OS_32,tmpreg,tmpreg2);
+                    a_load_reg_reg(list,OS_32,tosize,tmpreg2,register);
                   end
                 else
                   begin
@@ -1360,7 +1379,7 @@ implementation
                         a_load_ref_reg(list,OS_8,OS_32,tmpref,tmpreg2);
                         a_op_reg_reg(list,OP_OR,OS_32,tmpreg2,tmpreg);
                       end;
-                    a_load_reg_reg(list,OS_32,OS_32,tmpreg,register);
+                    a_load_reg_reg(list,OS_32,tosize,tmpreg,register);
                   end
               else
                 a_load_ref_reg(list,fromsize,tosize,tmpref,register);
@@ -2509,7 +2528,7 @@ implementation
       end;
 
 
-    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
         internalerror(2014070601);
       end;

+ 6 - 0
compiler/cgutils.pas

@@ -63,6 +63,12 @@ unit cgutils;
          addressmode : taddressmode;
          shiftmode   : tshiftmode;
 {$endif arm}
+{$ifdef aarch64}
+         symboldata  : tlinkedlistitem;
+         shiftimm    : byte;
+         addressmode : taddressmode;
+         shiftmode   : tshiftmode;
+{$endif aarch64}
 {$ifdef avr}
          addressmode : taddressmode;
 {$endif avr}

+ 3 - 0
compiler/compiler.pas

@@ -65,6 +65,9 @@ uses
 {$ifdef android}
   ,i_android
 {$endif android}
+{$ifdef aros}
+  ,i_aros
+{$endif}
 {$ifdef atari}
   ,i_atari
 {$endif atari}

+ 1 - 0
compiler/compinnr.inc

@@ -88,6 +88,7 @@ const
    in_unbox_x_y         = 78; { manage platforms: extract from class instance }
    in_popcnt_x          = 79;
    in_aligned_x         = 80;
+   in_setstring_x_y_z   = 81;
 
 { Internal constant functions }
    in_const_sqr        = 100;

+ 1 - 1
compiler/comprsrc.pas

@@ -377,7 +377,7 @@ procedure TWinLikeResourceFile.Collect(const fn: ansistring);
 begin
   if fResScript=nil then
     fResScript:=TScript.Create(fScriptName);
-  fResScript.Add(MaybeQuoted(fn));
+  fResScript.Add(maybequoted_for_script(fn,script_fpcres));
   inc(fCollectCount);
 end;
 

+ 77 - 43
compiler/dbgdwarf.pas

@@ -375,7 +375,7 @@ interface
         procedure appendsym_property(list:TAsmList;sym:tpropertysym);override;
 
         function symdebugname(sym:tsym): String; virtual;
-        function symname(sym:tsym): String; virtual;
+        function symname(sym: tsym; manglename: boolean): String; virtual;
         procedure append_visibility(vis: tvisibility);
 
         procedure enum_membersyms_callback(p:TObject;arg:pointer);
@@ -931,10 +931,10 @@ implementation
                   begin
                     if not assigned(def.typesym) then
                       internalerror(200610011);
-                    def.dwarf_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBG',def.owner,symname(def.typesym)),AT_DATA);
-                    def.dwarf_ref_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBGREF',def.owner,symname(def.typesym)),AT_DATA);
+                    def.dwarf_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBG',def.owner,symname(def.typesym, true)),AT_DATA);
+                    def.dwarf_ref_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBGREF',def.owner,symname(def.typesym, true)),AT_DATA);
                     if is_class_or_interface_or_dispinterface(def) or is_objectpascal_helper(def) then
-                      tobjectdef(def).dwarf_struct_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBG2',def.owner,symname(def.typesym)),AT_DATA);
+                      tobjectdef(def).dwarf_struct_lab:=current_asmdata.RefAsmSymbol(make_mangledname('DBG2',def.owner,symname(def.typesym, true)),AT_DATA);
                     def.dbg_state:=dbg_state_written;
                   end
                 else
@@ -945,10 +945,10 @@ implementation
                        (def.owner.symtabletype=globalsymtable) and
                        (def.owner.iscurrentunit) then
                       begin
-                        def.dwarf_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBG',def.owner,symname(def.typesym)),AB_GLOBAL,AT_DATA);
-                        def.dwarf_ref_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBGREF',def.owner,symname(def.typesym)),AB_GLOBAL,AT_DATA);
+                        def.dwarf_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBG',def.owner,symname(def.typesym, true)),AB_GLOBAL,AT_DATA);
+                        def.dwarf_ref_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBGREF',def.owner,symname(def.typesym, true)),AB_GLOBAL,AT_DATA);
                         if is_class_or_interface_or_dispinterface(def) or is_objectpascal_helper(def) then
-                          tobjectdef(def).dwarf_struct_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBG2',def.owner,symname(def.typesym)),AB_GLOBAL,AT_DATA);
+                          tobjectdef(def).dwarf_struct_lab:=current_asmdata.DefineAsmSymbol(make_mangledname('DBG2',def.owner,symname(def.typesym, true)),AB_GLOBAL,AT_DATA);
                         include(def.defstates,ds_dwarf_dbg_info_written);
                       end
                     else
@@ -1371,7 +1371,7 @@ implementation
                 { base type such as byte/shortint/word/... }
                 if assigned(def.typesym) then
                   append_entry(DW_TAG_base_type,false,[
-                    DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                    DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                     DW_AT_encoding,DW_FORM_data1,sign,
                     DW_AT_byte_size,DW_FORM_data1,fullbytesize])
                 else
@@ -1385,7 +1385,7 @@ implementation
                   {       to be always clipped to s32bit for some reason }
                   if assigned(def.typesym) then
                     append_entry(DW_TAG_subrange_type,false,[
-                      DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                      DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                       DW_AT_lower_bound,signform,int64(def.low),
                       DW_AT_upper_bound,signform,int64(def.high)
                       ])
@@ -1544,7 +1544,7 @@ implementation
             if assigned(def.typesym) then
               begin
                 append_entry(DW_TAG_base_type,false,[
-                  DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                  DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                   DW_AT_encoding,DW_FORM_data1,DW_ATE_float,
                   DW_AT_byte_size,DW_FORM_data1,def.size
                   ]);
@@ -1569,7 +1569,7 @@ implementation
             { we should use DW_ATE_signed_fixed, however it isn't supported yet by GDB (FK) }
             if assigned(def.typesym) then
               append_entry(DW_TAG_base_type,false,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_encoding,DW_FORM_data1,DW_ATE_signed,
                 DW_AT_byte_size,DW_FORM_data1,8
                 ])
@@ -1581,7 +1581,7 @@ implementation
           s64comp:
             if assigned(def.typesym) then
               append_entry(DW_TAG_base_type,false,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_encoding,DW_FORM_data1,DW_ATE_signed,
                 DW_AT_byte_size,DW_FORM_data1,8
                 ])
@@ -1604,7 +1604,7 @@ implementation
       begin
         if assigned(def.typesym) then
           append_entry(DW_TAG_enumeration_type,true,[
-            DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+            DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
             DW_AT_byte_size,DW_FORM_data1,def.size
             ])
         else
@@ -1625,7 +1625,7 @@ implementation
             if hp.value>def.maxval then
               break;
             append_entry(DW_TAG_enumerator,false,[
-              DW_AT_name,DW_FORM_string,symname(hp)+#0,
+              DW_AT_name,DW_FORM_string,symname(hp, false)+#0,
               DW_AT_const_value,DW_FORM_data4,hp.value
             ]);
             finish_entry;
@@ -1668,7 +1668,7 @@ implementation
             { no known size, no known upper bound }
             if assigned(def.typesym) then
               append_entry(DW_TAG_array_type,true,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0
                 ])
             else
               append_entry(DW_TAG_array_type,true,[]);
@@ -1685,7 +1685,7 @@ implementation
             size:=def.size;
             if assigned(def.typesym) then
               append_entry(DW_TAG_array_type,true,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_byte_size,DW_FORM_udata,size
                 ])
             else
@@ -1867,7 +1867,7 @@ implementation
         begin
           if assigned(def.typesym) then
             append_entry(DW_TAG_subroutine_type,true,[
-              DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+              DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
               DW_AT_prototyped,DW_FORM_flag,true
             ])
           else
@@ -1882,7 +1882,7 @@ implementation
           for i:=0 to def.paras.count-1 do
             begin
               append_entry(DW_TAG_formal_parameter,false,[
-                DW_AT_name,DW_FORM_string,symname(tsym(def.paras[i]))+#0
+                DW_AT_name,DW_FORM_string,symname(tsym(def.paras[i]), false)+#0
               ]);
               append_labelentry_ref(DW_AT_type,def_dwarf_lab(tparavarsym(def.paras[i]).vardef));
               finish_entry;
@@ -2017,7 +2017,7 @@ implementation
           begin
             current_asmdata.getaddrlabel(TAsmLabel(pointer(labsym)));
             append_entry(DW_TAG_typedef,false,[
-              DW_AT_name,DW_FORM_string,symname(def.typesym)+#0
+              DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0
             ]);
             append_labelentry_ref(DW_AT_type,labsym);
             finish_entry;
@@ -2114,7 +2114,7 @@ implementation
         current_asmdata.asmlists[al_dwarf_info].concat(tai_comment.Create(strpnew('Procdef '+def.fullprocname(true))));
         if not is_objc_class_or_protocol(def.struct) then
           append_entry(DW_TAG_subprogram,true,
-            [DW_AT_name,DW_FORM_string,symname(def.procsym)+#0
+            [DW_AT_name,DW_FORM_string,symname(def.procsym, false)+#0
             { data continues below }
             { problem: base reg isn't known here
               DW_AT_frame_base,DW_FORM_block1,1
@@ -2318,7 +2318,7 @@ implementation
 
     procedure TDebugInfoDwarf.appendsym_var(list:TAsmList;sym:tabstractnormalvarsym);
       begin
-        appendsym_var_with_name_type_offset(list,sym,symname(sym),sym.vardef,0,[]);
+        appendsym_var_with_name_type_offset(list,sym,symname(sym, false),sym.vardef,0,[]);
       end;
 
 
@@ -2404,7 +2404,12 @@ implementation
                     if (vo_is_thread_var in sym.varoptions) then
                       begin
 { TODO: !!! FIXME: dwarf for thread vars !!!}
-                        blocksize:=0;
+{ This is only a minimal change to at least be able to get a value
+  in only one thread is present PM 2014-11-21, like for stabs format }
+                        templist.concat(tai_const.create_8bit(ord(DW_OP_addr)));
+                        templist.concat(tai_const.Create_type_name(aitconst_ptr,sym.mangledname,
+                          offset+sizeof(pint)));
+                        blocksize:=1+sizeof(puint);
                       end
                     else
                       begin
@@ -2554,7 +2559,7 @@ implementation
 
     procedure TDebugInfoDwarf.appendsym_fieldvar(list:TAsmList;sym: tfieldvarsym);
       begin
-        appendsym_fieldvar_with_name_offset(list,sym,symname(sym),sym.vardef,0);
+        appendsym_fieldvar_with_name_offset(list,sym,symname(sym, false),sym.vardef,0);
       end;
 
 
@@ -2601,7 +2606,7 @@ implementation
             if (target_info.endian=endian_little) then
               bitoffset:=(fieldnatsize*8)-bitoffset-sym.vardef.packedbitsize;
             append_entry(DW_TAG_member,false,[
-              DW_AT_name,DW_FORM_string,symname(sym)+#0,
+              DW_AT_name,DW_FORM_string,symname(sym, false)+#0,
               { gcc also generates both a bit and byte size attribute }
               { we don't support ordinals >= 256 bits }
               DW_AT_byte_size,DW_FORM_data1,fieldnatsize,
@@ -2642,7 +2647,7 @@ implementation
 
         if ismember then
           append_entry(DW_TAG_member,false,[
-            DW_AT_name,DW_FORM_string,symname(sym)+#0,
+            DW_AT_name,DW_FORM_string,symname(sym, false)+#0,
           { The DW_AT_declaration tag is invalid according to the DWARF specifications.
             But gcc adds this to static const members and gdb checks
             for this flag. So we have to set it also.
@@ -2652,7 +2657,7 @@ implementation
             ])
         else
           append_entry(DW_TAG_variable,false,[
-            DW_AT_name,DW_FORM_string,symname(sym)+#0
+            DW_AT_name,DW_FORM_string,symname(sym, false)+#0
             ]);
         { for string constants, constdef isn't set because they have no real type }
         case sym.consttyp of
@@ -2821,10 +2826,10 @@ implementation
           begin
             if (tosym.typ=fieldvarsym) then
               internalerror(2009031404);
-            appendsym_var_with_name_type_offset(list,tabstractnormalvarsym(tosym),symname(sym),sym.propdef,offset,[])
+            appendsym_var_with_name_type_offset(list,tabstractnormalvarsym(tosym),symname(sym, false),sym.propdef,offset,[])
           end
         else
-          appendsym_fieldvar_with_name_offset(list,tfieldvarsym(tosym),symname(sym),sym.propdef,offset)
+          appendsym_fieldvar_with_name_offset(list,tfieldvarsym(tosym),symname(sym, false),sym.propdef,offset)
       end;
 
 
@@ -2883,7 +2888,7 @@ implementation
                   flags:=[];
                   if (sym.owner.symtabletype=localsymtable) then
                     include(flags,dvf_force_local_var);
-                  appendsym_var_with_name_type_offset(list,tabstractnormalvarsym(tosym),symname(sym),tabstractvarsym(sym).vardef,offset,flags);
+                  appendsym_var_with_name_type_offset(list,tabstractnormalvarsym(tosym),symname(sym, false),tabstractvarsym(sym).vardef,offset,flags);
                 end;
               templist.free;
               exit;
@@ -2893,7 +2898,7 @@ implementation
         end;
 
         append_entry(DW_TAG_variable,false,[
-          DW_AT_name,DW_FORM_string,symname(sym)+#0,
+          DW_AT_name,DW_FORM_string,symname(sym, false)+#0,
           {
           DW_AT_decl_file,DW_FORM_data1,0,
           DW_AT_decl_line,DW_FORM_data1,
@@ -2914,7 +2919,7 @@ implementation
 
     procedure TDebugInfoDwarf.beforeappendsym(list:TAsmList;sym:tsym);
       begin
-        current_asmdata.asmlists[al_dwarf_info].concat(tai_comment.Create(strpnew('Symbol '+symname(sym))));
+        current_asmdata.asmlists[al_dwarf_info].concat(tai_comment.Create(strpnew('Symbol '+symname(sym, true))));
       end;
 
 
@@ -3270,7 +3275,7 @@ implementation
       end;
 
 
-    function TDebugInfoDwarf.symname(sym: tsym): String;
+    function TDebugInfoDwarf.symname(sym: tsym; manglename: boolean): String;
       begin
         if (sym.typ=paravarsym) and
            (vo_is_self in tparavarsym(sym).varoptions) then
@@ -3290,9 +3295,20 @@ implementation
         else if (ds_dwarf_method_class_prefix in current_settings.debugswitches) and
                 (sym.typ=procsym) and
                 (tprocsym(sym).owner.symtabletype in [objectsymtable,recordsymtable]) then
-          result:=tprocsym(sym).owner.name^+'__'+symdebugname(sym)
+          begin
+            result:=tprocsym(sym).owner.name^+'__';
+            if manglename then
+              result := result + sym.name
+            else
+              result := result + symdebugname(sym);
+          end
         else
-          result:=symdebugname(sym);
+          begin
+            if manglename then
+              result := sym.name
+            else
+              result := symdebugname(sym);
+          end;
       end;
 
 
@@ -3316,6 +3332,7 @@ implementation
         currfileinfo,
         lastfileinfo : tfileposinfo;
         currfuncname : pshortstring;
+        currstatement: boolean;
         currsectype  : TAsmSectiontype;
         hp, hpend : tai;
         infile : tinputfile;
@@ -3334,6 +3351,7 @@ implementation
         currfuncname:=nil;
         currsectype:=sec_code;
         hp:=Tai(list.first);
+        currstatement:=true;
         prevcolumn := 0;
         prevline := 1;
         prevfileidx := 1;
@@ -3365,8 +3383,7 @@ implementation
             end;
 
             if (currsectype=sec_code) and
-               (hp.typ=ait_instruction) and
-               (nolineinfolevel=0) then
+               (hp.typ=ait_instruction) then
               begin
                 currfileinfo:=tailineinfo(hp).fileinfo;
                 { file changed ? (must be before line info) }
@@ -3397,8 +3414,12 @@ implementation
                       end;
                   end;
 
+                { Set the line-nr to 0 if the code does not corresponds to a particular line  }
+                if nolineinfolevel>0 then
+                  currfileinfo.line := 0;
+
                 { line changed ? }
-                if (lastfileinfo.line<>currfileinfo.line) and (currfileinfo.line<>0) then
+                if (lastfileinfo.line<>currfileinfo.line) and ((currfileinfo.line<>0) or (nolineinfolevel>0)) then
                   begin
                     { set address }
                     current_asmdata.getlabel(currlabel, alt_dbgline);
@@ -3431,6 +3452,19 @@ implementation
                         prevcolumn := currfileinfo.column;
                       end;
 
+                    { set statement }
+                    if (currfileinfo.line=0) and currstatement then
+                      begin
+                        currstatement := false;
+                        asmline.concat(tai_const.create_8bit(DW_LNS_negate_stmt));
+                      end;
+
+                    if not currstatement and (currfileinfo.line>0) then
+                      begin
+                        currstatement := true;
+                        asmline.concat(tai_const.create_8bit(DW_LNS_negate_stmt));
+                      end;
+
                     { set line }
                     diffline := currfileinfo.line - prevline;
                     if (diffline >= LINE_BASE) and (OPCODE_BASE + diffline - LINE_BASE <= 255) then
@@ -3517,7 +3551,7 @@ implementation
           file recs. are less than 1k so using data2 is enough }
         if assigned(def.typesym) then
           append_entry(DW_TAG_structure_type,false,[
-           DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+           DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
            DW_AT_byte_size,DW_FORM_udata,def.size
           ])
         else
@@ -3660,7 +3694,7 @@ implementation
 
             if assigned(def.typesym) then
               append_entry(DW_TAG_set_type,false,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_byte_size,DW_FORM_data2,def.size
                 ])
             else
@@ -3696,7 +3730,7 @@ implementation
             { info of modules that contain set tags                          }
             if assigned(def.typesym) then
               append_entry(DW_TAG_base_type,false,[
-                DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+                DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
                 DW_AT_encoding,DW_FORM_data1,DW_ATE_unsigned,
                 DW_AT_byte_size,DW_FORM_data2,def.size
                 ])
@@ -3764,7 +3798,7 @@ implementation
 
         if assigned(def.typesym) then
           append_entry(DW_TAG_array_type,true,[
-            DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+            DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
             DW_AT_data_location,DW_FORM_block1,2
             ])
         else
@@ -3935,7 +3969,7 @@ implementation
       begin
         if assigned(def.typesym) then
           append_entry(DW_TAG_file_type,false,[
-            DW_AT_name,DW_FORM_string,symname(def.typesym)+#0,
+            DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0,
             DW_AT_byte_size,DW_FORM_data2,def.size
             ])
         else
@@ -4073,7 +4107,7 @@ implementation
         { ??? can a undefined def have a typename ? }
         if assigned(def.typesym) then
           append_entry(DW_TAG_unspecified_type,false,[
-            DW_AT_name,DW_FORM_string,symname(def.typesym)+#0
+            DW_AT_name,DW_FORM_string,symname(def.typesym, false)+#0
             ])
         else
           append_entry(DW_TAG_unspecified_type,false,[

+ 6 - 10
compiler/dbgstabs.pas

@@ -1408,20 +1408,16 @@ implementation
             if target_info.system in systems_dotted_function_names then
               mangledname:='.'+mangledname;
             // LBRAC
-            ss:=tostr(STABS_N_LBRAC)+',0,0,'+mangledname;
-            if not(af_stabs_use_function_absolute_addresses in target_asm.flags) then
-              begin
-                ss:=ss+'-';
-                ss:=ss+mangledname;
-              end;
+            if af_stabs_use_function_absolute_addresses in target_asm.flags then
+              ss:=tostr(STABS_N_LBRAC)+',0,0,'+mangledname
+            else
+              ss:=tostr(STABS_N_LBRAC)+',0,0,0';
             result.concat(Tai_stab.Create_ansistr(stab_stabn,ss));
+
             // RBRAC
             ss:=tostr(STABS_N_RBRAC)+',0,0,'+stabsendlabel.name;
             if not(af_stabs_use_function_absolute_addresses in target_asm.flags) then
-              begin
-                ss:=ss+'-';
-                ss:=ss+mangledname;
-              end;
+              ss:=ss+'-'+mangledname;
             result.concat(Tai_stab.Create_ansistr(stab_stabn,ss));
 
             { the stabsendlabel must come after all other stabs for this }

+ 27 - 9
compiler/defcmp.pas

@@ -1533,6 +1533,10 @@ implementation
                    begin
                      { procvar -> procvar }
                      eq:=proc_to_procvar_equal(tprocvardef(def_from),tprocvardef(def_to),cdo_warn_incompatible_univ in cdoptions);
+                     if eq<te_equal then
+                       doconv:=tc_proc_2_procvar
+                     else
+                       doconv:=tc_equal;
                    end;
                  pointerdef :
                    begin
@@ -2176,30 +2180,38 @@ implementation
          if not(assigned(def1)) or not(assigned(def2)) then
            exit;
          { check for method pointer and local procedure pointer:
-             a) if one is a procedure of object, the other also has to be one
-             b) if one is a pure address, the other also has to be one
+             a) anything but procvars can be assigned to blocks
+             b) if one is a procedure of object, the other also has to be one
+                (except for block)
+             c) if one is a pure address, the other also has to be one
                 except if def1 is a global proc and def2 is a nested procdef
                 (global procedures can be converted into nested procvars)
-             c) if def1 is a nested procedure, then def2 has to be a nested
+             d) if def1 is a nested procedure, then def2 has to be a nested
                 procvar and def1 has to have the po_delphi_nested_cc option
-             d) if def1 is a procvar, def1 and def2 both have to be nested or
+             e) if def1 is a procvar, def1 and def2 both have to be nested or
                 non-nested (we don't allow assignments from non-nested to
                 nested procvars to make sure that we can still implement
                 nested procvars using trampolines -- e.g., this would be
                 necessary for LLVM or CIL as long as they do not have support
                 for Delphi-style frame pointer parameter passing) }
-         if (def1.is_methodpointer<>def2.is_methodpointer) or  { a) }
-            ((def1.is_addressonly<>def2.is_addressonly) and    { b) }
+         if is_block(def2) then                                     { a) }
+           { can't explicitly check against procvars here, because
+             def1 may already be a procvar due to a proc_to_procvar;
+             this is checked in the type conversion node itself -> ok }
+         else if (def1.is_methodpointer<>def2.is_methodpointer) or  { b) }
+            ((def1.is_addressonly<>def2.is_addressonly) and         { c) }
              (is_nested_pd(def1) or
               not is_nested_pd(def2))) or
-            ((def1.typ=procdef) and                            { c) }
+            ((def1.typ=procdef) and                                 { d) }
              is_nested_pd(def1) and
              (not(po_delphi_nested_cc in def1.procoptions) or
               not is_nested_pd(def2))) or
-            ((def1.typ=procvardef) and                         { d) }
+            ((def1.typ=procvardef) and                              { e) }
              (is_nested_pd(def1)<>is_nested_pd(def2))) then
            exit;
          pa_comp:=[cpo_ignoreframepointer];
+         if is_block(def2) then
+           include(pa_comp,cpo_ignorehidden);
          if checkincompatibleuniv then
            include(pa_comp,cpo_warn_incompatible_univ);
          { check return value and options, methodpointer is already checked }
@@ -2209,7 +2221,10 @@ implementation
            include(po_comp,po_staticmethod);
          if (m_delphi in current_settings.modeswitches) then
            exclude(po_comp,po_varargs);
-         if (def1.proccalloption=def2.proccalloption) and
+         { for blocks, the calling convention doesn't matter because we have to
+           generate a wrapper anyway }
+         if ((po_is_block in def2.procoptions) or
+             (def1.proccalloption=def2.proccalloption)) and
             ((po_comp * def1.procoptions)= (po_comp * def2.procoptions)) and
             equal_defs(def1.returndef,def2.returndef) then
           begin
@@ -2224,6 +2239,9 @@ implementation
                 { prefer non-nested to non-nested over non-nested to nested }
                 if (is_nested_pd(def1)<>is_nested_pd(def2)) then
                   eq:=te_convert_l1;
+                { in case of non-block to block, we need a type conversion }
+                if (po_is_block in def1.procoptions) <> (po_is_block in def2.procoptions) then
+                  eq:=te_convert_l1;
               end;
             proc_to_procvar_equal:=eq;
           end;

+ 4 - 17
compiler/defutil.pas

@@ -331,13 +331,8 @@ interface
     { returns true of def is a methodpointer }
     function is_methodpointer(def : tdef) : boolean;
 
-{$ifdef i8086}
-    {# Returns true if p is a far pointer def }
-    function is_farpointer(p : tdef) : boolean;
-
-    {# Returns true if p is a huge pointer def }
-    function is_hugepointer(p : tdef) : boolean;
-{$endif i8086}
+    { returns true if def is a C "block" }
+    function is_block(def: tdef): boolean;
 
 implementation
 
@@ -1433,18 +1428,10 @@ implementation
         result:=(def.typ=procvardef) and (po_methodpointer in tprocvardef(def).procoptions);
       end;
 
-{$ifdef i8086}
-    { true if p is a far pointer def }
-    function is_farpointer(p : tdef) : boolean;
-      begin
-        result:=(p.typ=pointerdef) and (tcpupointerdef(p).x86pointertyp=x86pt_far);
-      end;
 
-    { true if p is a huge pointer def }
-    function is_hugepointer(p : tdef) : boolean;
+    function is_block(def: tdef): boolean;
       begin
-        result:=(p.typ=pointerdef) and (tcpupointerdef(p).x86pointertyp=x86pt_huge);
+        result:=(def.typ=procvardef) and (po_is_block in tprocvardef(def).procoptions)
       end;
-{$endif i8086}
 
 end.

+ 1 - 1
compiler/expunix.pas

@@ -154,7 +154,7 @@ begin
            current_asmdata.asmlists[al_procedures].concat(Tai_symbol.Createname_global(hp2.name^,AT_FUNCTION,0));
            if (cs_create_pic in current_settings.moduleswitches) and
              { other targets need to be checked how it works }
-             (target_info.system in [system_i386_freebsd,system_x86_64_freebsd,system_x86_64_linux,system_i386_linux,system_x86_64_solaris,system_i386_solaris,system_i386_android]) then
+             (target_info.system in [system_i386_freebsd,system_x86_64_freebsd,system_x86_64_linux,system_i386_linux,system_x86_64_solaris,system_i386_solaris,system_i386_android,system_x86_64_dragonfly]) then
              begin
 {$ifdef x86}
                sym:=current_asmdata.RefAsmSymbol(pd.mangledname);

+ 1 - 1
compiler/finput.pas

@@ -454,7 +454,7 @@ uses
         fileopen:=false;
         try
           f:=CFileStreamClass.Create(filename,fmOpenRead);
-          fileopen:=true;
+          fileopen:=CStreamError=0;
         except
         end;
       end;

+ 1 - 0
compiler/fpcdefs.inc

@@ -242,6 +242,7 @@
   {$define cpurox}
   {$define cputargethasfixedstack}
   {$define cpurefshaveindexreg}
+  {$define SUPPORT_GET_FRAME}
 {$endif aarch64}
 
 {$IFDEF MACOS}

+ 10 - 2
compiler/fppu.pas

@@ -23,6 +23,8 @@ unit fppu;
 
 {$i fpcdefs.inc}
 
+{ $define DEBUG_UNIT_CRC_CHANGES}
+
 { close ppufiles on system that are
   short on file handles like DOS system PM }
 {$ifdef GO32V2}
@@ -836,7 +838,13 @@ var
                 end;
              end
            else
-             temp:=' not available';
+             begin
+               { still register the source module for proper error messages
+                 since source_avail for the module is still false, this should not hurt }
+               sourcefiles.register_file(tdosinputfile.create(hs));
+
+               temp:=' not available';
+             end;
            if is_main then
              begin
                mainsource:=hs;
@@ -1471,7 +1479,7 @@ var
         { we can now derefence all pointers to the implementation parts }
         tstoredsymtable(globalsymtable).derefimpl;
         if assigned(localsymtable) then
-          tstoredsymtable(localsymtable).derefimpl;
+            tstoredsymtable(localsymtable).derefimpl;
 
          { read whole program optimisation-related information }
          wpoinfo:=tunitwpoinfo.ppuload(ppufile);

+ 24 - 0
compiler/generic/cpuinfo.pas

@@ -22,6 +22,13 @@ Interface
 
 Type
    bestreal = extended;
+{$if FPC_FULLVERSION>20700}
+{$ifdef FPC_HAS_TYPE_EXTENDED}
+   bestrealrec = TExtended80Rec;
+{$else}
+   bestrealrec = TDoubleRec;
+{$endif}
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts64real = double;
    ts80real = type extended;
@@ -42,7 +49,24 @@ Type
       fpu_soft
      );
 
+   tcontrollertype =
+     (ct_none
+     );
+
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = false;
+
+   { We know that there are fields after sramsize
+     but we don't care about this warning }
+   {$PUSH}
+    {$WARN 3177 OFF}
+   embedded_controllers : array [tcontrollertype] of tcontrollerdatatype =
+   (
+      (controllertypestr:''; controllerunitstr:''; flashbase:0; flashsize:0; srambase:0; sramsize:0));
+   {$POP}
+
    cputypestr : array[tcputype] of string[8] = ('none');
    fputypestr : array[tfputype] of string[6] = ('none','soft');
 

+ 51 - 37
compiler/globals.pas

@@ -41,7 +41,6 @@ interface
 {$ELSE}
       fksysutl,
 {$ENDIF}
-
       { comphook pulls in sysutils anyways }
       cutils,cclasses,cfileutl,
       cpuinfo,
@@ -52,7 +51,7 @@ interface
          [m_delphi,m_class,m_objpas,m_result,m_string_pchar,
           m_pointer_2_procedure,m_autoderef,m_tp_procvar,m_initfinal,m_default_ansistring,
           m_out,m_default_para,m_duplicate_names,m_hintdirective,
-          m_property,m_default_inline,m_except,m_advanced_records];
+          m_property,m_default_inline,m_except,m_advanced_records,m_type_helpers];
        delphiunicodemodeswitches = delphimodeswitches + [m_systemcodepage,m_default_unicodestring];
        fpcmodeswitches =
          [m_fpc,m_string_pchar,m_nested_comment,m_repeat_forward,
@@ -84,23 +83,23 @@ interface
        treelogfilename = 'tree.log';
 
 {$if defined(CPUARM) and defined(FPUFPA)}
-       MathQNaN : tdoublerec = (bytes : (0,0,252,255,0,0,0,0));
-       MathInf : tdoublerec = (bytes : (0,0,240,127,0,0,0,0));
-       MathNegInf : tdoublerec = (bytes : (0,0,240,255,0,0,0,0));
-       MathPi : tdoublerec =  (bytes : (251,33,9,64,24,45,68,84));
+       MathQNaN : tcompdoublerec = (bytes : (0,0,252,255,0,0,0,0));
+       MathInf : tcompdoublerec = (bytes : (0,0,240,127,0,0,0,0));
+       MathNegInf : tcompdoublerec = (bytes : (0,0,240,255,0,0,0,0));
+       MathPi : tcompdoublerec =  (bytes : (251,33,9,64,24,45,68,84));
 {$else}
 {$ifdef FPC_LITTLE_ENDIAN}
-       MathQNaN : tdoublerec = (bytes : (0,0,0,0,0,0,252,255));
-       MathInf : tdoublerec = (bytes : (0,0,0,0,0,0,240,127));
-       MathNegInf : tdoublerec = (bytes : (0,0,0,0,0,0,240,255));
-       MathPi : tdoublerec = (bytes : (24,45,68,84,251,33,9,64));
-       MathPiExtended : textendedrec = (bytes : (53,194,104,33,162,218,15,201,0,64));
+       MathQNaN : tcompdoublerec = (bytes : (0,0,0,0,0,0,252,255));
+       MathInf : tcompdoublerec = (bytes : (0,0,0,0,0,0,240,127));
+       MathNegInf : tcompdoublerec = (bytes : (0,0,0,0,0,0,240,255));
+       MathPi : tcompdoublerec = (bytes : (24,45,68,84,251,33,9,64));
+       MathPiExtended : tcompextendedrec = (bytes : (53,194,104,33,162,218,15,201,0,64));
 {$else FPC_LITTLE_ENDIAN}
-       MathQNaN : tdoublerec = (bytes : (255,252,0,0,0,0,0,0));
-       MathInf : tdoublerec = (bytes : (127,240,0,0,0,0,0,0));
-       MathNegInf : tdoublerec = (bytes : (255,240,0,0,0,0,0,0));
-       MathPi : tdoublerec =  (bytes : (64,9,33,251,84,68,45,24));
-       MathPiExtended : textendedrec = (bytes : (64,0,201,15,218,162,33,104,194,53));
+       MathQNaN : tcompdoublerec = (bytes : (255,252,0,0,0,0,0,0));
+       MathInf : tcompdoublerec = (bytes : (127,240,0,0,0,0,0,0));
+       MathNegInf : tcompdoublerec = (bytes : (255,240,0,0,0,0,0,0));
+       MathPi : tcompdoublerec =  (bytes : (64,9,33,251,84,68,45,24));
+       MathPiExtended : tcompextendedrec = (bytes : (64,0,201,15,218,162,33,104,194,53));
 {$endif FPC_LITTLE_ENDIAN}
 {$endif}
 
@@ -163,9 +162,8 @@ interface
 {$endif defined(ARM)}
 
         { CPU targets with microcontroller support can add a controller specific unit }
-{$if defined(ARM) or defined(AVR) or defined(MIPSEL)}
          controllertype   : tcontrollertype;
-{$endif defined(ARM) or defined(AVR) or defined(MIPSEL)}
+
          { WARNING: this pointer cannot be written as such in record token }
          pmessage : pmessagestaterecord;
        end;
@@ -238,7 +236,9 @@ interface
        paralinkoptions   : TCmdStr;
        paradynamiclinker : string;
        paraprintnodetree : byte;
+{$ifdef PREPROCWRITE}
        parapreprocess    : boolean;
+{$endif PREPROCWRITE}
        printnodefile     : text;
 
        {  typical cross compiling params}
@@ -411,7 +411,7 @@ interface
 {$endif i8086}
         maxfpuregisters : 0;
 
-{ Note: GENERIC_CPU is sued together with generic subdirectory to
+{ Note: GENERIC_CPU is used together with generic subdirectory to
   be able to compile some of the units without any real CPU.
   This is used to generate a CPU independant PPUDUMP utility. PM }
 {$ifdef GENERIC_CPU}
@@ -445,8 +445,8 @@ interface
         fputype : fpu_hard;
   {$endif sparc}
   {$ifdef arm}
-        cputype : cpu_armv3;
-        optimizecputype : cpu_armv3;
+        cputype : cpu_armv4;
+        optimizecputype : cpu_armv4;
         fputype : fpu_fpa;
   {$endif arm}
   {$ifdef x86_64}
@@ -502,9 +502,7 @@ interface
 {$if defined(ARM)}
         instructionset : is_arm;
 {$endif defined(ARM)}
-{$if defined(ARM) or defined(AVR) or defined(MIPSEL)}
         controllertype : ct_none;
-{$endif defined(ARM) or defined(AVR) or defined(MIPSEL)}
         pmessage : nil;
       );
 
@@ -536,9 +534,7 @@ interface
     function Setoptimizecputype(const s:string;var a:tcputype):boolean;
     function Setcputype(const s:string;var a:tsettings):boolean;
     function SetFpuType(const s:string;var a:tfputype):boolean;
-{$if defined(arm) or defined(avr) or defined(mipsel)}
     function SetControllerType(const s:string;var a:tcontrollertype):boolean;
-{$endif defined(arm) or defined(avr) or defined(mipsel)}
     function IncludeFeature(const s : string) : boolean;
     function SetMinFPConstPrec(const s: string; var a: tfloattype) : boolean;
 
@@ -838,6 +834,13 @@ implementation
            Replace(s,'$FPCTARGET',target_os_string)
          else
            Replace(s,'$FPCTARGET',target_full_string);
+         Replace(s,'$FPCSUBARCH',lower(cputypestr[init_settings.cputype]));
+         Replace(s,'$FPCABI',lower(abiinfo[target_info.abi].name));
+{$ifdef i8086}
+         Replace(s,'$FPCMEMORYMODEL',lower(x86memorymodelstr[init_settings.x86memorymodel]));
+{$else i8086}
+         Replace(s,'$FPCMEMORYMODEL','flat');
+{$endif i8086}
 {$ifdef mswindows}
          ReplaceSpecialFolder('$LOCAL_APPDATA',CSIDL_LOCAL_APPDATA);
          ReplaceSpecialFolder('$APPDATA',CSIDL_APPDATA);
@@ -968,7 +971,7 @@ implementation
           result := -1;
       end;
 
-    function convertdoublerec(d : tdoublerec) : tdoublerec;{$ifdef USEINLINE}inline;{$endif}
+    function convertdoublerec(d : tcompdoublerec) : tcompdoublerec;{$ifdef USEINLINE}inline;{$endif}
 {$ifdef CPUARM}
       var
         i : longint;
@@ -1177,23 +1180,34 @@ implementation
       end;
 
 
-{$if defined(arm) or defined(avr) or defined(mipsel)}
     function SetControllerType(const s:string;var a:tcontrollertype):boolean;
       var
         t  : tcontrollertype;
         hs : string;
       begin
-        result:=false;
-        hs:=Upper(s);
-        for t:=low(tcontrollertype) to high(tcontrollertype) do
-          if embedded_controllers[t].controllertypestr=hs then
-            begin
-              a:=t;
-              result:=true;
-              break;
-            end;
+{ The following check allows to reduce amount of code for platforms  }
+{ not supporting microcontrollers due to evaluation at compile time. }
+{$PUSH}
+ {$WARN 6018 OFF} (* Unreachable code due to compile time evaluation *)
+        if ControllerSupport then
+         begin
+          result:=false;
+          hs:=Upper(s);
+          for t:=low(tcontrollertype) to high(tcontrollertype) do
+            if embedded_controllers[t].controllertypestr=hs then
+              begin
+                a:=t;
+                result:=true;
+                break;
+              end;
+         end
+        else
+         begin
+          a := ct_none;
+          Result := true;
+         end;
+{$POP}
       end;
-{$endif defined(arm) or defined(avr) or defined(mipsel)}
 
 
     function IncludeFeature(const s : string) : boolean;

+ 20 - 8
compiler/globtype.pas

@@ -110,12 +110,12 @@ interface
 {$endif i8086}
 
        { Use a variant record to be sure that the array if aligned correctly }
-       tdoublerec=record
+       tcompdoublerec=record
          case byte of
            0 : (bytes:array[0..7] of byte);
            1 : (value:double);
        end;
-       textendedrec=record
+       tcompextendedrec=record
          case byte of
            0 : (bytes:array[0..9] of byte);
            1 : (value:extended);
@@ -267,7 +267,7 @@ interface
      type
        { optimizer }
        toptimizerswitch = (cs_opt_none,
-         cs_opt_level1,cs_opt_level2,cs_opt_level3,
+         cs_opt_level1,cs_opt_level2,cs_opt_level3,cs_opt_level4,
          cs_opt_regvar,cs_opt_uncertain,cs_opt_size,cs_opt_stackframe,
          cs_opt_peephole,cs_opt_asmcse,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_nodecse,
          cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler,cs_opt_autoinline,cs_useebp,cs_userbp,
@@ -313,7 +313,7 @@ interface
 
     const
        OptimizerSwitchStr : array[toptimizerswitch] of string[17] = ('',
-         'LEVEL1','LEVEL2','LEVEL3',
+         'LEVEL1','LEVEL2','LEVEL3','LEVEL4',
          'REGVAR','UNCERTAIN','SIZE','STACKFRAME',
          'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE',
          'DFA','STRENGTH','SCHEDULE','AUTOINLINE','USEEBP','USERBP',
@@ -345,7 +345,7 @@ interface
        genericlevel1optimizerswitches = [cs_opt_level1,cs_opt_peephole];
        genericlevel2optimizerswitches = [cs_opt_level2,cs_opt_remove_emtpy_proc];
        genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate,cs_opt_nodedfa];
-       genericlevel4optimizerswitches = [cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
+       genericlevel4optimizerswitches = [cs_opt_level4,cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
 
        { whole program optimizations whose information generation requires
          information from all loaded units
@@ -400,8 +400,9 @@ interface
                                   fields in Java) }
          m_default_unicodestring, { makes the default string type in $h+ mode unicodestring rather than
                                     ansistring; similarly, char becomes unicodechar rather than ansichar }
-         m_type_helpers         { allows the declaration of "type helper" (non-Delphi) or "record helper"
+         m_type_helpers,        { allows the declaration of "type helper" (non-Delphi) or "record helper"
                                   (Delphi) for primitive types }
+         m_blocks               { support for http://en.wikipedia.org/wiki/Blocks_(C_language_extension) }
        );
        tmodeswitches = set of tmodeswitch;
 
@@ -566,7 +567,8 @@ interface
          'SYSTEMCODEPAGE',
          'FINALFIELDS',
          'UNICODESTRINGS',
-         'TYPEHELPERS');
+         'TYPEHELPERS',
+         'CBLOCKS');
 
 
      type
@@ -615,7 +617,9 @@ interface
          { allocates memory on stack, so stack is unbalanced on exit }
          pi_has_stack_allocs,
          { set if the stack frame of the procedure is estimated }
-         pi_estimatestacksize
+         pi_estimatestacksize,
+         { the routine calls a C-style varargs function }
+         pi_calls_c_varargs
        );
        tprocinfoflags=set of tprocinfoflag;
 
@@ -705,6 +709,14 @@ interface
 
     type
       tx86memorymodel = (mm_tiny,mm_small,mm_medium,mm_compact,mm_large,mm_huge);
+    const
+      x86memorymodelstr : array[tx86memorymodel] of string[7]=(
+        'TINY',
+        'SMALL',
+        'MEDIUM',
+        'COMPACT',
+        'LARGE',
+        'HUGE');
 
   { hide Sysutils.ExecuteProcess in units using this one after SysUtils}
   const

+ 4 - 3
compiler/hlcg2ll.pas

@@ -171,7 +171,7 @@ unit hlcg2ll;
           procedure a_loadaddr_ref_reg(list : TAsmList;fromsize, tosize : tdef;const ref : treference;r : tregister);override;
 
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); override;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override;
 
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); override;
@@ -586,9 +586,9 @@ implementation
       cg.a_loadaddr_ref_reg(list,ref,r);
     end;
 
-  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister);
+  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister);
     begin
-      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(size),src,dst);
+      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(srcsize),def_cgsize(dstsize),src,dst);
     end;
 
   procedure thlcg2ll.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister);
@@ -656,6 +656,7 @@ implementation
         internalerror(2012071226);
       tocgsize:=getintmmcgsize(reg,def_cgmmsize(tosize));
       case loc.loc of
+        LOC_CONSTANT,
         LOC_SUBSETREG,LOC_CSUBSETREG,
         LOC_SUBSETREF,LOC_CSUBSETREF:
           begin

+ 30 - 14
compiler/hlcgobj.pas

@@ -301,7 +301,7 @@ unit hlcgobj;
          public
 
           { bit scan instructions (still need transformation to thlcgobj) }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); virtual; abstract;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); virtual; abstract;
 
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); virtual; abstract;
@@ -536,9 +536,9 @@ unit hlcgobj;
          public
 
           procedure gen_load_para_value(list:TAsmList);virtual;
-         protected
           { helpers called by gen_load_para_value }
           procedure g_copyvalueparas(p:TObject;arg:pointer);virtual;
+         protected
           procedure gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation;const cgpara: tcgpara;locintsize: longint);virtual;
           procedure init_paras(p:TObject;arg:pointer);
          protected
@@ -2311,7 +2311,9 @@ implementation
 
   function thlcgobj.get_bit_reg_ref_sref(list: TAsmList; bitnumbersize, refsize: tdef; bitnumber: tregister; const ref: treference): tsubsetreference;
     var
-      tmpreg: tregister;
+      refptrdef: tdef;
+      tmpreg,
+      newbase: tregister;
     begin
       result.ref:=ref;
       result.startbit:=0;
@@ -2323,13 +2325,15 @@ implementation
 
       { don't assign to ref.base, that one is for pointers and this is an index
         (important for platforms like LLVM) }
-      if (result.ref.index=NR_NO) then
-        result.ref.index:=tmpreg
-      else
+      if result.ref.index<>NR_NO then
         begin
-          a_op_reg_reg(list,OP_ADD,ptruinttype,result.ref.index,tmpreg);
-          result.ref.index:=tmpreg;
+          { don't just add to ref.index, as it may be scaled }
+          refptrdef:=getpointerdef(refsize);
+          newbase:=getaddressregister(list,refptrdef);
+          a_loadaddr_ref_reg(list,refsize,refptrdef,ref,newbase);
+          reference_reset_base(result.ref,refptrdef,newbase,0,result.ref.alignment);
         end;
+      result.ref.index:=tmpreg;
       tmpreg:=getintregister(list,ptruinttype);
       a_load_reg_reg(list,bitnumbersize,ptruinttype,bitnumber,tmpreg);
       a_op_const_reg(list,OP_AND,ptruinttype,7,tmpreg);
@@ -4387,7 +4391,7 @@ implementation
     var
       href : treference;
     begin
-      if (tsym(p).typ=staticvarsym) then
+      if (tsym(p).typ=staticvarsym) and not(tstaticvarsym(p).noregvarinitneeded) then
        begin
          { Static variables can have the initialloc only set to LOC_CxREGISTER
            or LOC_INVALID, for explaination see gen_alloc_symtable (PFV) }
@@ -4519,7 +4523,11 @@ implementation
          if (tparavarsym(p).varspez=vs_value) then
           begin
             include(current_procinfo.flags,pi_needs_implicit_finally);
-            location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).localloc,href,is_open_array(tparavarsym(p).vardef),sizeof(pint));
+            location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).localloc,href,
+              is_open_array(tparavarsym(p).vardef) or
+              ((target_info.system in systems_caller_copy_addr_value_para) and
+               paramanager.push_addr_param(vs_value,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)),
+              sizeof(pint));
             if is_open_array(tparavarsym(p).vardef) then
               begin
                 if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
@@ -4539,7 +4547,8 @@ implementation
           end;
        end;
       { open arrays can contain elements requiring init/final code, so the else has been removed here }
-      if (tparavarsym(p).varspez=vs_value) and
+      if not(target_info.system in systems_caller_copy_addr_value_para) and
+         (tparavarsym(p).varspez=vs_value) and
          (is_open_array(tparavarsym(p).vardef) or
           is_array_of_const(tparavarsym(p).vardef)) then
         begin
@@ -4577,7 +4586,11 @@ implementation
                  if not((tparavarsym(p).vardef.typ=variantdef) and
                    paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
                    begin
-                     location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,is_open_array(tparavarsym(p).vardef),sizeof(pint));
+                     location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,
+                       is_open_array(tparavarsym(p).vardef) or
+                       ((target_info.system in systems_caller_copy_addr_value_para) and
+                        paramanager.push_addr_param(vs_value,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)),
+                       sizeof(pint));
                      if is_open_array(tparavarsym(p).vardef) then
                        begin
                          if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
@@ -4678,8 +4691,11 @@ implementation
     begin
       list:=TAsmList(arg);
       if (tsym(p).typ=paravarsym) and
-         (tparavarsym(p).varspez=vs_value) and
-        (paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
+         ((vo_has_local_copy in tparavarsym(p).varoptions) or
+          (not(target_info.system in systems_caller_copy_addr_value_para) and
+           (is_open_array(tparavarsym(p).vardef) or
+            is_array_of_const(tparavarsym(p).vardef)) and
+           (tparavarsym(p).varspez=vs_value))) then
         begin
           { we have no idea about the alignment at the caller side }
           location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,true,1);

+ 63 - 22
compiler/htypechk.pas

@@ -26,7 +26,7 @@ unit htypechk;
 interface
 
     uses
-      cclasses,tokens,cpuinfo,
+      cclasses,cmsgs,tokens,cpuinfo,
       node,globtype,
       symconst,symtype,symdef,symsym,symbase;
 
@@ -178,6 +178,8 @@ interface
       arrays, records and objects are checked recursively }
     function is_valid_for_default(def:tdef):boolean;
 
+    procedure UninitializedVariableMessage(pos : tfileposinfo;warning,local,managed : boolean;name : TMsgStr);
+
 implementation
 
     uses
@@ -1092,6 +1094,23 @@ implementation
       end;
 
 
+    procedure UninitializedVariableMessage(pos : tfileposinfo;warning,local,managed : boolean;name : TMsgStr);
+      const
+        msg : array[false..true,false..true,false..true] of dword = (
+            (
+              (sym_h_uninitialized_variable,sym_h_uninitialized_managed_variable),
+              (sym_h_uninitialized_local_variable,sym_h_uninitialized_managed_local_variable)
+            ),
+            (
+              (sym_w_uninitialized_variable,sym_w_uninitialized_managed_variable),
+              (sym_w_uninitialized_local_variable,sym_w_uninitialized_managed_local_variable)
+            )
+          );
+      begin
+        CGMessagePos1(pos,msg[warning,local,managed],name);
+      end;
+
+
     procedure set_varstate(p:tnode;newstate:tvarstate;varstateflags:tvarstateflags);
       const
         vstrans: array[tvarstate,tvarstate] of tvarstate = (
@@ -1197,32 +1216,29 @@ implementation
                                  if (vo_is_funcret in hsym.varoptions) then
                                    begin
                                      if (vsf_use_hints in varstateflags) then
-                                       CGMessagePos(p.fileinfo,sym_h_function_result_uninitialized)
-                                     else
-                                       CGMessagePos(p.fileinfo,sym_w_function_result_uninitialized)
-                                   end
-                                 else
-                                   begin
-                                     if tloadnode(p).symtable.symtabletype=localsymtable then
                                        begin
-                                         { on the JVM, an uninitialized var-parameter
-                                           is just as fatal as a nil pointer dereference }
-                                         if (vsf_use_hints in varstateflags) and
-                                            not(target_info.system in systems_jvm) then
-                                           CGMessagePos1(p.fileinfo,sym_h_uninitialized_local_variable,hsym.realname)
+                                         if is_managed_type(hsym.vardef) then
+                                           CGMessagePos(p.fileinfo,sym_h_managed_function_result_uninitialized)
                                          else
-                                           CGMessagePos1(p.fileinfo,sym_w_uninitialized_local_variable,hsym.realname);
+                                           CGMessagePos(p.fileinfo,sym_h_function_result_uninitialized);
                                        end
                                      else
                                        begin
-                                         { on the JVM, an uninitialized var-parameter
-                                           is just as fatal as a nil pointer dereference }
-                                         if (vsf_use_hints in varstateflags) and
-                                            not(target_info.system in systems_jvm) then
-                                           CGMessagePos1(p.fileinfo,sym_h_uninitialized_variable,hsym.realname)
+                                         if is_managed_type(hsym.vardef) then
+                                           CGMessagePos(p.fileinfo,sym_w_managed_function_result_uninitialized)
                                          else
-                                           CGMessagePos1(p.fileinfo,sym_w_uninitialized_variable,hsym.realname);
+                                          CGMessagePos(p.fileinfo,sym_w_function_result_uninitialized);
                                        end;
+                                   end
+                                 else
+                                   begin
+                                     UninitializedVariableMessage(p.fileinfo,
+                                       { on the JVM, an uninitialized var-parameter
+                                         is just as fatal as a nil pointer dereference }
+                                       not((vsf_use_hints in varstateflags) and not(target_info.system in systems_jvm)),
+                                       tloadnode(p).symtable.symtabletype=localsymtable,
+                                       is_managed_type(tloadnode(p).resultdef),
+                                       hsym.realname);
                                    end;
                                end;
                            end
@@ -2227,7 +2243,7 @@ implementation
                  break;
              end;
            if is_objectpascal_helper(structdef) and
-              (tobjectdef(structdef).typ in [recorddef,objectdef]) then
+              (tobjectdef(structdef).extendeddef.typ in [recorddef,objectdef]) then
              begin
                { search methods in the extended type as well }
                srsym:=tprocsym(tabstractrecorddef(tobjectdef(structdef).extendeddef).symtable.FindWithHash(hashedid));
@@ -2583,7 +2599,8 @@ implementation
         def_to   : tdef;
         currpt,
         pt       : tcallparanode;
-        eq       : tequaltype;
+        eq,
+        mineq    : tequaltype;
         convtype : tconverttype;
         pdtemp,
         pdoper   : tprocdef;
@@ -2763,6 +2780,30 @@ implementation
                    eq:=compare_defs_ext(n.resultdef,def_to,n.nodetype,convtype,pdoper,cdoptions);
                    n.free;
                  end
+              else if is_open_array(def_to) and
+                      is_class_or_interface_or_dispinterface_or_objc_or_java(tarraydef(def_to).elementdef) and
+                      is_array_constructor(currpt.left.resultdef) and
+                      assigned(tarrayconstructornode(currpt.left).left) then
+                begin
+                  { ensure that [nil] can be converted to "array of tobject",
+                    because if we just try to convert "array of pointer" to
+                    "array of tobject", we get type conversion errors in
+                    non-Delphi modes }
+                  n:=currpt.left;
+                  mineq:=te_exact;
+                  repeat
+                    if tarrayconstructornode(n).left.nodetype=arrayconstructorrangen then
+                      eq:=te_incompatible
+                    else
+                      eq:=compare_defs_ext(tarrayconstructornode(n).left.resultdef,tarraydef(def_to).elementdef,tarrayconstructornode(n).left.nodetype,convtype,pdoper,cdoptions);
+                    if eq<mineq then
+                      mineq:=eq;
+                    if eq=te_incompatible then
+                      break;
+                    n:=tarrayconstructornode(n).right;
+                  until not assigned(n);
+                  eq:=mineq;
+                end
               else
               { generic type comparision }
                begin

+ 8 - 1
compiler/i386/cgcpu.pas

@@ -314,6 +314,13 @@ unit cgcpu;
         end;
 
       begin
+        { Release PIC register }
+        if (cs_create_pic in current_settings.moduleswitches) and
+           (tf_pic_uses_got in target_info.flags) and
+           (pi_needs_got in current_procinfo.flags) and
+           not(target_info.system in systems_darwin) then
+          list.concat(tai_regalloc.dealloc(NR_PIC_OFFSET_REG,nil));
+
         { MMX needs to call EMMS }
         if assigned(rg[R_MMXREGISTER]) and
            (rg[R_MMXREGISTER].uses_registers) then
@@ -336,7 +343,7 @@ unit cgcpu;
               begin
                 if (not paramanager.use_fixed_stack) then
                   internal_restore_regs(list,not (pi_has_stack_allocs in current_procinfo.flags));
-                list.concat(Taicpu.op_none(A_LEAVE,S_NO));
+                generate_leave(list);
               end;
             list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
           end;

+ 1 - 1
compiler/i386/cpuelf.pas

@@ -509,7 +509,7 @@ implementation
                               system_i386_openbsd,system_i386_netbsd,
                               system_i386_Netware,system_i386_netwlibc,
                               system_i386_solaris,system_i386_embedded,
-                              system_i386_android];
+                              system_i386_android,system_i386_aros];
          flags : [af_outputbinary,af_smartlink_sections,af_supports_dwarf];
          labelprefix : '.L';
          comment : '';

+ 29 - 8
compiler/i386/cpuinfo.pas

@@ -30,6 +30,9 @@ Interface
 
 Type
    bestreal = extended;
+{$if FPC_FULLVERSION>20700}
+   bestrealrec = TExtended80Rec;
+{$endif FPC_FULLVERSION>20700}
    ts32real = single;
    ts64real = double;
    ts80real = extended;
@@ -66,8 +69,25 @@ Type
       fpu_avx2
      );
 
+   tcontrollertype =
+     (ct_none
+     );
+
 
 Const
+   { Is there support for dealing with multiple microcontrollers available }
+   { for this platform? }
+   ControllerSupport = false;
+
+   { We know that there are fields after sramsize
+     but we don't care about this warning }
+   {$PUSH}
+    {$WARN 3177 OFF}
+   embedded_controllers : array [tcontrollertype] of tcontrollerdatatype =
+   (
+      (controllertypestr:''; controllerunitstr:''; flashbase:0; flashsize:0; srambase:0; sramsize:0));
+   {$POP}
+
    { calling conventions supported by the code generator }
    supported_calling_conventions : tproccalloptions = [
      pocall_internproc,
@@ -131,7 +151,8 @@ Const
 
 type
    tcpuflags =
-      (CPUX86_HAS_SSEUNIT,
+      (CPUX86_HAS_CMOV,
+       CPUX86_HAS_SSEUNIT,
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_POPCNT,
@@ -147,13 +168,13 @@ type
      { cpu_none      } [],
      { cpu_386       } [],
      { cpu_Pentium   } [],
-     { cpu_Pentium2  } [],
-     { cpu_Pentium3  } [CPUX86_HAS_SSEUNIT],
-     { cpu_Pentium4  } [CPUX86_HAS_SSEUNIT],
-     { cpu_PentiumM  } [CPUX86_HAS_SSEUNIT],
-     { cpu_core_i    } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
-     { cpu_core_avx  } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
-     { cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
+     { cpu_Pentium2  } [CPUX86_HAS_CMOV],
+     { cpu_Pentium3  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_Pentium4  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_PentiumM  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_core_i    } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
+     { cpu_core_avx  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
+     { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
    );
 
 

+ 21 - 1
compiler/i386/cpupara.pas

@@ -114,6 +114,23 @@ unit cpupara;
                   end;
               end;
             end;
+          system_i386_os2,
+          system_i386_emx:
+            begin
+              case def.typ of
+                recorddef :
+                  begin
+                    { EMX port of GCC returns small records in the FUNCTION_RETURN_REG up to 4 bytes in registers. }
+                    if ((pd.proccalloption in [pocall_cdecl,pocall_cppdecl]) and
+                        (def.size>0) and
+                        (def.size<=4)) then
+                     begin
+                       result:=false;
+                       exit;
+                     end;
+                  end;
+              end;
+            end;
           system_i386_freebsd,
           system_i386_openbsd,
           system_i386_darwin,
@@ -243,6 +260,7 @@ unit cpupara;
           pocall_safecall,
           pocall_stdcall,
           pocall_cdecl,
+          pocall_syscall,
           pocall_cppdecl,
           pocall_mwpascal :
             result:=[RS_EAX,RS_EDX,RS_ECX];
@@ -284,7 +302,9 @@ unit cpupara;
           usedef:=forcetempdef;
         { on darwin/i386, if a record has only one field and that field is a
           single or double, it has to be returned like a single/double }
-        if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) and
+        if (target_info.system in [system_i386_darwin,system_i386_iphonesim,
+                                   system_i386_freebsd,system_i386_openbsd,
+                                   system_i386_os2,system_i386_emx]) and
            ((usedef.typ=recorddef) or
             is_object(usedef)) and
            tabstractrecordsymtable(tabstractrecorddef(usedef).symtable).has_single_field(sym) and

+ 3 - 0
compiler/i386/cputarg.pas

@@ -86,6 +86,9 @@ implementation
     {$ifndef NOTARGETEMBEDDED}
       ,t_embed
     {$endif}
+    {$ifndef NOTARGETAROS}
+      ,t_aros
+    {$endif}
 
 {**************************************
              Assemblers

+ 2 - 0
compiler/i386/n386add.pas

@@ -442,6 +442,8 @@ interface
 
     begin
       pass_left_right;
+      reg:=NR_NO;
+      reference_reset(ref,sizeof(pint));
 
       { Mul supports registers and references, so if not register/reference,
         load the location into a register.

+ 36 - 2
compiler/i386/n386cal.pas

@@ -28,13 +28,16 @@ interface
 { $define AnsiStrRef}
 
     uses
-      nx86cal;
+      nx86cal,ncal;
 
     type
        ti386callnode = class(tx86callnode)
        protected
+          procedure gen_syscall_para(para: tcallparanode); override;
           procedure pop_parasize(pop_size:longint);override;
           procedure extra_interrupt_code;override;
+       public
+         procedure do_syscall;override;
        end;
 
 
@@ -46,7 +49,8 @@ implementation
       cgbase,cgutils,
       cpubase,paramgr,
       aasmtai,aasmdata,aasmcpu,
-      ncal,nbas,nmem,nld,ncnv,
+      nbas,nmem,nld,ncnv,
+      symdef,symsym,symcpu,
       cga,cgobj,cpuinfo;
 
 
@@ -55,6 +59,36 @@ implementation
 *****************************************************************************}
 
 
+    procedure ti386callnode.do_syscall;
+      var
+        tmpref: treference;
+      begin
+        case target_info.system of
+          system_i386_aros:
+            begin
+              // one syscall convention for AROS
+              current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('AROS SysCall')));
+              reference_reset(tmpref,sizeof(pint));
+              tmpref.symbol:=current_asmdata.RefAsmSymbol(tstaticvarsym(tcpuprocdef(procdefinition).libsym).mangledname);
+              cg.getcpuregister(current_asmdata.CurrAsmList,NR_EAX);
+              cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,tmpref,NR_EAX);
+              reference_reset_base(tmpref,NR_EAX,-tprocdef(procdefinition).extnumber,sizeof(pint));
+              cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,tmpref,NR_EAX);
+              cg.a_call_reg(current_asmdata.CurrAsmList,NR_EAX);
+              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
+            end;
+          else
+            internalerror(2014081801);
+        end;
+      end;
+
+
+    procedure ti386callnode.gen_syscall_para(para: tcallparanode);
+      begin
+        { lib parameter has no special type but proccalloptions must be a syscall }
+        para.left:=cloadnode.create(tcpuprocdef(procdefinition).libsym,tcpuprocdef(procdefinition).libsym.owner);
+      end;
+
     procedure ti386callnode.extra_interrupt_code;
       begin
         if not(target_info.system in [system_i386_darwin,system_i386_iphonesim,system_i386_android]) then

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません