Răsfoiți Sursa

* Sync with trunk r23404.
* Regenerated makefiles.

git-svn-id: branches/targetandroid@23405 -

yury 12 ani în urmă
părinte
comite
d26f0552a0
100 a modificat fișierele cu 11601 adăugiri și 1063 ștergeri
  1. 337 73
      .gitattributes
  2. 44 10
      Makefile
  3. 23 4
      Makefile.fpc
  4. 59 33
      compiler/Makefile
  5. 41 27
      compiler/Makefile.fpc
  6. 133 0
      compiler/aarch64/a64att.inc
  7. 133 0
      compiler/aarch64/a64atts.inc
  8. 262 0
      compiler/aarch64/a64ins.dat
  9. 2 0
      compiler/aarch64/a64nop.inc
  10. 133 0
      compiler/aarch64/a64op.inc
  11. 238 0
      compiler/aarch64/a64reg.dat
  12. 4 0
      compiler/aarch64/a64tab.inc
  13. 2178 0
      compiler/aarch64/aasmcpu.pas
  14. 66 0
      compiler/aarch64/aoptcpu.pas
  15. 134 0
      compiler/aarch64/aoptcpub.pas
  16. 41 0
      compiler/aarch64/aoptcpud.pas
  17. 458 0
      compiler/aarch64/cpubase.pas
  18. 102 0
      compiler/aarch64/cpuinfo.pas
  19. 716 0
      compiler/aarch64/cpupara.pas
  20. 93 0
      compiler/aarch64/itcpugas.pas
  21. 227 0
      compiler/aarch64/ra64con.inc
  22. 227 0
      compiler/aarch64/ra64dwa.inc
  23. 2 0
      compiler/aarch64/ra64nor.inc
  24. 227 0
      compiler/aarch64/ra64num.inc
  25. 227 0
      compiler/aarch64/ra64rni.inc
  26. 227 0
      compiler/aarch64/ra64sri.inc
  27. 227 0
      compiler/aarch64/ra64sta.inc
  28. 227 0
      compiler/aarch64/ra64std.inc
  29. 227 0
      compiler/aarch64/ra64sup.inc
  30. 5 2
      compiler/aasmbase.pas
  31. 7 5
      compiler/aasmdata.pas
  32. 212 26
      compiler/aasmtai.pas
  33. 142 16
      compiler/aggas.pas
  34. 5 4
      compiler/agjasmin.pas
  35. 1 0
      compiler/alpha/cgcpu.pas
  36. 81 8
      compiler/aopt.pas
  37. 29 0
      compiler/aoptbase.pas
  38. 54 12
      compiler/aoptobj.pas
  39. 144 12
      compiler/arm/aasmcpu.pas
  40. 47 10
      compiler/arm/agarmgas.pas
  41. 714 146
      compiler/arm/aoptcpu.pas
  42. 1 1
      compiler/arm/aoptcpub.pas
  43. 37 5
      compiler/arm/armatt.inc
  44. 32 0
      compiler/arm/armatts.inc
  45. 45 10
      compiler/arm/armins.dat
  46. 1 1
      compiler/arm/armnop.inc
  47. 37 5
      compiler/arm/armop.inc
  48. 38 1
      compiler/arm/armreg.dat
  49. 0 7
      compiler/arm/armtab.inc
  50. 455 139
      compiler/arm/cgcpu.pas
  51. 95 7
      compiler/arm/cpubase.pas
  52. 882 0
      compiler/arm/cpuelf.pas
  53. 205 198
      compiler/arm/cpuinfo.pas
  54. 19 10
      compiler/arm/cpupara.pas
  55. 17 3
      compiler/arm/cpupi.pas
  56. 2 0
      compiler/arm/cputarg.pas
  57. 2 2
      compiler/arm/itcpugas.pas
  58. 174 26
      compiler/arm/narmadd.pas
  59. 3 5
      compiler/arm/narmcal.pas
  60. 71 9
      compiler/arm/narmcnv.pas
  61. 2 3
      compiler/arm/narmcon.pas
  62. 36 14
      compiler/arm/narminl.pas
  63. 72 4
      compiler/arm/narmmat.pas
  64. 2 8
      compiler/arm/narmmem.pas
  65. 70 8
      compiler/arm/narmset.pas
  66. 116 12
      compiler/arm/raarmgas.pas
  67. 34 1
      compiler/arm/rarmcon.inc
  68. 33 0
      compiler/arm/rarmdwa.inc
  69. 1 1
      compiler/arm/rarmnor.inc
  70. 34 1
      compiler/arm/rarmnum.inc
  71. 34 1
      compiler/arm/rarmrni.inc
  72. 34 1
      compiler/arm/rarmsri.inc
  73. 33 0
      compiler/arm/rarmsta.inc
  74. 35 2
      compiler/arm/rarmstd.inc
  75. 34 1
      compiler/arm/rarmsup.inc
  76. 101 1
      compiler/arm/rgcpu.pas
  77. 6 2
      compiler/asmutils.pas
  78. 33 11
      compiler/assemble.pas
  79. 1 1
      compiler/avr/agavrgas.pas
  80. 3 0
      compiler/avr/avrreg.dat
  81. 166 99
      compiler/avr/cgcpu.pas
  82. 16 1
      compiler/avr/cpubase.pas
  83. 16 31
      compiler/avr/cpuinfo.pas
  84. 17 6
      compiler/avr/cpupara.pas
  85. 7 4
      compiler/avr/navradd.pas
  86. 4 0
      compiler/avr/navrmat.pas
  87. 1 0
      compiler/avr/ravrcon.inc
  88. 2 1
      compiler/avr/ravrdwa.inc
  89. 1 1
      compiler/avr/ravrnor.inc
  90. 2 1
      compiler/avr/ravrnum.inc
  91. 2 1
      compiler/avr/ravrrni.inc
  92. 2 1
      compiler/avr/ravrsri.inc
  93. 2 1
      compiler/avr/ravrsta.inc
  94. 2 1
      compiler/avr/ravrstd.inc
  95. 1 0
      compiler/avr/ravrsup.inc
  96. 1 1
      compiler/browcol.pas
  97. 0 2
      compiler/cclasses.pas
  98. 2 0
      compiler/cfileutl.pas
  99. 42 17
      compiler/cg64f32.pas
  100. 29 6
      compiler/cgbase.pas

Fișier diff suprimat deoarece este prea mare
+ 337 - 73
.gitattributes


+ 44 - 10
Makefile

@@ -1,5 +1,5 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2012/07/08]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013/01/16]
 #
 default: help
 MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux jvm-java jvm-android
@@ -209,6 +209,14 @@ endif
 ifeq ($(OS_TARGET),linux)
 linuxHier=1
 endif
+ifndef CROSSCOMPILE
+BUILDFULLNATIVE=1
+export BUILDFULLNATIVE
+endif
+ifdef BUILDFULLNATIVE
+BUILDNATIVE=1
+export BUILDNATIVE
+endif
 export OS_TARGET OS_SOURCE ARCH CPU_TARGET CPU_SOURCE FULL_TARGET FULL_SOURCE TARGETSUFFIX SOURCESUFFIX CROSSCOMPILE
 ifdef FPCDIR
 override FPCDIR:=$(subst \,/,$(FPCDIR))
@@ -258,11 +266,13 @@ ifndef BINUTILSPREFIX
 ifndef CROSSBINDIR
 ifdef CROSSCOMPILE
 ifndef DARWIN2DARWIN
+ifneq ($(CPU_TARGET),jvm)
 BINUTILSPREFIX=$(CPU_TARGET)-$(OS_TARGET)-
 endif
 endif
 endif
 endif
+endif
 UNITSDIR:=$(wildcard $(FPCDIR)/units/$(TARGETSUFFIX))
 ifeq ($(UNITSDIR),)
 UNITSDIR:=$(wildcard $(FPCDIR)/units/$(OS_TARGET))
@@ -294,6 +304,7 @@ endif
 endif
 override PACKAGE_NAME=fpc
 override PACKAGE_VERSION=2.7.1
+REQUIREDVERSION=2.6.0
 ifndef inOS2
 override FPCDIR:=$(BASEDIR)
 export FPCDIR
@@ -396,11 +407,15 @@ ifndef DIST_DESTDIR
 export DIST_DESTDIR:=$(BASEDIR)
 endif
 BASEPACKDIR=$(BASEDIR)/basepack
+ifndef FPCMAKE
 ifeq ($(FULL_SOURCE),$(FULL_TARGET))
 FPCMAKENEW=$(BASEDIR)/utils/fpcm/fpcmake$(EXEEXT)
 else
 FPCMAKENEW=fpcmake
 endif
+else
+FPCMAKENEW=$(FPCMAKE)
+endif
 CLEANOPTS=FPC=$(PPNEW)
 BUILDOPTS=FPC=$(PPNEW) RELEASE=1
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
@@ -416,9 +431,11 @@ BuildOnlyBaseCPUs=jvm
 ifneq ($(wildcard utils),)
 NOUTILSTARGETS=embedded gba $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
+ifdef BUILDFULLNATIVE
 UTILS=1
 endif
 endif
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
@@ -999,6 +1016,18 @@ BATCHEXT=.sh
 EXEEXT=
 SHORTSUFFIX=aix
 endif
+ifeq ($(OS_TARGET),java)
+OEXT=.class
+ASMEXT=.j
+SHAREDLIBEXT=.jar
+SHORTSUFFIX=java
+endif
+ifeq ($(OS_TARGET),android)
+OEXT=.class
+ASMEXT=.j
+SHAREDLIBEXT=.jar
+SHORTSUFFIX=android
+endif
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 FPCMADE=fpcmade.$(SHORTSUFFIX)
 ZIPSUFFIX=$(SHORTSUFFIX)
@@ -1352,17 +1381,12 @@ endif
 endif
 ifdef CREATESHARED
 override FPCOPT+=-Cg
-ifeq ($(CPU_TARGET),i386)
-override FPCOPT+=-Aas
 endif
-endif
-ifeq ($(findstring 2.0.,$(FPC_VERSION)),)
 ifneq ($(findstring $(OS_TARGET),freebsd openbsd netbsd linux solaris),)
 ifeq ($(CPU_TARGET),x86_64)
 override FPCOPT+=-Cg
 endif
 endif
-endif
 ifdef LINKSHARED
 endif
 ifdef OPT
@@ -2624,12 +2648,22 @@ compiler_cycle:
 	$(MAKE) -C compiler cycle
 BUILDSTAMP=build-stamp.$(FULL_TARGET)
 .PHONY: all clean distclean build buildbase install installbase installother zipinstallbase zipinstallotherzipinstall singlezipinstall
+versioncheckstartingcompiler: 
+ifndef CROSSCOMPILE
+ifndef OVERRIDEVERSIONCHECK
+ifneq ($(FPC_VERSION),$(REQUIREDVERSION))
+	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION). If you are absolutely sure that the current compiler is built from the exact same version/revision, you can try to use OVERRIDEVERSIONCHECK=1 to override )
+endif
+endif
+endif
 ifeq ($(findstring $(CPU_TARGET),$(BuildOnlyBaseCPUs)),)
-all: build
+all: versioncheckstartingcompiler build
 install: installall
+zipinstall: zipinstallall
 else
-all: buildbase
+all: versioncheckstartingcompiler buildbase
 install: installbase
+zipinstall: zipinstallbase
 endif
 clean: $(addsuffix _distclean,$(TARGET_DIRS))
 	-$(DEL) build-stamp.*
@@ -2692,8 +2726,8 @@ installall: $(BUILDSTAMP)
 ifeq ($(findstring $(CPU_TARGET), $(BuildOnlyBaseCPUs)),)
 	$(MAKE) installother $(INSTALLOPTS)
 endif
-singlezipinstall: zipinstall
-zipinstall: $(BUILDSTAMP)
+singlezipinstall: zipinstallall
+zipinstallall: $(BUILDSTAMP)
 	$(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
 .PHONY: crossall crossinstall crosszipinstall crosssinglezipinstall
 crossall:

+ 23 - 4
Makefile.fpc

@@ -20,6 +20,9 @@ fpcdir=.
 rule=help
 
 [prerules]
+REQUIREDVERSION=2.6.0
+
+
 # make versions < 3.77 (OS2 version) are buggy
 ifndef inOS2
 override FPCDIR:=$(BASEDIR)
@@ -149,11 +152,15 @@ endif
 BASEPACKDIR=$(BASEDIR)/basepack
 
 # Newly created fpcmake
+ifndef FPCMAKE
 ifeq ($(FULL_SOURCE),$(FULL_TARGET))
 FPCMAKENEW=$(BASEDIR)/utils/fpcm/fpcmake$(EXEEXT)
 else
 FPCMAKENEW=fpcmake
 endif
+else
+FPCMAKENEW=$(FPCMAKE)
+endif
 
 # Build/install options
 CLEANOPTS=FPC=$(PPNEW)
@@ -177,9 +184,11 @@ BuildOnlyBaseCPUs=jvm
 ifneq ($(wildcard utils),)
 NOUTILSTARGETS=embedded gba $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
+ifdef BUILDFULLNATIVE
 UTILS=1
 endif
 endif
+endif
 
 [rules]
 .NOTPARALLEL:
@@ -230,13 +239,23 @@ BUILDSTAMP=build-stamp.$(FULL_TARGET)
 
 .PHONY: all clean distclean build buildbase install installbase installother zipinstallbase zipinstallotherzipinstall singlezipinstall
 
+versioncheckstartingcompiler: 
+ifndef CROSSCOMPILE
+ifndef OVERRIDEVERSIONCHECK
+ifneq ($(FPC_VERSION),$(REQUIREDVERSION))
+	$(error The only supported starting compiler version is $(REQUIREDVERSION). You are trying to build with $(FPC_VERSION). If you are absolutely sure that the current compiler is built from the exact same version/revision, you can try to use OVERRIDEVERSIONCHECK=1 to override )
+endif
+endif
+endif
 
 ifeq ($(findstring $(CPU_TARGET),$(BuildOnlyBaseCPUs)),)
-all: build
+all: versioncheckstartingcompiler build
 install: installall
+zipinstall: zipinstallall
 else
-all: buildbase
+all: versioncheckstartingcompiler buildbase
 install: installbase
+zipinstall: zipinstallbase
 endif
 
 clean: $(addsuffix _distclean,$(TARGET_DIRS))
@@ -318,8 +337,8 @@ ifeq ($(findstring $(CPU_TARGET), $(BuildOnlyBaseCPUs)),)
         $(MAKE) installother $(INSTALLOPTS)
 endif
 
-singlezipinstall: zipinstall
-zipinstall: $(BUILDSTAMP)
+singlezipinstall: zipinstallall
+zipinstallall: $(BUILDSTAMP)
         $(MAKE) fpc_zipinstall ZIPTARGET=install FULLZIPNAME=fpc-$(PACKAGE_VERSION).$(TARGETSUFFIX) $(INSTALLOPTS)
 
 

+ 59 - 33
compiler/Makefile

@@ -1,5 +1,5 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2012/07/08]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013/01/16]
 #
 default: all
 MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux jvm-java jvm-android
@@ -209,6 +209,14 @@ endif
 ifeq ($(OS_TARGET),linux)
 linuxHier=1
 endif
+ifndef CROSSCOMPILE
+BUILDFULLNATIVE=1
+export BUILDFULLNATIVE
+endif
+ifdef BUILDFULLNATIVE
+BUILDNATIVE=1
+export BUILDNATIVE
+endif
 export OS_TARGET OS_SOURCE ARCH CPU_TARGET CPU_SOURCE FULL_TARGET FULL_SOURCE TARGETSUFFIX SOURCESUFFIX CROSSCOMPILE
 ifdef FPCDIR
 override FPCDIR:=$(subst \,/,$(FPCDIR))
@@ -258,11 +266,13 @@ ifndef BINUTILSPREFIX
 ifndef CROSSBINDIR
 ifdef CROSSCOMPILE
 ifndef DARWIN2DARWIN
+ifneq ($(CPU_TARGET),jvm)
 BINUTILSPREFIX=$(CPU_TARGET)-$(OS_TARGET)-
 endif
 endif
 endif
 endif
+endif
 UNITSDIR:=$(wildcard $(FPCDIR)/units/$(TARGETSUFFIX))
 ifeq ($(UNITSDIR),)
 UNITSDIR:=$(wildcard $(FPCDIR)/units/$(OS_TARGET))
@@ -295,8 +305,8 @@ endif
 override PACKAGE_NAME=compiler
 override PACKAGE_VERSION=2.7.1
 unexport FPC_VERSION FPC_COMPILERINFO
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr
-ALLTARGETS=$(CYCLETARGETS) jvm
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm
+ALLTARGETS=$(CYCLETARGETS)
 ifdef ALPHA
 PPC_TARGET=alpha
 endif
@@ -362,28 +372,28 @@ RTLOPT:=$(OPT)
 endif
 ifdef CYCLELEVEL
 ifeq ($(CYCLELEVEL),1)
-LOCALOOPT+=$(OPTLEVEL1)
-RTLOPT+=$(OPTLEVEL1)
-LOCALOPT+=$(LOCALOPTLEVEL1)
-RTLOPT+=$(RTLOPTLEVEL1)
+override LOCALOPT+=$(OPTLEVEL1)
+override RTLOPT+=$(OPTLEVEL1)
+override LOCALOPT+=$(LOCALOPTLEVEL1)
+override RTLOPT+=$(RTLOPTLEVEL1)
 endif
 ifeq ($(CYCLELEVEL),2)
-LOCALOOPT+=$(OPTLEVEL2)
-RTLOPT+=$(OPTLEVEL2)
-LOCALOPT+=$(LOCALOPTLEVEL2)
-RTLOPT+=$(RTLOPTLEVEL2)
+override LOCALOPT+=$(OPTLEVEL2)
+override RTLOPT+=$(OPTLEVEL2)
+override LOCALOPT+=$(LOCALOPTLEVEL2)
+override RTLOPT+=$(RTLOPTLEVEL2)
 endif
 ifeq ($(CYCLELEVEL),3)
-LOCALOOPT+=$(OPTLEVEL3)
-RTLOPT+=$(OPTLEVEL3)
-LOCALOPT+=$(LOCALOPTLEVEL3)
-RTLOPT+=$(RTLOPTLEVEL3)
+override LOCALOPT+=$(OPTLEVEL3)
+override RTLOPT+=$(OPTLEVEL3)
+override LOCALOPT+=$(LOCALOPTLEVEL3)
+override RTLOPT+=$(RTLOPTLEVEL3)
 endif
 ifeq ($(CYCLELEVEL),4)
-LOCALOOPT+=$(OPTLEVEL4)
-RTLOPT+=$(OPTLEVEL4)
-LOCALOPT+=$(LOCALOPTLEVEL4)
-RTLOPT+=$(RTLOPTLEVEL4)
+override LOCALOPT+=$(OPTLEVEL4)
+override RTLOPT+=$(OPTLEVEL4)
+override LOCALOPT+=$(LOCALOPTLEVEL4)
+override RTLOPT+=$(RTLOPTLEVEL4)
 endif
 endif
 override OPT=
@@ -426,7 +436,7 @@ CPUSUF=jvm
 endif
 NOCPUDEF=1
 MSGFILE=msg/error$(FPCLANG).msg
-SVNVERSION:=$(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH)))
+SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
 REVINC:=$(wildcard revision.inc)
 ifneq ($(REVINC),)
 override LOCALOPT+=-dREVINC
@@ -2154,6 +2164,18 @@ BATCHEXT=.sh
 EXEEXT=
 SHORTSUFFIX=aix
 endif
+ifeq ($(OS_TARGET),java)
+OEXT=.class
+ASMEXT=.j
+SHAREDLIBEXT=.jar
+SHORTSUFFIX=java
+endif
+ifeq ($(OS_TARGET),android)
+OEXT=.class
+ASMEXT=.j
+SHAREDLIBEXT=.jar
+SHORTSUFFIX=android
+endif
 ifneq ($(findstring $(OS_SOURCE),$(LIMIT83fs)),)
 FPCMADE=fpcmade.$(SHORTSUFFIX)
 ZIPSUFFIX=$(SHORTSUFFIX)
@@ -2762,17 +2784,12 @@ endif
 endif
 ifdef CREATESHARED
 override FPCOPT+=-Cg
-ifeq ($(CPU_TARGET),i386)
-override FPCOPT+=-Aas
 endif
-endif
-ifeq ($(findstring 2.0.,$(FPC_VERSION)),)
 ifneq ($(findstring $(OS_TARGET),freebsd openbsd netbsd linux solaris),)
 ifeq ($(CPU_TARGET),x86_64)
 override FPCOPT+=-Cg
 endif
 endif
-endif
 ifdef LINKSHARED
 endif
 ifdef OPT
@@ -3649,7 +3666,10 @@ insdatx86 : $(COMPILER_UNITTARGETDIR) x86/x86ins.dat
 insdatarm : arm/armins.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkarmins.pp
 	cd arm && ..$(PATHSEP)utils$(PATHSEP)mkarmins$(SRCEXEEXT)
-insdat: insdatx86 insdatarm
+insdataarch64 : aarch64/a64ins.dat
+	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mka64ins.pp
+	cd aarch64 && ..$(PATHSEP)utils$(PATHSEP)mka64ins$(SRCEXEEXT)
+insdat: insdatx86 insdatarm insdataarch64
 regdatarm : arm/armreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkarmreg.pp
 	cd arm && ..$(PATHSEP)utils$(PATHSEP)mkarmreg$(SRCEXEEXT)
@@ -3659,6 +3679,12 @@ regdatia64 : ia64/ia64reg.dat
 regdatsp : sparc/spreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkspreg.pp
 	cd sparc && ..$(PATHSEP)utils$(PATHSEP)mkspreg$(SRCEXEEXT)
+regdatavr : avr/avrreg.dat
+	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkavrreg.pp
+	cd avr && ..$(PATHSEP)utils$(PATHSEP)mkavrreg$(SRCEXEEXT)
+regdataarch64 : aarch64/a64reg.dat
+	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mka64reg.pp
+	cd aarch64 && ..$(PATHSEP)utils$(PATHSEP)mka64reg$(SRCEXEEXT)
 revision.inc :
 ifneq ($(REVSTR),)
 ifdef USEZIPWRAPPER
@@ -3760,12 +3786,12 @@ cycle:
 	$(MAKE) echotime
 else
 cycle:
-	$(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl
+	$(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
 	$(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
-	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 ifndef CROSSINSTALL
-	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl
+	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
 ifneq ($(OS_TARGET),embedded)
 ifneq ($(OS_TARGET),gba)
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
@@ -3776,12 +3802,12 @@ endif
 else
 cycle: override FPC=
 cycle:
-	$(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl
+	$(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
 	$(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
-	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 ifndef CROSSINSTALL
-	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl
+	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
 ifndef NoNativeBinaries
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(LOCALOPT) $(CROSSOPT)' cycleclean compiler CYCLELEVEL=3
 endif
@@ -3790,7 +3816,7 @@ endif
 cycledep:
 	$(MAKE) cycle USEDEPEND=1
 extcycle:
-	$(MAKE) cycle OPT="$(OPT) -n -OG2p3 -glttt -CRriot -dEXTDEBUG"
+	$(MAKE) cycle OPT="$(OPT) -n -OG2p3 -glttt -CRriot -dEXTDEBUG" ALLOW_WARNINGS=1
 cvstest:
 	$(MAKE) cycle 'LOCALOPT=-n -Se' 'RTLOPT=-n -Se'
 full: fullcycle

+ 41 - 27
compiler/Makefile.fpc

@@ -32,10 +32,10 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm
 
 # All supported targets used for clean
-ALLTARGETS=$(CYCLETARGETS) jvm
+ALLTARGETS=$(CYCLETARGETS)
 
 # Allow ALPHA, POWERPC, POWERPC64, M68K, I386, jvm defines for target cpu
 ifdef ALPHA
@@ -125,28 +125,28 @@ endif
 
 ifdef CYCLELEVEL
 ifeq ($(CYCLELEVEL),1)
-LOCALOOPT+=$(OPTLEVEL1)
-RTLOPT+=$(OPTLEVEL1)
-LOCALOPT+=$(LOCALOPTLEVEL1)
-RTLOPT+=$(RTLOPTLEVEL1)
+override LOCALOPT+=$(OPTLEVEL1)
+override RTLOPT+=$(OPTLEVEL1)
+override LOCALOPT+=$(LOCALOPTLEVEL1)
+override RTLOPT+=$(RTLOPTLEVEL1)
 endif
 ifeq ($(CYCLELEVEL),2)
-LOCALOOPT+=$(OPTLEVEL2)
-RTLOPT+=$(OPTLEVEL2)
-LOCALOPT+=$(LOCALOPTLEVEL2)
-RTLOPT+=$(RTLOPTLEVEL2)
+override LOCALOPT+=$(OPTLEVEL2)
+override RTLOPT+=$(OPTLEVEL2)
+override LOCALOPT+=$(LOCALOPTLEVEL2)
+override RTLOPT+=$(RTLOPTLEVEL2)
 endif
 ifeq ($(CYCLELEVEL),3)
-LOCALOOPT+=$(OPTLEVEL3)
-RTLOPT+=$(OPTLEVEL3)
-LOCALOPT+=$(LOCALOPTLEVEL3)
-RTLOPT+=$(RTLOPTLEVEL3)
+override LOCALOPT+=$(OPTLEVEL3)
+override RTLOPT+=$(OPTLEVEL3)
+override LOCALOPT+=$(LOCALOPTLEVEL3)
+override RTLOPT+=$(RTLOPTLEVEL3)
 endif
 ifeq ($(CYCLELEVEL),4)
-LOCALOOPT+=$(OPTLEVEL4)
-RTLOPT+=$(OPTLEVEL4)
-LOCALOPT+=$(LOCALOPTLEVEL4)
-RTLOPT+=$(RTLOPTLEVEL4)
+override LOCALOPT+=$(OPTLEVEL4)
+override RTLOPT+=$(OPTLEVEL4)
+override LOCALOPT+=$(LOCALOPTLEVEL4)
+override RTLOPT+=$(RTLOPTLEVEL4)
 endif
 endif
 
@@ -202,7 +202,7 @@ NOCPUDEF=1
 MSGFILE=msg/error$(FPCLANG).msg
 
 
-SVNVERSION:=$(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH)))
+SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
 # Check if revision.inc is present
 REVINC:=$(wildcard revision.inc)
 ifneq ($(REVINC),)
@@ -476,7 +476,11 @@ insdatarm : arm/armins.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkarmins.pp
         cd arm && ..$(PATHSEP)utils$(PATHSEP)mkarmins$(SRCEXEEXT)
 
-insdat: insdatx86 insdatarm
+insdataarch64 : aarch64/a64ins.dat
+	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mka64ins.pp
+        cd aarch64 && ..$(PATHSEP)utils$(PATHSEP)mka64ins$(SRCEXEEXT)
+
+insdat: insdatx86 insdatarm insdataarch64
 
 regdatarm : arm/armreg.dat
 	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkarmreg.pp
@@ -490,6 +494,14 @@ regdatsp : sparc/spreg.dat
             $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkspreg.pp
         cd sparc && ..$(PATHSEP)utils$(PATHSEP)mkspreg$(SRCEXEEXT)
 
+regdatavr : avr/avrreg.dat
+            $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mkavrreg.pp
+        cd avr && ..$(PATHSEP)utils$(PATHSEP)mkavrreg$(SRCEXEEXT)
+
+regdataarch64 : aarch64/a64reg.dat
+	    $(COMPILER) -FE$(COMPILERUTILSDIR) $(COMPILERUTILSDIR)/mka64reg.pp
+        cd aarch64 && ..$(PATHSEP)utils$(PATHSEP)mka64reg$(SRCEXEEXT)
+
 # revision.inc rule
 revision.inc :
 ifneq ($(REVSTR),)
@@ -639,14 +651,14 @@ else
 
 cycle:
 # ppc (source native)
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
         $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
         $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
 # building a native compiler for embedded targets is not possible
 ifneq ($(OS_TARGET),embedded)
 # building a native compiler for the arm-gba target is not possible
@@ -672,14 +684,14 @@ cycle: override FPC=
 cycle:
 # ppc (source native)
 # Clear detected compiler binary, because it can be existing crosscompiler binary, but we need native compiler here
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
         $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
         $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
 # building a native compiler for JVM and embedded targets is not possible
 ifndef NoNativeBinaries
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(LOCALOPT) $(CROSSOPT)' cycleclean compiler CYCLELEVEL=3
@@ -691,8 +703,10 @@ endif
 cycledep:
         $(MAKE) cycle USEDEPEND=1
 
+# extcycle should still work, but generates
+# lots of warnings, so ALLOW_WARNINGS=1 is required
 extcycle:
-        $(MAKE) cycle OPT="$(OPT) -n -OG2p3 -glttt -CRriot -dEXTDEBUG"
+        $(MAKE) cycle OPT="$(OPT) -n -OG2p3 -glttt -CRriot -dEXTDEBUG" ALLOW_WARNINGS=1
 
 cvstest:
         $(MAKE) cycle 'LOCALOPT=-n -Se' 'RTLOPT=-n -Se'

+ 133 - 0
compiler/aarch64/a64att.inc

@@ -0,0 +1,133 @@
+{ don't edit, this file is generated from a64ins.dat }
+(
+'b',
+'cb',
+'tb',
+'bl',
+'blr',
+'br',
+'ret',
+'ldr',
+'str',
+'ldp',
+'stp',
+'ldnp',
+'stnp',
+'ldtr',
+'sttr',
+'ldxr',
+'stxr',
+'ldar',
+'stlr',
+'ldaxr',
+'stlxr',
+'prfm',
+'add',
+'adc',
+'sub',
+'sbc',
+'cmp',
+'cmn',
+'mov',
+'and',
+'bic',
+'eor',
+'eon',
+'orr',
+'orn',
+'tst',
+'mvn',
+'movk',
+'adrp',
+'adr',
+'bfm',
+'sbfm',
+'ubfm',
+'extr',
+'sxt',
+'uxt',
+'asrv',
+'llslv',
+'lsrv',
+'rorv',
+'cls',
+'clz',
+'rbit',
+'rev',
+'rev16',
+'rev32',
+'csel',
+'csinc',
+'csinv',
+'csneg',
+'ccmn',
+'ccmp',
+'madd',
+'msub',
+'smaddl',
+'smsubl',
+'smulh',
+'umaddl',
+'umsubl',
+'umulh',
+'sdiv',
+'udiv',
+'neg',
+'asr',
+'lsl',
+'lsr',
+'ror',
+'cset',
+'csetm',
+'cinc',
+'cinv',
+'cneg',
+'ngc',
+'mneg',
+'mul',
+'smnegl',
+'smull',
+'umnegl',
+'umull',
+'fmov',
+'fcvt',
+'fcvtas',
+'fcvtau',
+'fcvtms',
+'fcvtmu',
+'fcvtns',
+'fcvtnu',
+'fcvtps',
+'fcvtpu',
+'fcvtzs',
+'fcvtzu',
+'scvtf',
+'ucvtf',
+'fprinta',
+'fprinti',
+'fprintm',
+'fprintn',
+'fprintp',
+'fprintx',
+'fprintz',
+'fabs',
+'fneg',
+'fsqrt',
+'fadd',
+'fdiv',
+'fmul',
+'fnmul',
+'fsub',
+'fmax',
+'fmin',
+'fminnm',
+'fmadd',
+'fmsub',
+'fnmadd',
+'fnmsub',
+'fcmp',
+'fcmpe',
+'fccmp',
+'fcmmpe',
+'fcsel'
+);

+ 133 - 0
compiler/aarch64/a64atts.inc

@@ -0,0 +1,133 @@
+{ don't edit, this file is generated from a64ins.dat }
+(
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE
+);

+ 262 - 0
compiler/aarch64/a64ins.dat

@@ -0,0 +1,262 @@
+[B]
+
+[CB]
+
+[TB]
+
+[BL]
+
+[BLR]
+
+[BR]
+
+[RET]
+
+[LDR]
+
+[STR]
+
+[LDP]
+
+[STP]
+
+[LDNP]
+
+[STNP]
+
+[LDTR]
+
+[STTR]
+
+[LDXR]
+
+[STXR]
+
+[LDAR]
+
+[STLR]
+
+[LDAXR]
+
+[STLXR]
+
+[PRFM]
+
+[ADD]
+
+[ADC]
+
+[SUB]
+
+[SBC]
+
+[CMP]
+
+[CMN]
+
+[MOV]
+
+[AND]
+
+[BIC]
+
+[EOR]
+
+[EON]
+
+[ORR]
+
+[ORN]
+
+[TST]
+
+[MVN]
+
+[MOVK]
+
+[ADRP]
+
+[ADR]
+
+[BFM]
+
+[SBFM]
+
+[UBFM]
+
+[EXTR]
+
+[SXT]
+
+[UXT]
+
+[ASRV]
+
+[LLSLV]
+
+[LSRV]
+
+[RORV]
+
+[CLS]
+
+[CLZ]
+
+[RBIT]
+
+[REV]
+
+[REV16]
+
+[REV32]
+
+[CSEL]
+
+[CSINC]
+
+[CSINV]
+
+[CSNEG]
+
+[CCMN]
+
+[CCMP]
+
+[MADD]
+
+[MSUB]
+
+[SMADDL]
+
+[SMSUBL]
+
+[SMULH]
+
+[UMADDL]
+
+[UMSUBL]
+
+[UMULH]
+
+[SDIV]
+
+[UDIV]
+
+; Aliases
+; they are not generated by the compiler, they are only used for inline assembler
+[NEG]
+
+[ASR]
+
+[LSL]
+
+[LSR]
+
+[ROR]
+
+[CSET]
+
+[CSETM]
+
+[CINC]
+
+[CINV]
+
+[CNEG]
+
+[NGC]
+
+[MNEG]
+
+[MUL]
+
+[SMNEGL]
+
+[SMULL]
+
+[UMNEGL]
+
+[UMULL]
+
+[FMOV]
+
+[FCVT]
+
+[FCVTAS]
+
+[FCVTAU]
+
+[FCVTMS]
+
+[FCVTMU]
+
+[FCVTNS]
+
+[FCVTNU]
+
+[FCVTPS]
+
+[FCVTPU]
+
+[FCVTZS]
+
+[FCVTZU]
+
+[SCVTF]
+
+[UCVTF]
+
+[FPRINTA]
+
+[FPRINTI]
+
+[FPRINTM]
+
+[FPRINTN]
+
+[FPRINTP]
+
+[FPRINTX]
+
+[FPRINTZ]
+
+[FABS]
+
+[FNEG]
+
+[FSQRT]
+
+[FADD]
+
+[FDIV]
+
+[FMUL]
+
+[FNMUL]
+
+[FSUB]
+
+[FMAX]
+
+[FMIN]
+
+[FMINNM]
+
+[FMADD]
+
+[FMSUB]
+
+[FNMADD]
+
+[FNMSUB]
+
+[FCMP]
+
+[FCMPE]
+
+[FCCMP]
+
+[FCMMPE]
+
+[FCSEL]
+

+ 2 - 0
compiler/aarch64/a64nop.inc

@@ -0,0 +1,2 @@
+{ don't edit, this file is generated from a64ins.dat }
+0;

+ 133 - 0
compiler/aarch64/a64op.inc

@@ -0,0 +1,133 @@
+{ don't edit, this file is generated from a64ins.dat }
+(
+A_B,
+A_CB,
+A_TB,
+A_BL,
+A_BLR,
+A_BR,
+A_RET,
+A_LDR,
+A_STR,
+A_LDP,
+A_STP,
+A_LDNP,
+A_STNP,
+A_LDTR,
+A_STTR,
+A_LDXR,
+A_STXR,
+A_LDAR,
+A_STLR,
+A_LDAXR,
+A_STLXR,
+A_PRFM,
+A_ADD,
+A_ADC,
+A_SUB,
+A_SBC,
+A_CMP,
+A_CMN,
+A_MOV,
+A_AND,
+A_BIC,
+A_EOR,
+A_EON,
+A_ORR,
+A_ORN,
+A_TST,
+A_MVN,
+A_MOVK,
+A_ADRP,
+A_ADR,
+A_BFM,
+A_SBFM,
+A_UBFM,
+A_EXTR,
+A_SXT,
+A_UXT,
+A_ASRV,
+A_LLSLV,
+A_LSRV,
+A_RORV,
+A_CLS,
+A_CLZ,
+A_RBIT,
+A_REV,
+A_REV16,
+A_REV32,
+A_CSEL,
+A_CSINC,
+A_CSINV,
+A_CSNEG,
+A_CCMN,
+A_CCMP,
+A_MADD,
+A_MSUB,
+A_SMADDL,
+A_SMSUBL,
+A_SMULH,
+A_UMADDL,
+A_UMSUBL,
+A_UMULH,
+A_SDIV,
+A_UDIV,
+A_NEG,
+A_ASR,
+A_LSL,
+A_LSR,
+A_ROR,
+A_CSET,
+A_CSETM,
+A_CINC,
+A_CINV,
+A_CNEG,
+A_NGC,
+A_MNEG,
+A_MUL,
+A_SMNEGL,
+A_SMULL,
+A_UMNEGL,
+A_UMULL,
+A_FMOV,
+A_FCVT,
+A_FCVTAS,
+A_FCVTAU,
+A_FCVTMS,
+A_FCVTMU,
+A_FCVTNS,
+A_FCVTNU,
+A_FCVTPS,
+A_FCVTPU,
+A_FCVTZS,
+A_FCVTZU,
+A_SCVTF,
+A_UCVTF,
+A_FPRINTA,
+A_FPRINTI,
+A_FPRINTM,
+A_FPRINTN,
+A_FPRINTP,
+A_FPRINTX,
+A_FPRINTZ,
+A_FABS,
+A_FNEG,
+A_FSQRT,
+A_FADD,
+A_FDIV,
+A_FMUL,
+A_FNMUL,
+A_FSUB,
+A_FMAX,
+A_FMIN,
+A_FMINNM,
+A_FMADD,
+A_FMSUB,
+A_FNMADD,
+A_FNMSUB,
+A_FCMP,
+A_FCMPE,
+A_FCCMP,
+A_FCMMPE,
+A_FCSEL
+);

+ 238 - 0
compiler/aarch64/a64reg.dat

@@ -0,0 +1,238 @@
+;
+; AArch64 registers
+;
+; layout
+; <name>,<type>,<subtype>,<value>,<stdname>,<stab idx>,<dwarf idx>
+;
+NO,$00,$00,$00,INVALID,-1,-1
+; Integer registers
+W0,$01,$04,$00,w0,0,0
+X0,$01,$05,$00,x0,0,0
+W1,$01,$04,$01,w1,1,1
+X1,$01,$05,$01,x1,1,1
+W2,$01,$04,$02,w2,2,2
+X2,$01,$05,$02,x2,2,2
+W3,$01,$04,$03,w3,3,3
+X3,$01,$05,$03,x3,3,3
+W4,$01,$04,$04,w4,4,4
+X4,$01,$05,$04,x4,4,4
+W5,$01,$04,$05,w5,5,5
+X5,$01,$05,$05,x5,5,5
+W6,$01,$04,$06,w6,6,6
+X6,$01,$05,$06,x6,6,6
+W7,$01,$04,$07,w7,7,7
+X7,$01,$05,$07,x7,7,7
+W8,$01,$04,$08,w8,8,8
+X8,$01,$05,$08,x8,8,8
+W9,$01,$04,$09,w9,9,9
+X9,$01,$05,$09,x9,9,9
+W10,$01,$04,$0A,w10,10,10
+X10,$01,$05,$0A,x10,10,10
+W11,$01,$04,$0B,w11,11,11
+X11,$01,$05,$0B,x11,11,11
+W12,$01,$04,$0C,w12,12,12
+X12,$01,$05,$0C,x12,12,12
+W13,$01,$04,$0D,w13,13,13
+X13,$01,$05,$0D,x13,13,13
+W14,$01,$04,$0E,w14,14,14
+X14,$01,$05,$0E,x14,14,14
+W15,$01,$04,$0F,w15,15,15
+X15,$01,$05,$0F,x15,15,15
+W16,$01,$04,$10,w16,16,16
+X16,$01,$05,$10,x16,16,16
+W17,$01,$04,$11,w17,17,17
+X17,$01,$05,$11,x17,17,17
+W18,$01,$04,$12,w18,18,18
+X18,$01,$05,$12,x18,18,18
+W19,$01,$04,$13,w19,19,19
+X19,$01,$05,$13,x19,19,19
+W20,$01,$04,$14,w20,20,20
+X20,$01,$05,$14,x20,20,20
+W21,$01,$04,$15,w21,21,21
+X21,$01,$05,$15,x21,21,21
+W22,$01,$04,$16,w22,22,22
+X22,$01,$05,$16,x22,22,22
+W23,$01,$04,$17,w23,23,23
+X23,$01,$05,$17,x23,23,23
+W24,$01,$04,$18,w24,24,24
+X24,$01,$05,$18,x24,24,24
+W25,$01,$04,$19,w25,25,25
+X25,$01,$05,$19,x25,25,25
+W26,$01,$04,$1A,w26,26,26
+X26,$01,$05,$1A,x26,26,26
+W27,$01,$04,$1B,w27,27,27
+X27,$01,$05,$1B,x27,27,27
+W28,$01,$04,$1C,w28,28,28
+X28,$01,$05,$1C,x28,28,28
+W29,$01,$04,$1D,w29,29,29
+X29,$01,$05,$1D,x29,29,29
+W30,$01,$04,$1E,w30,30,30
+X30,$01,$05,$1E,x30,30,30
+WZR,$01,$04,$1F,wzr,31,31
+XZR,$01,$05,$1F,xzr,31,31
+
+
+; vfp registers
+B0,$04,$01,$00,b0,0,0
+H0,$04,$03,$00,h0,0,0
+S0,$04,$09,$00,s0,0,0
+D0,$04,$0a,$00,d0,0,0
+Q0,$04,$05,$00,q0,0,0
+B1,$04,$01,$01,b1,1,1
+H1,$04,$03,$01,h1,1,1
+S1,$04,$09,$01,s1,1,1
+D1,$04,$0a,$01,d1,1,1
+Q1,$04,$05,$01,q1,1,1
+B2,$04,$01,$02,b2,2,2
+H2,$04,$03,$02,h2,2,2
+S2,$04,$09,$02,s2,2,2
+D2,$04,$0a,$02,d2,2,2
+Q2,$04,$05,$02,q2,2,2
+B3,$04,$01,$03,b3,3,3
+H3,$04,$03,$03,h3,3,3
+S3,$04,$09,$03,s3,3,3
+D3,$04,$0a,$03,d3,3,3
+Q3,$04,$05,$03,q3,3,3
+B4,$04,$01,$04,b4,4,4
+H4,$04,$03,$04,h4,4,4
+S4,$04,$09,$04,s4,4,4
+D4,$04,$0a,$04,d4,4,4
+Q4,$04,$05,$04,q4,4,4
+B5,$04,$01,$05,b5,5,5
+H5,$04,$03,$05,h5,5,5
+S5,$04,$09,$05,s5,5,5
+D5,$04,$0a,$05,d5,5,5
+Q5,$04,$05,$05,q5,5,5
+B6,$04,$01,$06,b6,6,6
+H6,$04,$03,$06,h6,6,6
+S6,$04,$09,$06,s6,6,6
+D6,$04,$0a,$06,d6,6,6
+Q6,$04,$05,$06,q6,6,6
+B7,$04,$01,$07,b7,7,7
+H7,$04,$03,$07,h7,7,7
+S7,$04,$09,$07,s7,7,7
+D7,$04,$0a,$07,d7,7,7
+Q7,$04,$05,$07,q7,7,7
+B8,$04,$01,$08,b8,8,8
+H8,$04,$03,$08,h8,8,8
+S8,$04,$09,$08,s8,8,8
+D8,$04,$0a,$08,d8,8,8
+Q8,$04,$05,$08,q8,8,8
+B9,$04,$01,$09,b9,9,9
+H9,$04,$03,$09,h9,9,9
+S9,$04,$09,$09,s9,9,9
+D9,$04,$0a,$09,d9,9,9
+Q9,$04,$05,$09,q9,9,9
+B10,$04,$01,$0A,b10,10,10
+H10,$04,$03,$0A,h10,10,10
+S10,$04,$09,$0A,s10,10,10
+D10,$04,$0a,$0A,d10,10,10
+Q10,$04,$05,$0A,q10,10,10
+B11,$04,$01,$0B,b11,11,11
+H11,$04,$03,$0B,h11,11,11
+S11,$04,$09,$0B,s11,11,11
+D11,$04,$0a,$0B,d11,11,11
+Q11,$04,$05,$0B,q11,11,11
+B12,$04,$01,$0C,b12,12,12
+H12,$04,$03,$0C,h12,12,12
+S12,$04,$09,$0C,s12,12,12
+D12,$04,$0a,$0C,d12,12,12
+Q12,$04,$05,$0C,q12,12,12
+B13,$04,$01,$0D,b13,13,13
+H13,$04,$03,$0D,h13,13,13
+S13,$04,$09,$0D,s13,13,13
+D13,$04,$0a,$0D,d13,13,13
+Q13,$04,$05,$0D,q13,13,13
+B14,$04,$01,$0E,b14,14,14
+H14,$04,$03,$0E,h14,14,14
+S14,$04,$09,$0E,s14,14,14
+D14,$04,$0a,$0E,d14,14,14
+Q14,$04,$05,$0E,q14,14,14
+B15,$04,$01,$0F,b15,15,15
+H15,$04,$03,$0F,h15,15,15
+S15,$04,$09,$0F,s15,15,15
+D15,$04,$0a,$0F,d15,15,15
+Q15,$04,$05,$0F,q15,15,15
+B16,$04,$01,$10,b16,16,16
+H16,$04,$03,$10,h16,16,16
+S16,$04,$09,$10,s16,16,16
+D16,$04,$0a,$10,d16,16,16
+Q16,$04,$05,$10,q16,16,16
+B17,$04,$01,$11,b17,17,17
+H17,$04,$03,$11,h17,17,17
+S17,$04,$09,$11,s17,17,17
+D17,$04,$0a,$11,d17,17,17
+Q17,$04,$05,$11,q17,17,17
+B18,$04,$01,$12,b18,18,18
+H18,$04,$03,$12,h18,18,18
+S18,$04,$09,$12,s18,18,18
+D18,$04,$0a,$12,d18,18,18
+Q18,$04,$05,$12,q18,18,18
+B19,$04,$01,$13,b19,19,19
+H19,$04,$03,$13,h19,19,19
+S19,$04,$09,$13,s19,19,19
+D19,$04,$0a,$13,d19,19,19
+Q19,$04,$05,$13,q19,19,19
+B20,$04,$01,$14,b20,20,20
+H20,$04,$03,$14,h20,20,20
+S20,$04,$09,$14,s20,20,20
+D20,$04,$0a,$14,d20,20,20
+Q20,$04,$05,$14,q20,20,20
+B21,$04,$01,$15,b21,21,21
+H21,$04,$03,$15,h21,21,21
+S21,$04,$09,$15,s21,21,21
+D21,$04,$0a,$15,d21,21,21
+Q21,$04,$05,$15,q21,21,21
+B22,$04,$01,$16,b22,22,22
+H22,$04,$03,$16,h22,22,22
+S22,$04,$09,$16,s22,22,22
+D22,$04,$0a,$16,d22,22,22
+Q22,$04,$05,$16,q22,22,22
+B23,$04,$01,$17,b23,23,23
+H23,$04,$03,$17,h23,23,23
+S23,$04,$09,$17,s23,23,23
+D23,$04,$0a,$17,d23,23,23
+Q23,$04,$05,$17,q23,23,23
+B24,$04,$01,$18,b24,24,24
+H24,$04,$03,$18,h24,24,24
+S24,$04,$09,$18,s24,24,24
+D24,$04,$0a,$18,d24,24,24
+Q24,$04,$05,$18,q24,24,24
+B25,$04,$01,$19,b25,25,25
+H25,$04,$03,$19,h25,25,25
+S25,$04,$09,$19,s25,25,25
+D25,$04,$0a,$19,d25,25,25
+Q25,$04,$05,$19,q25,25,25
+B26,$04,$01,$1A,b26,26,26
+H26,$04,$03,$1A,h26,26,26
+S26,$04,$09,$1A,s26,26,26
+D26,$04,$0a,$1A,d26,26,26
+Q26,$04,$05,$1A,q26,26,26
+B27,$04,$01,$1B,b27,27,27
+H27,$04,$03,$1B,h27,27,27
+S27,$04,$09,$1B,s27,27,27
+D27,$04,$0a,$1B,d27,27,27
+Q27,$04,$05,$1B,q27,27,27
+B28,$04,$01,$1C,b28,28,28
+H28,$04,$03,$1C,h28,28,28
+S28,$04,$09,$1C,s28,28,28
+D28,$04,$0a,$1C,d28,28,28
+Q28,$04,$05,$1C,q28,28,28
+B29,$04,$01,$1D,b29,29,29
+H29,$04,$03,$1D,h29,29,29
+S29,$04,$09,$1D,s29,29,29
+D29,$04,$0a,$1D,d29,29,29
+Q29,$04,$05,$1D,q29,29,29
+B30,$04,$01,$1E,b30,30,30
+H30,$04,$03,$1E,h30,30,30
+S30,$04,$09,$1E,s30,30,30
+D30,$04,$0a,$1E,d30,30,30
+Q30,$04,$05,$1E,q30,30,30
+B31,$04,$01,$1F,b31,31,31
+H31,$04,$03,$1F,h31,31,31
+S31,$04,$09,$1F,s31,31,31
+D31,$04,$0a,$1F,d31,31,31
+Q31,$04,$05,$1F,q31,31,31
+
+NZCV,$05,$00,$00,nzcv,0,0
+

+ 4 - 0
compiler/aarch64/a64tab.inc

@@ -0,0 +1,4 @@
+{ don't edit, this file is generated from a64ins.dat }
+(
+
+);

+ 2178 - 0
compiler/aarch64/aasmcpu.pas

@@ -0,0 +1,2178 @@
+{
+    Copyright (c) 2003-2012 by Florian Klaempfl and others
+
+    Contains the assembler object for ARM64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit aasmcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+uses
+  cclasses,globtype,globals,verbose,
+  aasmbase,aasmtai,aasmdata,aasmsym,
+  ogbase,
+  symtype,
+  cpubase,cpuinfo,cgbase,cgutils;
+
+    const
+      { "mov reg,reg" source operand number }
+      O_MOV_SOURCE = 1;
+      { "mov reg,reg" destination operand number }
+      O_MOV_DEST = 0;
+
+      { Operand types }
+      OT_NONE      = $00000000;
+
+      OT_BITS8     = $00000001;  { size, and other attributes, of the operand  }
+      OT_BITS16    = $00000002;
+      OT_BITS32    = $00000004;
+      OT_BITS64    = $00000008;  { FPU only  }
+      OT_BITS80    = $00000010;
+      OT_FAR       = $00000020;  { this means 16:16 or 16:32, like in CALL/JMP }
+      OT_NEAR      = $00000040;
+      OT_SHORT     = $00000080;
+      OT_BITSTINY  = $00000100;  { fpu constant }
+      OT_BITSSHIFTER =
+                     $00000200;
+
+      OT_SIZE_MASK = $000003FF;  { all the size attributes  }
+      OT_NON_SIZE  = longint(not OT_SIZE_MASK);
+
+      OT_SIGNED    = $00000100;  { the operand need to be signed -128-127 }
+
+      OT_TO        = $00000200;  { operand is followed by a colon  }
+                                 { reverse effect in FADD, FSUB &c  }
+      OT_COLON     = $00000400;
+
+      OT_SHIFTEROP = $00000800;
+      OT_REGISTER  = $00001000;
+      OT_IMMEDIATE = $00002000;
+      OT_REGLIST   = $00008000;
+      OT_IMM8      = $00002001;
+      OT_IMM24     = $00002002;
+      OT_IMM32     = $00002004;
+      OT_IMM64     = $00002008;
+      OT_IMM80     = $00002010;
+      OT_IMMTINY   = $00002100;
+      OT_IMMSHIFTER= $00002200;
+      OT_IMMEDIATE24 = OT_IMM24;
+      OT_SHIFTIMM  = OT_SHIFTEROP or OT_IMMSHIFTER;
+      OT_SHIFTIMMEDIATE = OT_SHIFTIMM;
+      OT_IMMEDIATESHIFTER = OT_IMMSHIFTER;
+
+      OT_IMMEDIATEFPU = OT_IMMTINY;
+
+      OT_REGMEM    = $00200000;  { for r/m, ie EA, operands  }
+      OT_REGNORM   = $00201000;  { 'normal' reg, qualifies as EA  }
+      OT_REG8      = $00201001;
+      OT_REG16     = $00201002;
+      OT_REG32     = $00201004;
+      OT_REG64     = $00201008;
+      OT_VREG      = $00201010;  { vector register }
+      OT_REGF      = $00201020;  { coproc register }
+      OT_MEMORY    = $00204000;  { register number in 'basereg'  }
+      OT_MEM8      = $00204001;
+      OT_MEM16     = $00204002;
+      OT_MEM32     = $00204004;
+      OT_MEM64     = $00204008;
+      OT_MEM80     = $00204010;
+      { word/byte load/store }
+      OT_AM2       = $00010000;
+      { misc ld/st operations }
+      OT_AM3       = $00020000;
+      { multiple ld/st operations }
+      OT_AM4       = $00040000;
+      { co proc. ld/st operations }
+      OT_AM5       = $00080000;
+      OT_AMMASK    = $000f0000;
+      { IT instruction }
+      OT_CONDITION = $00100000;
+
+      OT_MEMORYAM2 = OT_MEMORY or OT_AM2;
+      OT_MEMORYAM3 = OT_MEMORY or OT_AM3;
+      OT_MEMORYAM4 = OT_MEMORY or OT_AM4;
+      OT_MEMORYAM5 = OT_MEMORY or OT_AM5;
+
+      OT_FPUREG    = $01000000;  { floating point stack registers  }
+      OT_REG_SMASK = $00070000;  { special register operands: these may be treated differently  }
+                                 { a mask for the following  }
+
+      OT_MEM_OFFS  = $00604000;  { special type of EA  }
+                                 { simple [address] offset  }
+      OT_ONENESS   = $00800000;  { special type of immediate operand  }
+                                 { so UNITY == IMMEDIATE | ONENESS  }
+      OT_UNITY     = $00802000;  { for shift/rotate instructions  }
+
+      instabentries = {$i a64nop.inc}
+
+      maxinfolen = 5;
+
+      IF_NONE   = $00000000;
+
+      IF_ARMMASK    = $000F0000;
+      IF_ARM7       = $00070000;
+      IF_FPMASK     = $00F00000;
+      IF_FPA        = $00100000;
+
+      { if the instruction can change in a second pass }
+      IF_PASS2  = longint($80000000);
+
+    type
+      TInsTabCache=array[TasmOp] of longint;
+      PInsTabCache=^TInsTabCache;
+
+      { One entry of the instruction table. The table itself (InsTab) is
+        still commented out in this unit, so nothing in the visible code
+        fills or reads these records yet. }
+      tinsentry = record
+        opcode  : tasmop;                          { opcode this entry describes }
+        ops     : byte;                            { presumably the number of operands - TODO confirm }
+        optypes : array[0..3] of longint;          { presumably OT_* masks, one per operand - TODO confirm }
+        code    : array[0..maxinfolen] of char;    { maxinfolen+1 characters; meaning not visible here }
+        flags   : longint;                         { presumably IF_* flags - TODO confirm }
+      end;
+
+      pinsentry=^tinsentry;
+
+{    const
+      InsTab : array[0..instabentries-1] of TInsEntry={$i a64tab.inc} }
+
+    var
+      InsTabCache : PInsTabCache;
+
+    type
+      { AArch64 instruction node. The op_* constructors differ only in the
+        kinds of operands they load: reg = register, ref = memory reference,
+        const = constant, sym/sym_ofs = symbol plus offset,
+        shifterop = shifter operand. }
+      taicpu = class(tai_cpu_abstract_sym)
+         { postfix attached to the opcode; set through setoppostfix }
+         oppostfix : TOpPostfix;
+         { stores a copy of "so" as the shifter operand in slot "opidx" }
+         procedure loadshifterop(opidx:longint;const so:tshifterop);
+         constructor op_none(op : tasmop);
+
+         constructor op_reg(op : tasmop;_op1 : tregister);
+         constructor op_ref(op : tasmop;const _op1 : treference);
+         constructor op_const(op : tasmop;_op1 : longint);
+
+         constructor op_reg_reg(op : tasmop;_op1,_op2 : tregister);
+         constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
+         constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
+
+         constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
+         constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
+         constructor op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
+         constructor op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
+         constructor op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
+         constructor op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
+         constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+
+         { conditional jump to a symbol: stores "cond" in the condition field }
+         constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
+
+         constructor op_sym(op : tasmop;_op1 : tasmsymbol);
+         constructor op_sym_ofs(op : tasmop;_op1 : tasmsymbol;_op1ofs:longint);
+         constructor op_reg_sym_ofs(op : tasmop;_op1 : tregister;_op2:tasmsymbol;_op2ofs : longint);
+         constructor op_sym_ofs_ref(op : tasmop;_op1 : tasmsymbol;_op1ofs:longint;const _op2 : treference);
+
+         { true for a MOV/FMOV whose source and destination register are identical }
+         function is_same_reg_move(regtype: Tregistertype):boolean; override;
+
+         { tells whether operand "opnr" is read or written by this instruction }
+         function spilling_get_operation_type(opnr: longint): topertype;override;
+
+         { assembler }
+      public
+         { the next will reset all instructions that can change in pass 2 }
+         procedure ResetPass1;override;
+         procedure ResetPass2;override;
+         function  CheckIfValid:boolean;
+         function GetString:string;
+         function  Pass1(objdata:TObjData):longint;override;
+         procedure Pass2(objdata:TObjData);override;
+      protected
+         { operand (de)serialisation hooks for compiled unit files }
+         procedure ppuloadoper(ppufile:tcompilerppufile;var o:toper);override;
+         procedure ppuwriteoper(ppufile:tcompilerppufile;const o:toper);override;
+         procedure ppubuildderefimploper(var o:toper);override;
+         procedure ppuderefoper(var o:toper);override;
+      private
+         { next fields are filled in pass1, so pass2 is faster }
+         inssize   : shortint;
+         insoffset : longint;
+         { NOTE(review): the remark below says "public", yet the field is
+           declared in the private section - confirm which is intended }
+         LastInsOffset : longint; { need to be public to be reset }
+         insentry  : PInsEntry;
+         function  InsEnd:longint;
+         procedure create_ot(objdata:TObjData);
+         function  Matches(p:PInsEntry):longint;
+         function  calcsize(p:PInsEntry):shortint;
+         procedure gencode(objdata:TObjData);
+         function  NeedAddrPrefix(opidx:byte):boolean;
+         procedure Swapoperands;
+         function  FindInsentry(objdata:TObjData):boolean;
+      end;
+
+      tai_align = class(tai_align_abstract)
+        { nothing to add }
+      end;
+
+    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
+    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+
+    function setoppostfix(i : taicpu;pf : toppostfix) : taicpu;
+    function setcondition(i : taicpu;c : tasmcond) : taicpu;
+
+    { inserts pc relative symbols at places where they are reachable
+      and transforms special instructions to valid instruction encodings }
+    procedure finalizearmcode(list,listtoinsert : TAsmList);
+    { inserts .pdata section and dummy function prolog needed for arm-wince exception handling }
+    procedure InsertPData;
+
+    procedure InitAsm;
+    procedure DoneAsm;
+
+
+implementation
+
+  uses
+    cutils,rgobj,itcpugas,aoptcpu;
+
+
+    { Stores a copy of shifter operand "so" in operand slot "opidx". }
+    procedure taicpu.loadshifterop(opidx:longint;const so:tshifterop);
+      begin
+        allocate_oper(opidx+1);
+        if oper[opidx]^.typ<>top_shifterop then
+          begin
+            { the slot currently holds another operand kind: clear it and
+              allocate storage for the shifter operand }
+            clearop(opidx);
+            new(oper[opidx]^.shifterop);
+          end;
+        { an existing shifterop record is reused and simply overwritten }
+        oper[opidx]^.shifterop^:=so;
+        oper[opidx]^.typ:=top_shifterop;
+      end;
+
+
+{*****************************************************************************
+                                 taicpu Constructors
+*****************************************************************************}
+
+    { Creates an instruction for opcode "op" without loading any operands. }
+    constructor taicpu.op_none(op : tasmop);
+      begin
+         inherited create(op);
+      end;
+
+
+    { One-operand form: operand 0 is the memory reference "_op1".
+      NOTE(review): the original remark here was "for pld", which looks
+      inherited from the 32-bit ARM backend; the AArch64 opcode list has
+      A_PRFM and no PLD - confirm the intended user. }
+    constructor taicpu.op_ref(op : tasmop;const _op1 : treference);
+      begin
+         inherited create(op);
+         ops:=1;
+         loadref(0,_op1);
+      end;
+
+
+    { One-operand form: operand 0 is register "_op1". }
+    constructor taicpu.op_reg(op : tasmop;_op1 : tregister);
+      begin
+         inherited create(op);
+         ops:=1;
+         loadreg(0,_op1);
+      end;
+
+
+    { One-operand form: operand 0 is the constant "_op1".
+      The longint argument is converted to aint before it is stored. }
+    constructor taicpu.op_const(op : tasmop;_op1 : longint);
+      begin
+         inherited create(op);
+         ops:=1;
+         loadconst(0,aint(_op1));
+      end;
+
+
+    { Two-operand form: operand 0 is register "_op1", operand 1 is register "_op2". }
+    constructor taicpu.op_reg_reg(op : tasmop;_op1,_op2 : tregister);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+      end;
+
+
+    { Two-operand form: operand 0 is register "_op1", operand 1 is the
+      constant "_op2". }
+    constructor taicpu.op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         { _op2 already has type aint, so it is passed on unchanged }
+         loadconst(1,_op2);
+      end;
+
+
+    { Two-operand form: operand 0 is register "_op1", operand 1 is the
+      memory reference "_op2". Used by the spilling helpers in this unit
+      to build LDR/STR. }
+    constructor taicpu.op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadref(1,_op2);
+      end;
+
+
+    { Three-operand form: operands 0..2 are the registers "_op1".."_op3". }
+    constructor taicpu.op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
+      begin
+         inherited create(op);
+         ops:=3;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+      end;
+
+
+    { Four-operand form: operands 0..3 are the registers "_op1".."_op4". }
+    constructor taicpu.op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
+      begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+         loadreg(3,_op4);
+      end;
+
+
+     { Three-operand form: operands 0 and 1 are the registers "_op1" and
+       "_op2", operand 2 is the constant "_op3". }
+     constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
+       begin
+          inherited create(op);
+          ops:=3;
+          loadreg(0,_op1);
+          loadreg(1,_op2);
+          { _op3 already has type aint, so it is passed on unchanged }
+          loadconst(2,_op3);
+       end;
+
+
+     { Three-operand form: operands 0 and 1 are the registers "_op1" and
+       "_op2", operand 2 is symbol "_op3" with offset "_op3ofs". }
+     constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
+       begin
+          inherited create(op);
+          ops:=3;
+          loadreg(0,_op1);
+          loadreg(1,_op2);
+          { the symbol is the third operand: it must go into slot 2.
+            Loading it into slot 0 would overwrite "_op1" and leave slot 2
+            unset although ops=3. }
+          loadsymbol(2,_op3,_op3ofs);
+       end;
+
+
+     { Three-operand form: operands 0 and 1 are the registers "_op1" and
+       "_op2", operand 2 is the memory reference "_op3". }
+     constructor taicpu.op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
+       begin
+          inherited create(op);
+          ops:=3;
+          loadreg(0,_op1);
+          loadreg(1,_op2);
+          loadref(2,_op3);
+       end;
+
+
+     { Three-operand form: operands 0 and 1 are the registers "_op1" and
+       "_op2", operand 2 is a copy of the shifter operand "_op3". }
+     constructor taicpu.op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
+       begin
+          inherited create(op);
+          ops:=3;
+          loadreg(0,_op1);
+          loadreg(1,_op2);
+          loadshifterop(2,_op3);
+       end;
+
+
+     { Four-operand form: operands 0..2 are the registers "_op1".."_op3",
+       operand 3 is a copy of the shifter operand "_op4". }
+     constructor taicpu.op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+       begin
+          inherited create(op);
+          ops:=4;
+          loadreg(0,_op1);
+          loadreg(1,_op2);
+          loadreg(2,_op3);
+          loadshifterop(3,_op4);
+       end;
+
+
+    { One-operand form with a condition: stores "cond" in the condition
+      field and loads symbol "_op1" (offset 0) as operand 0. }
+    constructor taicpu.op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
+      begin
+         inherited create(op);
+         condition:=cond;
+         ops:=1;
+         loadsymbol(0,_op1,0);
+      end;
+
+
+    { One-operand form: operand 0 is symbol "_op1" with offset 0. }
+    constructor taicpu.op_sym(op : tasmop;_op1 : tasmsymbol);
+      begin
+         inherited create(op);
+         ops:=1;
+         loadsymbol(0,_op1,0);
+      end;
+
+
+    { One-operand form: operand 0 is symbol "_op1" with offset "_op1ofs". }
+    constructor taicpu.op_sym_ofs(op : tasmop;_op1 : tasmsymbol;_op1ofs:longint);
+      begin
+         inherited create(op);
+         ops:=1;
+         loadsymbol(0,_op1,_op1ofs);
+      end;
+
+
+     { Two-operand form: operand 0 is register "_op1", operand 1 is symbol
+       "_op2" with offset "_op2ofs". }
+     constructor taicpu.op_reg_sym_ofs(op : tasmop;_op1 : tregister;_op2:tasmsymbol;_op2ofs : longint);
+       begin
+          inherited create(op);
+          ops:=2;
+          loadreg(0,_op1);
+          loadsymbol(1,_op2,_op2ofs);
+       end;
+
+
+    { Two-operand form: operand 0 is symbol "_op1" with offset "_op1ofs",
+      operand 1 is the memory reference "_op2". }
+    constructor taicpu.op_sym_ofs_ref(op : tasmop;_op1 : tasmsymbol;_op1ofs:longint;const _op2 : treference);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadsymbol(0,_op1,_op1ofs);
+         loadref(1,_op2);
+      end;
+
+
+    { Lets the register allocator remove unnecessary moves: returns true
+      when this instruction is an unconditional MOV (for integer registers)
+      or FMOV (for MM registers) without postfix whose two register
+      operands are the same register. }
+    function taicpu.is_same_reg_move(regtype: Tregistertype):boolean;
+      begin
+        result:=false;
+        { the opcode has to fit the register class that is asked about }
+        if not (
+                 ((opcode=A_MOV) and (regtype=R_INTREGISTER)) or
+                 ((opcode=A_FMOV) and (regtype=R_MMREGISTER))
+               ) then
+          exit;
+        { no postfix, no condition, exactly two operands }
+        if (oppostfix<>PF_None) or (condition<>C_None) or (ops<>2) then
+          exit;
+        result:=(oper[0]^.typ=top_reg) and
+                (oper[1]^.typ=top_reg) and
+                (oper[0]^.reg=oper[1]^.reg);
+      end;
+
+
+    { Builds the instruction that loads register "r" from "ref".
+      Integer registers and MM registers with sub-register R_SUBFD or
+      R_SUBFS are loaded with LDR; everything else raises an internal
+      error. }
+    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
+      begin
+        case getregtype(r) of
+          R_INTREGISTER :
+            result:=taicpu.op_reg_ref(A_LDR,r,ref);
+          R_MMREGISTER :
+            case getsubreg(r) of
+              R_SUBFD,
+              R_SUBFS:
+                result:=taicpu.op_reg_ref(A_LDR,r,ref);
+              else
+                internalerror(2009112905);
+            end;
+          else
+            internalerror(200401041);
+        end;
+      end;
+
+
+    { Builds the instruction that stores register "r" to "ref".
+      Integer registers and MM registers with sub-register R_SUBFD or
+      R_SUBFS are stored with STR; everything else raises an internal
+      error. }
+    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+      begin
+        case getregtype(r) of
+          R_INTREGISTER :
+            result:=taicpu.op_reg_ref(A_STR,r,ref);
+          R_MMREGISTER :
+            case getsubreg(r) of
+              R_SUBFD,
+              R_SUBFS:
+                result:=taicpu.op_reg_ref(A_STR,r,ref);
+              else
+                internalerror(2009112904);
+            end;
+          else
+            internalerror(200401041);
+        end;
+      end;
+
+
+    { Reports whether operand number "opnr" of this instruction is read or
+      written. Opcodes that are not listed raise an internal error. }
+    function taicpu.spilling_get_operation_type(opnr: longint): topertype;
+      begin
+        case opcode of
+          { every operand is only read.
+            NOTE(review): for A_STR the original code carried a placeholder
+            "check for pre/post indexed" on the non-zero operands but
+            returned operand_read there as well - confirm whether indexed
+            forms need different handling }
+          A_B,A_BL,
+          A_CMN,A_CMP,A_TST,
+          A_STR:
+            result:=operand_read;
+          { operand 0 is written, all other operands are read }
+          A_ADC,A_ADD,A_AND,A_BIC,
+          A_EOR,A_CLZ,A_RBIT,
+          A_LDR,
+          A_MOV,A_MVN,A_MUL,
+          A_ORR,A_SBC,A_SUB,
+          A_UXT,A_SXT:
+            begin
+              result:=operand_read;
+              if opnr=0 then
+                result:=operand_write;
+            end;
+          else
+            internalerror(200403151);
+        end;
+      end;
+
+
+    { Intended to allocate InsTabCache and record, per opcode, the index of
+      its first InsTab entry. The whole body is commented out (as is the
+      InsTab declaration in the interface), so this procedure currently
+      does nothing and "i" is unused. }
+    procedure BuildInsTabCache;
+      var
+        i : longint;
+      begin
+(*        new(instabcache);
+        FillChar(instabcache^,sizeof(tinstabcache),$ff);
+        i:=0;
+        while (i<InsTabEntries) do
+          begin
+            if InsTabCache^[InsTab[i].Opcode]=-1 then
+              InsTabCache^[InsTab[i].Opcode]:=i;
+            inc(i);
+          end; *)
+      end;
+
+
+    { Calls BuildInsTabCache unless the instruction table cache has
+      already been allocated. }
+    procedure InitAsm;
+      begin
+        if not assigned(instabcache) then
+          BuildInsTabCache;
+      end;
+
+
+    { Frees the instruction table cache, if one was allocated, and resets
+      the pointer to nil so a later InitAsm starts from scratch. }
+    procedure DoneAsm;
+      begin
+        if assigned(instabcache) then
+          begin
+            dispose(instabcache);
+            instabcache:=nil;
+          end;
+      end;
+
+
+    { Sets the opcode postfix of "i" to "pf" and returns the same
+      instruction, so the call can wrap a constructor expression. }
+    function setoppostfix(i : taicpu;pf : toppostfix) : taicpu;
+      begin
+        i.oppostfix:=pf;
+        result:=i;
+      end;
+
+
+    { Sets the condition of "i" to "c" and returns the same instruction,
+      so the call can wrap a constructor expression. }
+    function setcondition(i : taicpu;c : tasmcond) : taicpu;
+      begin
+        i.condition:=c;
+        result:=i;
+      end;
+
+
+    { Finds the first list entry after "Current" whose type is not in
+      SkipInstr. On success "Next" receives that entry and the result is
+      true; when the list ends first, "Next" is nil and the result is
+      false. "Current" itself must not be nil. }
+    Function SimpleGetNextInstruction(Current: tai; Var Next: tai): Boolean;
+      Begin
+        { step at least once, then keep stepping over skippable entries }
+        Repeat
+          Current:=tai(Current.Next);
+        Until Not Assigned(Current) Or Not(Current.typ In SkipInstr);
+        { here Current is either nil or a non-skippable entry }
+        Next:=Current;
+        Result:=Assigned(Next);
+      End;
+
+
+(*
+    function armconstequal(hp1,hp2: tai): boolean;
+      begin
+        result:=false;
+        if hp1.typ<>hp2.typ then
+          exit;
+        case hp1.typ of
+          tai_const:
+            result:=
+              (tai_const(hp2).sym=tai_const(hp).sym) and
+              (tai_const(hp2).value=tai_const(hp).value) and
+              (tai(hp2.previous).typ=ait_label);
+            tai_const:
+              result:=
+                (tai_const(hp2).sym=tai_const(hp).sym) and
+                (tai_const(hp2).value=tai_const(hp).value) and
+                (tai(hp2.previous).typ=ait_label);
+        end;
+      end;
+*)
+
+    procedure insertpcrelativedata(list,listtoinsert : TAsmList);
+      var
+        curinspos,
+        penalty,
+        lastinspos,
+        { increased for every data element > 4 bytes inserted }
+        currentsize,
+        extradataoffset,
+        limit: longint;
+        curop : longint;
+        curtai : tai;
+        curdatatai,hp,hp2 : tai;
+        curdata : TAsmList;
+        l : tasmlabel;
+        doinsert,
+        removeref : boolean;
+      begin
+(*
+        curdata:=TAsmList.create;
+        lastinspos:=-1;
+        curinspos:=0;
+        extradataoffset:=0;
+        limit:=1016;
+        curtai:=tai(list.first);
+        doinsert:=false;
+        while assigned(curtai) do
+          begin
+            { instruction? }
+            case curtai.typ of
+              ait_instruction:
+                begin
+                  { walk through all operand of the instruction }
+                  for curop:=0 to taicpu(curtai).ops-1 do
+                    begin
+                      { reference? }
+                      if (taicpu(curtai).oper[curop]^.typ=top_ref) then
+                        begin
+                          { pc relative symbol? }
+                          curdatatai:=tai(taicpu(curtai).oper[curop]^.ref^.symboldata);
+                          if assigned(curdatatai) and
+                            { move only if we're at the first reference of a label }
+                            not(tai_label(curdatatai).moved) then
+                            begin
+                              tai_label(curdatatai).moved:=true;
+                              { check if symbol already used. }
+                              { if yes, reuse the symbol }
+                              hp:=tai(curdatatai.next);
+                              removeref:=false;
+                              if assigned(hp) then
+                                begin
+                                  case hp.typ of
+                                    ait_const:
+                                      begin
+                                        if (tai_const(hp).consttype=aitconst_64bit) then
+                                          inc(extradataoffset);
+                                      end;
+                                    ait_comp_64bit,
+                                    ait_real_64bit:
+                                      begin
+                                        inc(extradataoffset);
+                                      end;
+                                    ait_real_80bit:
+                                      begin
+                                        inc(extradataoffset,2);
+                                      end;
+                                  end;
+                                  if (hp.typ=ait_const) then
+                                    begin
+                                      hp2:=tai(curdata.first);
+                                      while assigned(hp2) do
+                                        begin
+    {                                      if armconstequal(hp2,hp) then }
+                                          if (hp2.typ=ait_const) and (tai_const(hp2).sym=tai_const(hp).sym)
+                                            and (tai_const(hp2).value=tai_const(hp).value) and (tai(hp2.previous).typ=ait_label)
+                                          then
+                                            begin
+                                              with taicpu(curtai).oper[curop]^.ref^ do
+                                                begin
+                                                  symboldata:=hp2.previous;
+                                                  symbol:=tai_label(hp2.previous).labsym;
+                                                end;
+                                              removeref:=true;
+                                              break;
+                                            end;
+                                          hp2:=tai(hp2.next);
+                                        end;
+                                    end;
+                                end;
+                              { move or remove symbol reference }
+                              repeat
+                                hp:=tai(curdatatai.next);
+                                listtoinsert.remove(curdatatai);
+                                if removeref then
+                                  curdatatai.free
+                                else
+                                  curdata.concat(curdatatai);
+                                curdatatai:=hp;
+                              until (curdatatai=nil) or (curdatatai.typ=ait_label);
+                              if lastinspos=-1 then
+                                lastinspos:=curinspos;
+                            end;
+                        end;
+                    end;
+                  inc(curinspos);
+                end;
+              ait_align:
+                begin
+                  { code is always 4 byte aligned, so we don't have to take care of .align 2 which would
+                    requires also incrementing curinspos by 1 }
+                  inc(curinspos,(tai_align(curtai).aligntype div 4));
+                end;
+              ait_const:
+                begin
+                  inc(curinspos);
+                  if (tai_const(curtai).consttype=aitconst_64bit) then
+                    inc(curinspos);
+                end;
+              ait_real_32bit:
+                begin
+                  inc(curinspos);
+                end;
+              ait_comp_64bit,
+              ait_real_64bit:
+                begin
+                  inc(curinspos,2);
+                end;
+              ait_real_80bit:
+                begin
+                  inc(curinspos,3);
+                end;
+            end;
+            { special case for case jump tables }
+            if SimpleGetNextInstruction(curtai,hp) and
+              (tai(hp).typ=ait_instruction) and
+              (taicpu(hp).opcode=A_LDR) and
+              (taicpu(hp).oper[0]^.typ=top_reg) and
+              (taicpu(hp).oper[0]^.reg=NR_PC) then
+              begin
+                penalty:=1;
+                hp:=tai(hp.next);
+                { skip register allocations and comments inserted by the optimizer }
+                while assigned(hp) and (hp.typ in [ait_comment,ait_regalloc]) do
+                  hp:=tai(hp.next);
+                while assigned(hp) and (hp.typ=ait_const) do
+                  begin
+                    inc(penalty);
+                    hp:=tai(hp.next);
+                  end;
+              end
+            else
+              penalty:=0;
+
+            { FLD/FST VFP instructions have a limit of +/- 1024, not 4096 }
+            if SimpleGetNextInstruction(curtai,hp) and
+               (tai(hp).typ=ait_instruction) and
+               ((taicpu(hp).opcode=A_FLDS) or
+                (taicpu(hp).opcode=A_FLDD)) then
+              limit:=254;
+
+            { don't miss an insert }
+            doinsert:=doinsert or
+              (not(curdata.empty) and
+               (curinspos-lastinspos+penalty+extradataoffset>limit));
+
+            { split only at real instructions else the test below fails }
+            if doinsert and (curtai.typ=ait_instruction) and
+              (
+                { don't split loads of pc to lr and the following move }
+                not(
+                    (taicpu(curtai).opcode=A_MOV) and
+                    (taicpu(curtai).oper[0]^.typ=top_reg) and
+                    (taicpu(curtai).oper[0]^.reg=NR_R14) and
+                    (taicpu(curtai).oper[1]^.typ=top_reg) and
+                    (taicpu(curtai).oper[1]^.reg=NR_PC)
+                   )
+              ) then
+              begin
+                lastinspos:=-1;
+                extradataoffset:=0;
+                limit:=1016;
+                doinsert:=false;
+                hp:=tai(curtai.next);
+                current_asmdata.getjumplabel(l);
+                curdata.insert(taicpu.op_sym(A_B,l));
+                curdata.concat(tai_label.create(l));
+                list.insertlistafter(curtai,curdata);
+                curtai:=hp;
+              end
+            else
+              curtai:=tai(curtai.next);
+          end;
+        list.concatlist(curdata);
+        curdata.free;
+*)
+      end;
+
+
+    procedure finalizearmcode(list, listtoinsert: TAsmList);
+      { Finalizes the generated code in list. All work is delegated to
+        insertpcrelativedata, which receives both lists unchanged. }
+      begin
+        insertpcrelativedata(list, listtoinsert);
+      end;
+
+    procedure InsertPData;
+      { Emits an exception handling prolog (section FPC_EH_PROLOG) into the
+        al_start assembler list and a sec_pdata section referring to the
+        FPC_EH_CODE_START symbol into the al_end assembler list. }
+      var
+        ehprolog,
+        pdatalist: TAsmList;
+      begin
+        { build the prolog in a temporary list first }
+        ehprolog:=TAsmList.create;
+        new_section(ehprolog,sec_code,'FPC_EH_PROLOG',sizeof(pint),secorder_begin);
+        ehprolog.concat(Tai_const.Createname('_ARM_ExceptionHandler', 0));
+        ehprolog.concat(Tai_const.Create_32bit(0));
+        ehprolog.concat(Tai_symbol.Createname_global('FPC_EH_CODE_START',AT_DATA,0));
+        { dummy function }
+        ehprolog.concat(taicpu.op_reg(A_BR,NR_X29));
+        { insert the prolog into the al_start list and release the
+          temporary list object }
+        current_asmdata.asmlists[al_start].insertList(ehprolog);
+        ehprolog.Free;
+
+        { the pdata entries go to the al_end list }
+        pdatalist:=current_asmdata.asmlists[al_end];
+        new_section(pdatalist,sec_pdata,'',sizeof(pint));
+        pdatalist.concat(Tai_const.Createname('FPC_EH_CODE_START', 0));
+        pdatalist.concat(Tai_const.Create_32bit(longint($ffffff01)));
+      end;
+
+(*
+      Floating point instruction format information, taken from the linux kernel
+      ARM Floating Point Instruction Classes
+      | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+      |c o n d|1 1 0 P|U|u|W|L|   Rn  |v|  Fd |0|0|0|1|  o f f s e t  | CPDT
+      |c o n d|1 1 0 P|U|w|W|L|   Rn  |x|  Fd |0|0|1|0|  o f f s e t  | CPDT (copro 2)
+      | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+      |c o n d|1 1 1 0|a|b|c|d|e|  Fn |j|  Fd |0|0|0|1|f|g|h|0|i|  Fm | CPDO
+      |c o n d|1 1 1 0|a|b|c|L|e|  Fn |   Rd  |0|0|0|1|f|g|h|1|i|  Fm | CPRT
+      |c o n d|1 1 1 0|a|b|c|1|e|  Fn |1|1|1|1|0|0|0|1|f|g|h|1|i|  Fm | comparisons
+      | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+
+      CPDT            data transfer instructions
+                      LDF, STF, LFM (copro 2), SFM (copro 2)
+
+      CPDO            dyadic arithmetic instructions
+                      ADF, MUF, SUF, RSF, DVF, RDF,
+                      POW, RPW, RMF, FML, FDV, FRD, POL
+
+      CPDO            monadic arithmetic instructions
+                      MVF, MNF, ABS, RND, SQT, LOG, LGN, EXP,
+                      SIN, COS, TAN, ASN, ACS, ATN, URD, NRM
+
+      CPRT            joint arithmetic/data transfer instructions
+                      FIX (arithmetic followed by load/store)
+                      FLT (load/store followed by arithmetic)
+                      CMF, CNF CMFE, CNFE (comparisons)
+                      WFS, RFS (write/read floating point status register)
+                      WFC, RFC (write/read floating point control register)
+
+      cond            condition codes
+      P               pre/post index bit: 0 = postindex, 1 = preindex
+      U               up/down bit: 0 = stack grows down, 1 = stack grows up
+      W               write back bit: 1 = update base register (Rn)
+      L               load/store bit: 0 = store, 1 = load
+      Rn              base register
+      Rd              destination/source register
+      Fd              floating point destination register
+      Fn              floating point source register
+      Fm              floating point source register or floating point constant
+
+      uv              transfer length (TABLE 1)
+      wx              register count (TABLE 2)
+      abcd            arithmetic opcode (TABLES 3 & 4)
+      ef              destination size (rounding precision) (TABLE 5)
+      gh              rounding mode (TABLE 6)
+      j               dyadic/monadic bit: 0 = dyadic, 1 = monadic
+      i               constant bit: 1 = constant (TABLE 6)
+      */
+
+      /*
+      TABLE 1
+      +-------------------------+---+---+---------+---------+
+      |  Precision              | u | v | FPSR.EP | length  |
+      +-------------------------+---+---+---------+---------+
+      | Single                  | 0 | 0 |    x    | 1 words |
+      | Double                  | 1 | 1 |    x    | 2 words |
+      | Extended                | 1 | 1 |    x    | 3 words |
+      | Packed decimal          | 1 | 1 |    0    | 3 words |
+      | Expanded packed decimal | 1 | 1 |    1    | 4 words |
+      +-------------------------+---+---+---------+---------+
+      Note: x = don't care
+      */
+
+      /*
+      TABLE 2
+      +---+---+---------------------------------+
+      | w | x | Number of registers to transfer |
+      +---+---+---------------------------------+
+      | 0 | 1 |  1                              |
+      | 1 | 0 |  2                              |
+      | 1 | 1 |  3                              |
+      | 0 | 0 |  4                              |
+      +---+---+---------------------------------+
+      */
+
+      /*
+      TABLE 3: Dyadic Floating Point Opcodes
+      +---+---+---+---+----------+-----------------------+-----------------------+
+      | a | b | c | d | Mnemonic | Description           | Operation             |
+      +---+---+---+---+----------+-----------------------+-----------------------+
+      | 0 | 0 | 0 | 0 | ADF      | Add                   | Fd := Fn + Fm         |
+      | 0 | 0 | 0 | 1 | MUF      | Multiply              | Fd := Fn * Fm         |
+      | 0 | 0 | 1 | 0 | SUF      | Subtract              | Fd := Fn - Fm         |
+      | 0 | 0 | 1 | 1 | RSF      | Reverse subtract      | Fd := Fm - Fn         |
+      | 0 | 1 | 0 | 0 | DVF      | Divide                | Fd := Fn / Fm         |
+      | 0 | 1 | 0 | 1 | RDF      | Reverse divide        | Fd := Fm / Fn         |
+      | 0 | 1 | 1 | 0 | POW      | Power                 | Fd := Fn ^ Fm         |
+      | 0 | 1 | 1 | 1 | RPW      | Reverse power         | Fd := Fm ^ Fn         |
+      | 1 | 0 | 0 | 0 | RMF      | Remainder             | Fd := IEEE rem(Fn/Fm) |
+      | 1 | 0 | 0 | 1 | FML      | Fast Multiply         | Fd := Fn * Fm         |
+      | 1 | 0 | 1 | 0 | FDV      | Fast Divide           | Fd := Fn / Fm         |
+      | 1 | 0 | 1 | 1 | FRD      | Fast reverse divide   | Fd := Fm / Fn         |
+      | 1 | 1 | 0 | 0 | POL      | Polar angle (ArcTan2) | Fd := arctan2(Fn,Fm)  |
+      | 1 | 1 | 0 | 1 |          | undefined instruction | trap                  |
+      | 1 | 1 | 1 | 0 |          | undefined instruction | trap                  |
+      | 1 | 1 | 1 | 1 |          | undefined instruction | trap                  |
+      +---+---+---+---+----------+-----------------------+-----------------------+
+      Note: POW, RPW, POL are deprecated, and are available for backwards
+            compatibility only.
+      */
+
+      /*
+      TABLE 4: Monadic Floating Point Opcodes
+      +---+---+---+---+----------+-----------------------+-----------------------+
+      | a | b | c | d | Mnemonic | Description           | Operation             |
+      +---+---+---+---+----------+-----------------------+-----------------------+
+      | 0 | 0 | 0 | 0 | MVF      | Move                  | Fd := Fm              |
+      | 0 | 0 | 0 | 1 | MNF      | Move negated          | Fd := - Fm            |
+      | 0 | 0 | 1 | 0 | ABS      | Absolute value        | Fd := abs(Fm)         |
+      | 0 | 0 | 1 | 1 | RND      | Round to integer      | Fd := int(Fm)         |
+      | 0 | 1 | 0 | 0 | SQT      | Square root           | Fd := sqrt(Fm)        |
+      | 0 | 1 | 0 | 1 | LOG      | Log base 10           | Fd := log10(Fm)       |
+      | 0 | 1 | 1 | 0 | LGN      | Log base e            | Fd := ln(Fm)          |
+      | 0 | 1 | 1 | 1 | EXP      | Exponent              | Fd := e ^ Fm          |
+      | 1 | 0 | 0 | 0 | SIN      | Sine                  | Fd := sin(Fm)         |
+      | 1 | 0 | 0 | 1 | COS      | Cosine                | Fd := cos(Fm)         |
+      | 1 | 0 | 1 | 0 | TAN      | Tangent               | Fd := tan(Fm)         |
+      | 1 | 0 | 1 | 1 | ASN      | Arc Sine              | Fd := arcsin(Fm)      |
+      | 1 | 1 | 0 | 0 | ACS      | Arc Cosine            | Fd := arccos(Fm)      |
+      | 1 | 1 | 0 | 1 | ATN      | Arc Tangent           | Fd := arctan(Fm)      |
+      | 1 | 1 | 1 | 0 | URD      | Unnormalized round    | Fd := int(Fm)         |
+      | 1 | 1 | 1 | 1 | NRM      | Normalize             | Fd := norm(Fm)        |
+      +---+---+---+---+----------+-----------------------+-----------------------+
+      Note: LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN are deprecated, and are
+            available for backwards compatibility only.
+      */
+
+      /*
+      TABLE 5
+      +-------------------------+---+---+
+      |  Rounding Precision     | e | f |
+      +-------------------------+---+---+
+      | IEEE Single precision   | 0 | 0 |
+      | IEEE Double precision   | 0 | 1 |
+      | IEEE Extended precision | 1 | 0 |
+      | undefined (trap)        | 1 | 1 |
+      +-------------------------+---+---+
+      */
+
+      /*
+      TABLE 6
+      +---------------------------------+---+---+
+      |  Rounding Mode                  | g | h |
+      +---------------------------------+---+---+
+      | Round to nearest (default)      | 0 | 0 |
+      | Round toward plus infinity      | 0 | 1 |
+      | Round toward negative infinity  | 1 | 0 |
+      | Round toward zero               | 1 | 1 |
+      +---------------------------------+---+---+
+*)
+    function taicpu.GetString:string;
+      { Builds a bracketed, human readable description of the instruction:
+        the opcode mnemonic followed by one entry per operand that names the
+        operand class taken from its ot flags and, for some classes, its size
+        and signedness. }
+      var
+        idx : longint;
+        txt : string;
+        withsize : boolean;
+      begin
+        txt:='['+gas_op2str[opcode];
+        for idx:=0 to ops-1 do
+          with oper[idx]^ do
+            begin
+              { a blank separates the mnemonic from the first operand,
+                commas separate the operands from each other }
+              if idx<>0 then
+                txt:=txt+','
+              else
+                txt:=txt+' ';
+              { operand class; the order of the tests matters because the
+                first matching mask wins }
+              withsize:=false;
+              if (ot and OT_VREG)=OT_VREG then
+                txt:=txt+'vreg'
+              else if (ot and OT_FPUREG)=OT_FPUREG then
+                txt:=txt+'fpureg'
+              else if (ot and OT_REGISTER)=OT_REGISTER then
+                begin
+                  txt:=txt+'reg';
+                  withsize:=true;
+                end
+              else if (ot and OT_REGLIST)=OT_REGLIST then
+                { register lists carry no size suffix }
+                txt:=txt+'reglist'
+              else if (ot and OT_IMMEDIATE)=OT_IMMEDIATE then
+                begin
+                  txt:=txt+'imm';
+                  withsize:=true;
+                end
+              else if (ot and OT_MEMORY)=OT_MEMORY then
+                begin
+                  txt:=txt+'mem';
+                  withsize:=true;
+                  if (ot and OT_AM2)<>0 then
+                    txt:=txt+' am2 ';
+                end
+              else
+                txt:=txt+'???';
+              { size suffix, only for the classes that requested one above }
+              if withsize then
+                begin
+                  if (ot and OT_BITS8)<>0 then
+                    txt:=txt+'8'
+                  { NOTE(review): the OT_BITS16 flag is rendered as '24';
+                    confirm whether this is intentional }
+                  else if (ot and OT_BITS16)<>0 then
+                    txt:=txt+'24'
+                  else if (ot and OT_BITS32)<>0 then
+                    txt:=txt+'32'
+                  else if (ot and OT_BITSSHIFTER)<>0 then
+                    txt:=txt+'shifter'
+                  else
+                    txt:=txt+'??';
+                  { signedness marker }
+                  if (ot and OT_SIGNED)<>0 then
+                    txt:=txt+'s';
+                end;
+            end;
+        GetString:=txt+']';
+      end;
+
+
+    procedure taicpu.ResetPass1;
+      { Clears all cached encoding state of the instruction. }
+      begin
+        { everything has to be reset here: an instruction entry chosen
+          earlier may no longer be valid in a new situation, so the
+          previously optimized choice must not be reused }
+        LastInsOffset:=-1;
+        InsSize:=0;
+        InsEntry:=nil;
+      end;
+
+
+    procedure taicpu.ResetPass2;
+      { Prepares the instruction for a second pass: the last instruction
+        offset is always invalidated, the selected instruction entry only if
+        it carries the IF_PASS2 flag. }
+      begin
+        LastInsOffset:=-1;
+        { second pass: drop the entry if the instruction can be optimized }
+        if assigned(InsEntry) then
+          if (InsEntry^.flags and IF_PASS2)<>0 then
+            begin
+              InsSize:=0;
+              InsEntry:=nil;
+            end;
+      end;
+
+
+    function taicpu.CheckIfValid:boolean;
+      { Not implemented; always reports False. }
+      begin
+        CheckIfValid:=False; { unimplemented }
+      end;
+
+
+    function taicpu.Pass1(objdata:TObjData):longint;
+      { First pass: invalidates the last instruction offset and always
+        returns 0. objdata is not used. }
+      begin
+        LastInsOffset:=-1;
+        Result:=0;
+      end;
+
+
+    procedure taicpu.Pass2(objdata:TObjData);
+      { Second pass: generates the code of the instruction into objdata.
+        Nothing is done if no instruction entry has been selected. }
+      begin
+        { a nil entry means pass 1 did not find an instruction entry }
+        if assigned(insentry) then
+          begin
+            current_filepos:=fileinfo;
+            { generate the instruction }
+            GenCode(objdata);
+          end;
+      end;
+
+
+    procedure taicpu.ppuloadoper(ppufile:tcompilerppufile;var o:toper);
+      { Empty body: nothing is read from ppufile and o is left untouched.
+        NOTE(review): presumably a placeholder until operand loading is
+        implemented for this target - confirm. }
+      begin
+      end;
+
+
+    procedure taicpu.ppuwriteoper(ppufile:tcompilerppufile;const o:toper);
+      { Empty body: nothing is written to ppufile.
+        NOTE(review): presumably a placeholder until operand writing is
+        implemented for this target - confirm. }
+      begin
+      end;
+
+
+    procedure taicpu.ppubuildderefimploper(var o:toper);
+      { Empty body: o is left untouched.
+        NOTE(review): presumably a placeholder - confirm. }
+      begin
+      end;
+
+
+    procedure taicpu.ppuderefoper(var o:toper);
+      { Empty body: o is left untouched.
+        NOTE(review): presumably a placeholder - confirm. }
+      begin
+      end;
+
+
+    function  taicpu.InsEnd:longint;
+      { Not implemented; always returns 0. }
+      begin
+        InsEnd:=0; { unimplemented }
+      end;
+
+
+    procedure taicpu.create_ot(objdata:TObjData);
+      { Empty body: objdata is not used and no operand state is changed.
+        NOTE(review): presumably a placeholder until operand type creation
+        is implemented for this target - confirm. }
+      begin
+      end;
+
+
+    function taicpu.Matches(p:PInsEntry):longint;
+      { Not implemented for this target yet; p is ignored. The result is set
+        explicitly so that callers get a defined value instead of an
+        uninitialized one, in line with the other unimplemented stubs of
+        this unit.
+        NOTE(review): 0 is assumed to be the appropriate "no match" value -
+        confirm against the callers. }
+      begin
+        Result:=0; { unimplemented }
+      end;
+
+
+    function  taicpu.calcsize(p:PInsEntry):shortint;
+      { Returns the constant 4 for every instruction entry; p is not
+        inspected. }
+      begin
+        calcsize:=4;
+      end;
+
+
+    function  taicpu.NeedAddrPrefix(opidx:byte):boolean;
+      { Not implemented; always returns False, opidx is ignored. }
+      begin
+        NeedAddrPrefix:=False; { unimplemented }
+      end;
+
+
+    procedure taicpu.Swapoperands;
+      { Empty body: the operands are left in their current order.
+        NOTE(review): presumably a placeholder - confirm. }
+      begin
+      end;
+
+
+    function taicpu.FindInsentry(objdata:TObjData):boolean;
+      { Not implemented for this target yet; objdata is ignored. The result
+        is set explicitly so that callers get a defined value instead of an
+        uninitialized one, in line with the other unimplemented stubs of
+        this unit. }
+      begin
+        Result:=False; { unimplemented }
+      end;
+
+
+    procedure taicpu.gencode(objdata:TObjData);
+      { Encodes the instruction into a single dword, selected by the first
+        code byte of insentry, and writes that dword to objdata. Only code
+        #$08 is handled; every other code raises an internal error. }
+      var
+        bytes : dword;
+        i_field : byte;
+
+      { Merges operand number op into bytes as the shifter operand and sets
+        i_field to 1 for a constant and to 0 for a register. Any other
+        operand type raises an internal error. }
+      procedure setshifterop(op : byte);
+        begin
+          case oper[op]^.typ of
+            top_const:
+              begin
+                i_field:=1;
+                { only the low 12 bits of the constant are used }
+                bytes:=bytes or dword(oper[op]^.val and $fff);
+              end;
+            top_reg:
+              begin
+                i_field:=0;
+                bytes:=bytes or (getsupreg(oper[op]^.reg) shl 16);
+
+                { does a real shifter op follow? Valid operand indices are
+                  0..ops-1, so a following operand exists only if op+1<ops.
+                  The encoding of that shifter operand is not implemented. }
+                if (op+1<ops) and (oper[op+1]^.typ=top_shifterop) then
+                  begin
+                  end;
+              end;
+          else
+            internalerror(2005091103);
+          end;
+        end;
+
+      begin
+        bytes:=$0;
+        { evaluate and set condition code }
+
+        { condition code allowed? }
+
+        { setup rest of the instruction }
+        case insentry^.code[0] of
+          #$08:
+            begin
+              { set instruction code }
+              bytes:=bytes or (ord(insentry^.code[1]) shl 26);
+              bytes:=bytes or (ord(insentry^.code[2]) shl 21);
+
+              { set destination }
+              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
+
+              { create shifter op; this also determines i_field }
+              setshifterop(1);
+
+              { set i field }
+              bytes:=bytes or (i_field shl 25);
+
+              { set s if necessary }
+              if oppostfix=PF_S then
+                bytes:=bytes or (1 shl 20);
+            end;
+          #$ff:
+            internalerror(2005091101);
+          else
+            internalerror(2005091102);
+        end;
+        { we're finished, write code }
+        objdata.writebytes(bytes,sizeof(bytes));
+      end;
+
+
+{$ifdef dummy}
+(*
+static void gencode (long segment, long offset, int bits,
+                     insn *ins, char *codes, long insn_end)
+{
+    int has_S_code;             /* S - setflag */
+    int has_B_code;             /* B - setflag */
+    int has_T_code;             /* T - setflag */
+    int has_W_code;             /* ! => W flag */
+    int has_F_code;             /* ^ => S flag */
+    int keep;
+    unsigned char c;
+    unsigned char bytes[4];
+    long          data, size;
+    static int cc_code[] =      /* bit pattern of cc */
+  {                             /* order as enum in  */
+    0x0E, 0x03, 0x02, 0x00,     /* nasm.h            */
+    0x0A, 0x0C, 0x08, 0x0D,
+    0x09, 0x0B, 0x04, 0x01,
+    0x05, 0x07, 0x06,
+  };
+
+
+#ifdef DEBUG
+static char *CC[] =
+  {                                    /* condition code names */
+    "AL", "CC", "CS", "EQ",
+    "GE", "GT", "HI", "LE",
+    "LS", "LT", "MI", "NE",
+    "PL", "VC", "VS", "",
+    "S"
+};
+
+
+    has_S_code = (ins->condition & C_SSETFLAG);
+    has_B_code = (ins->condition & C_BSETFLAG);
+    has_T_code = (ins->condition & C_TSETFLAG);
+    has_W_code = (ins->condition & C_EXSETFLAG);
+    has_F_code = (ins->condition & C_FSETFLAG);
+    ins->condition = (ins->condition & 0x0F);
+
+
+    if (rt_debug)
+      {
+    printf ("gencode: instruction: %s%s", insn_names[ins->opcode],
+            CC[ins->condition & 0x0F]);
+    if (has_S_code)
+      printf ("S");
+    if (has_B_code)
+      printf ("B");
+    if (has_T_code)
+      printf ("T");
+    if (has_W_code)
+      printf ("!");
+    if (has_F_code)
+      printf ("^");
+
+    printf ("\n");
+
+    c = *codes;
+
+    printf ("   (%d)  decode - '0x%02X'\n", ins->operands, c);
+
+
+    bytes[0] = 0xB;
+    bytes[1] = 0xE;
+    bytes[2] = 0xE;
+    bytes[3] = 0xF;
+      }
+
+    // First condition code in upper nibble
+    if (ins->condition < C_NONE)
+      {
+        c = cc_code[ins->condition] << 4;
+      }
+    else
+      {
+        c = cc_code[C_AL] << 4; // is often ALWAYS but not always
+      }
+
+
+    switch (keep = *codes)
+      {
+        case 1:
+          // B, BL
+          ++codes;
+          c |= *codes++;
+          bytes[0] = c;
+
+          if (ins->oprs[0].segment != segment)
+            {
+              // perform a relocation
+              c = 1;
+              data = 0; // Let the linker locate ??
+            }
+          else
+            {
+              c = 0;
+              data = ins->oprs[0].offset - (offset + 8);
+
+              if (data % 4)
+                {
+                  errfunc (ERR_NONFATAL, "offset not aligned on 4 bytes");
+                }
+            }
+
+          if (data >= 0x1000)
+            {
+              errfunc (ERR_NONFATAL, "too long offset");
+            }
+
+          data = data >> 2;
+          bytes[1] = (data >> 16) & 0xFF;
+          bytes[2] = (data >> 8)  & 0xFF;
+          bytes[3] = (data )      & 0xFF;
+
+          if (c == 1)
+            {
+//            out (offset, segment, &bytes[0], OUT_RAWDATA+1, NO_SEG, NO_SEG);
+              out (offset, segment, &bytes[0], OUT_REL3ADR+4, ins->oprs[0].segment, NO_SEG);
+            }
+          else
+            {
+              out (offset, segment, &bytes[0], OUT_RAWDATA+4, NO_SEG, NO_SEG);
+            }
+          return;
+
+        case 2:
+          // SWI
+          ++codes;
+          c |= *codes++;
+          bytes[0] = c;
+          data = ins->oprs[0].offset;
+          bytes[1] = (data >> 16) & 0xFF;
+          bytes[2] = (data >> 8) & 0xFF;
+          bytes[3] = (data) & 0xFF;
+          out (offset, segment, &bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
+          return;
+        case 3:
+          // BX
+          ++codes;
+          c |= *codes++;
+          bytes[0] = c;
+          bytes[1] = *codes++;
+          bytes[2] = *codes++;
+          bytes[3] = *codes++;
+          c = regval (&ins->oprs[0],1);
+          if (c == 15)  // PC
+            {
+              errfunc (ERR_WARNING, "'BX' with R15 has undefined behaviour");
+            }
+          else if (c > 15)
+            {
+              errfunc (ERR_NONFATAL, "Illegal register specified for 'BX'");
+            }
+
+          bytes[3] |= (c & 0x0F);
+          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
+          return;
+
+        case 4:         // AND Rd,Rn,Rm
+        case 5:         // AND Rd,Rn,Rm,<shift>Rs
+        case 6:         // AND Rd,Rn,Rm,<shift>imm
+        case 7:         // AND Rd,Rn,<shift>imm
+          ++codes;
+#ifdef DEBUG
+          if (rt_debug)
+            {
+              printf ("         decode - '0x%02X'\n", keep);
+              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
+            }
+#endif
+          bytes[0] = c | *codes;
+          ++codes;
+
+          bytes[1] = *codes;
+          if (has_S_code)
+            bytes[1] |= 0x10;
+          c = regval (&ins->oprs[1],1);
+          // Rn in low nibble
+          bytes[1] |= c;
+
+          // Rd in high nibble
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+
+          if (keep != 7)
+            {
+              // Rm in low nibble
+              bytes[3] = regval (&ins->oprs[2],1);
+            }
+
+          // Shifts if any
+          if (keep == 5 || keep == 6)
+            {
+              // Shift in bytes 2 and 3
+              if (keep == 5)
+                {
+                  // Rs
+                  c = regval (&ins->oprs[3],1);
+                  bytes[2] |= c;
+
+                  c = 0x10;             // Set bit 4 in byte[3]
+                }
+              if (keep == 6)
+                {
+                  c = (ins->oprs[3].offset) & 0x1F;
+
+                  // #imm
+                  bytes[2] |= c >> 1;
+                  if (c & 0x01)
+                    {
+                      bytes[3] |= 0x80;
+                    }
+                  c = 0;                // Clr bit 4 in byte[3]
+                }
+              // <shift>
+              c |= shiftval (&ins->oprs[3]) << 5;
+
+              bytes[3] |= c;
+            }
+
+          // reg,reg,imm
+          if (keep == 7)
+            {
+              int shimm;
+
+              shimm = imm_shift (ins->oprs[2].offset);
+
+              if (shimm == -1)
+                {
+                  errfunc (ERR_NONFATAL, "cannot create that constant");
+                }
+              bytes[3] = shimm & 0xFF;
+              bytes[2] |= (shimm & 0xF00) >> 8;
+            }
+
+          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
+          return;
+
+        case 8:         // MOV Rd,Rm
+        case 9:         // MOV Rd,Rm,<shift>Rs
+        case 0xA:       // MOV Rd,Rm,<shift>imm
+        case 0xB:       // MOV Rd,<shift>imm
+          ++codes;
+#ifdef DEBUG
+          if (rt_debug)
+            {
+              printf ("         decode - '0x%02X'\n", keep);
+              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
+            }
+#endif
+          bytes[0] = c | *codes;
+          ++codes;
+
+          bytes[1] = *codes;
+          if (has_S_code)
+            bytes[1] |= 0x10;
+
+          // Rd in high nibble
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+
+          if (keep != 0x0B)
+            {
+              // Rm in low nibble
+              bytes[3] = regval (&ins->oprs[1],1);
+            }
+
+          // Shifts if any
+          if (keep == 0x09 || keep == 0x0A)
+            {
+              // Shift in bytes 2 and 3
+              if (keep == 0x09)
+                {
+                  // Rs
+                  c = regval (&ins->oprs[2],1);
+                  bytes[2] |= c;
+
+                  c = 0x10;             // Set bit 4 in byte[3]
+                }
+              if (keep == 0x0A)
+                {
+                  c = (ins->oprs[2].offset) & 0x1F;
+
+                  // #imm
+                  bytes[2] |= c >> 1;
+                  if (c & 0x01)
+                    {
+                      bytes[3] |= 0x80;
+                    }
+                  c = 0;                // Clr bit 4 in byte[3]
+                }
+              // <shift>
+              c |= shiftval (&ins->oprs[2]) << 5;
+
+              bytes[3] |= c;
+            }
+
+          // reg,imm
+          if (keep == 0x0B)
+            {
+              int shimm;
+
+              shimm = imm_shift (ins->oprs[1].offset);
+
+              if (shimm == -1)
+                {
+                  errfunc (ERR_NONFATAL, "cannot create that constant");
+                }
+              bytes[3] = shimm & 0xFF;
+              bytes[2] |= (shimm & 0xF00) >> 8;
+            }
+
+          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
+          return;
+
+
+        case 0xC:       // CMP Rn,Rm
+        case 0xD:       // CMP Rn,Rm,<shift>Rs
+        case 0xE:       // CMP Rn,Rm,<shift>imm
+        case 0xF:       // CMP Rn,<shift>imm
+          ++codes;
+
+          bytes[0] = c | *codes++;
+
+          bytes[1] = *codes;
+
+          // Implicit S code
+          bytes[1] |= 0x10;
+
+          c = regval (&ins->oprs[0],1);
+          // Rn in low nibble
+          bytes[1] |= c;
+
+          // No destination
+          bytes[2] = 0;
+
+          if (keep != 0x0B)
+            {
+              // Rm in low nibble
+              bytes[3] = regval (&ins->oprs[1],1);
+            }
+
+          // Shifts if any
+          if (keep == 0x0D || keep == 0x0E)
+            {
+              // Shift in bytes 2 and 3
+              if (keep == 0x0D)
+                {
+                  // Rs
+                  c = regval (&ins->oprs[2],1);
+                  bytes[2] |= c;
+
+                  c = 0x10;             // Set bit 4 in byte[3]
+                }
+              if (keep == 0x0E)
+                {
+                  c = (ins->oprs[2].offset) & 0x1F;
+
+                  // #imm
+                  bytes[2] |= c >> 1;
+                  if (c & 0x01)
+                    {
+                      bytes[3] |= 0x80;
+                    }
+                  c = 0;                // Clr bit 4 in byte[3]
+                }
+              // <shift>
+              c |= shiftval (&ins->oprs[2]) << 5;
+
+              bytes[3] |= c;
+            }
+
+          // reg,imm
+          if (keep == 0x0F)
+            {
+              int shimm;
+
+              shimm = imm_shift (ins->oprs[1].offset);
+
+              if (shimm == -1)
+                {
+                  errfunc (ERR_NONFATAL, "cannot create that constant");
+                }
+              bytes[3] = shimm & 0xFF;
+              bytes[2] |= (shimm & 0xF00) >> 8;
+            }
+
+          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
+          return;
+
+        case 0x10:      // MRS Rd,<psr>
+          ++codes;
+
+          bytes[0] = c | *codes++;
+
+          bytes[1] = *codes++;
+
+          // Rd
+          c = regval (&ins->oprs[0],1);
+
+          bytes[2] = c << 4;
+
+          bytes[3] = 0;
+
+          c = ins->oprs[1].basereg;
+
+          if (c == R_CPSR || c == R_SPSR)
+            {
+              if (c == R_SPSR)
+                {
+                  bytes[1] |= 0x40;
+                }
+            }
+          else
+            {
+              errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
+            }
+
+          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
+
+          return;
+
+        case 0x11:      // MSR <psr>,Rm
+        case 0x12:      // MSR <psrf>,Rm
+        case 0x13:      // MSR <psrf>,#expression
+          ++codes;
+
+          bytes[0] = c | *codes++;
+
+          bytes[1] = *codes++;
+
+          bytes[2] = *codes;
+
+
+          if (keep == 0x11 || keep == 0x12)
+            {
+              // Rm
+              c = regval (&ins->oprs[1],1);
+
+              bytes[3] = c;
+            }
+          else
+            {
+              int shimm;
+
+              shimm = imm_shift (ins->oprs[1].offset);
+
+              if (shimm == -1)
+                {
+                  errfunc (ERR_NONFATAL, "cannot create that constant");
+                }
+              bytes[3] = shimm & 0xFF;
+              bytes[2] |= (shimm & 0xF00) >> 8;
+            }
+
+          c = ins->oprs[0].basereg;
+
+          if ( keep == 0x11)
+            {
+              if ( c == R_CPSR || c == R_SPSR)
+                {
+                if ( c== R_SPSR)
+                  {
+                    bytes[1] |= 0x40;
+                  }
+                }
+            else
+              {
+                errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
+              }
+            }
+          else
+            {
+              if ( c == R_CPSR_FLG || c == R_SPSR_FLG)
+                {
+                  if ( c== R_SPSR_FLG)
+                    {
+                      bytes[1] |= 0x40;
+                    }
+                }
+              else
+                {
+                  errfunc (ERR_NONFATAL, "CPSR_flg or SPSR_flg expected");
+                }
+            }
+          break;
+
+        case 0x14:      // MUL  Rd,Rm,Rs
+        case 0x15:      // MULA Rd,Rm,Rs,Rn
+          ++codes;
+
+          bytes[0] = c | *codes++;
+
+          bytes[1] = *codes++;
+
+          bytes[3] = *codes;
+
+          // Rd
+          bytes[1] |= regval (&ins->oprs[0],1);
+          if (has_S_code)
+            bytes[1] |= 0x10;
+
+          // Rm
+          bytes[3] |= regval (&ins->oprs[1],1);
+
+          // Rs
+          bytes[2] = regval (&ins->oprs[2],1);
+
+          if (keep == 0x15)
+            {
+              bytes[2] |= regval (&ins->oprs[3],1) << 4;
+            }
+          break;
+
+        case 0x16:      // SMLAL RdHi,RdLo,Rm,Rs
+          ++codes;
+
+          bytes[0] = c | *codes++;
+
+          bytes[1] = *codes++;
+
+          bytes[3] = *codes;
+
+          // RdHi
+          bytes[1] |= regval (&ins->oprs[1],1);
+          if (has_S_code)
+            bytes[1] |= 0x10;
+
+          // RdLo
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+          // Rm
+          bytes[3] |= regval (&ins->oprs[2],1);
+
+          // Rs
+          bytes[2] |= regval (&ins->oprs[3],1);
+
+          break;
+
+        case 0x17:      // LDR Rd, expression
+          ++codes;
+
+          bytes[0] = c | *codes++;
+
+          bytes[1] = *codes++;
+
+          // Rd
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+          if (has_B_code)
+            bytes[1] |= 0x40;
+          if (has_T_code)
+            {
+              errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
+            }
+          if (has_W_code)
+            {
+              errfunc (ERR_NONFATAL, "'!' not allowed");
+            }
+
+          // Rn - implicit R15
+          bytes[1] |= 0xF;
+
+          if (ins->oprs[1].segment != segment)
+            {
+              errfunc (ERR_NONFATAL, "label not in same segment");
+            }
+
+          data = ins->oprs[1].offset - (offset + 8);
+
+          if (data < 0)
+            {
+              data = -data;
+            }
+          else
+            {
+              bytes[1] |= 0x80;
+            }
+
+          if (data >= 0x1000)
+            {
+              errfunc (ERR_NONFATAL, "too long offset");
+            }
+
+          bytes[2] |= ((data & 0xF00) >> 8);
+          bytes[3] = data & 0xFF;
+          break;
+
+        case 0x18:      // LDR Rd, [Rn]
+          ++codes;
+
+          bytes[0] = c | *codes++;
+
+          bytes[1] = *codes++;
+
+          // Rd
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+          if (has_B_code)
+            bytes[1] |= 0x40;
+          if (has_T_code)
+            {
+              bytes[1] |= 0x20;         // write-back
+            }
+          else
+            {
+              bytes[0] |= 0x01;         // implicit pre-index mode
+            }
+
+          if (has_W_code)
+            {
+              bytes[1] |= 0x20;         // write-back
+            }
+
+          // Rn
+          c = regval (&ins->oprs[1],1);
+          bytes[1] |= c;
+
+          if (c == 0x15)                // R15
+            data = -8;
+          else
+            data = 0;
+
+          if (data < 0)
+            {
+              data = -data;
+            }
+          else
+            {
+              bytes[1] |= 0x80;
+            }
+
+          bytes[2] |= ((data & 0xF00) >> 8);
+          bytes[3] = data & 0xFF;
+          break;
+
+        case 0x19:      // LDR Rd, [Rn,#expression]
+        case 0x20:      // LDR Rd, [Rn,Rm]
+        case 0x21:      // LDR Rd, [Rn,Rm,shift]
+          ++codes;
+
+          bytes[0] = c | *codes++;
+
+          bytes[1] = *codes++;
+
+          // Rd
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+          if (has_B_code)
+            bytes[1] |= 0x40;
+
+          // Rn
+          c = regval (&ins->oprs[1],1);
+          bytes[1] |= c;
+
+          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
+            {
+              bytes[0] |= 0x01;         // pre-index mode
+              if (has_W_code)
+                {
+                  bytes[1] |= 0x20;
+                }
+              if (has_T_code)
+                {
+                  errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
+                }
+            }
+          else
+            {
+              if (has_T_code)           // Forced write-back in post-index mode
+                {
+                  bytes[1] |= 0x20;
+                }
+              if (has_W_code)
+                {
+                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
+                }
+            }
+
+          if (keep == 0x19)
+            {
+              data = ins->oprs[2].offset;
+
+              if (data < 0)
+                {
+                  data = -data;
+                }
+              else
+                {
+                  bytes[1] |= 0x80;
+                }
+
+              if (data >= 0x1000)
+                {
+                  errfunc (ERR_NONFATAL, "too long offset");
+                }
+
+              bytes[2] |= ((data & 0xF00) >> 8);
+              bytes[3] = data & 0xFF;
+            }
+          else
+            {
+              if (ins->oprs[2].minus == 0)
+                {
+                  bytes[1] |= 0x80;
+                }
+              c = regval (&ins->oprs[2],1);
+              bytes[3] = c;
+
+              if (keep == 0x21)
+                {
+                  c = ins->oprs[3].offset;
+                  if (c > 0x1F)
+                    {
+                      errfunc (ERR_NONFATAL, "too large shiftvalue");
+                      c = c & 0x1F;
+                    }
+
+                  bytes[2] |= c >> 1;
+                  if (c & 0x01)
+                    {
+                      bytes[3] |= 0x80;
+                    }
+                  bytes[3] |= shiftval (&ins->oprs[3]) << 5;
+                }
+            }
+
+          break;
+
+        case 0x22:      // LDRH Rd, expression
+          ++codes;
+
+          bytes[0] = c | 0x01;          // Implicit pre-index
+
+          bytes[1] = *codes++;
+
+          // Rd
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+
+          // Rn - implicit R15
+          bytes[1] |= 0xF;
+
+          if (ins->oprs[1].segment != segment)
+            {
+              errfunc (ERR_NONFATAL, "label not in same segment");
+            }
+
+          data = ins->oprs[1].offset - (offset + 8);
+
+          if (data < 0)
+            {
+              data = -data;
+            }
+          else
+            {
+              bytes[1] |= 0x80;
+            }
+
+          if (data >= 0x100)
+            {
+              errfunc (ERR_NONFATAL, "too long offset");
+            }
+          bytes[3] = *codes++;
+
+          bytes[2] |= ((data & 0xF0) >> 4);
+          bytes[3] |= data & 0xF;
+          break;
+
+        case 0x23:      // LDRH Rd, Rn
+          ++codes;
+
+          bytes[0] = c | 0x01;          // Implicit pre-index
+
+          bytes[1] = *codes++;
+
+          // Rd
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+
+          // Rn
+          c = regval (&ins->oprs[1],1);
+          bytes[1] |= c;
+
+          if (c == 0x15)                // R15
+            data = -8;
+          else
+            data = 0;
+
+          if (data < 0)
+            {
+              data = -data;
+            }
+          else
+            {
+              bytes[1] |= 0x80;
+            }
+
+          if (data >= 0x100)
+            {
+              errfunc (ERR_NONFATAL, "too long offset");
+            }
+          bytes[3] = *codes++;
+
+          bytes[2] |= ((data & 0xF0) >> 4);
+          bytes[3] |= data & 0xF;
+          break;
+
+        case 0x24:      // LDRH Rd, Rn, expression
+        case 0x25:      // LDRH Rd, Rn, Rm
+          ++codes;
+
+          bytes[0] = c;
+
+          bytes[1] = *codes++;
+
+          // Rd
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+
+          // Rn
+          c = regval (&ins->oprs[1],1);
+          bytes[1] |= c;
+
+          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
+            {
+              bytes[0] |= 0x01;         // pre-index mode
+              if (has_W_code)
+                {
+                  bytes[1] |= 0x20;
+                }
+            }
+          else
+            {
+              if (has_W_code)
+                {
+                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
+                }
+            }
+
+          bytes[3] = *codes++;
+
+          if (keep == 0x24)
+            {
+              data = ins->oprs[2].offset;
+
+              if (data < 0)
+                {
+                  data = -data;
+                }
+              else
+                {
+                  bytes[1] |= 0x80;
+                }
+
+              if (data >= 0x100)
+                {
+                  errfunc (ERR_NONFATAL, "too long offset");
+                }
+
+              bytes[2] |= ((data & 0xF0) >> 4);
+              bytes[3] |= data & 0xF;
+            }
+          else
+            {
+              if (ins->oprs[2].minus == 0)
+                {
+                  bytes[1] |= 0x80;
+                }
+              c = regval (&ins->oprs[2],1);
+              bytes[3] |= c;
+
+            }
+          break;
+
+        case 0x26:      // LDM/STM Rn, {reg-list}
+          ++codes;
+
+          bytes[0] = c;
+
+          bytes[0] |= ( *codes >> 4) & 0xF;
+          bytes[1] = ( *codes << 4) & 0xF0;
+          ++codes;
+
+          if (has_W_code)
+            {
+              bytes[1] |= 0x20;
+            }
+          if (has_F_code)
+            {
+              bytes[1] |= 0x40;
+            }
+
+          // Rn
+          bytes[1] |= regval (&ins->oprs[0],1);
+
+          data = ins->oprs[1].basereg;
+
+          bytes[2] = ((data >> 8) & 0xFF);
+          bytes[3] = (data & 0xFF);
+
+          break;
+
+        case 0x27:      // SWP Rd, Rm, [Rn]
+          ++codes;
+
+          bytes[0] = c;
+
+          bytes[0] |= *codes++;
+
+          bytes[1] = regval (&ins->oprs[2],1);
+          if (has_B_code)
+            {
+              bytes[1] |= 0x40;
+            }
+          bytes[2] = regval (&ins->oprs[0],1) << 4;
+          bytes[3] = *codes++;
+          bytes[3] |= regval (&ins->oprs[1],1);
+          break;
+
+        default:
+          errfunc (ERR_FATAL, "unknown decoding of instruction");
+
+          bytes[0] = c;
+          // And a fix nibble
+          ++codes;
+          bytes[0] |= *codes++;
+
+         if ( *codes == 0x01)           // An I bit
+           {
+
+           }
+         if ( *codes == 0x02)           // An I bit
+           {
+
+           }
+         ++codes;
+      }
+    out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
+}
+
+*)
+{$endif dummy}
+
+{ Unit initialization: assigns the tai_align class to cai_align. }
+begin
+  cai_align:=tai_align;
+end.
+

+ 66 - 0
compiler/aarch64/aoptcpu.pas

@@ -0,0 +1,66 @@
+{
+    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
+    Development Team
+
+    This unit implements the ARM64 optimizer object
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+
+Unit aoptcpu;
+
+{$i fpcdefs.inc}
+
+Interface
+
+uses cpubase, aasmtai, aopt, aoptcpub;
+
+Type
+  { AArch64 assembler optimizer. It derives from TAsmOptimizer and overrides
+    the CPU specific first peephole pass and the second peephole pass. }
+  TCpuAsmOptimizer = class(TAsmOptimizer)
+    { no constructor is declared here, so the inherited one is used }
+    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
+    procedure PeepHoleOptPass2;override;
+  End;
+
+Implementation
+
+  uses
+    aasmbase,aasmcpu,cgbase;
+
+  { Returns true when p is an instruction whose condition field is C_None,
+    i.e. an instruction that does not carry a condition code yet. }
+  function CanBeCond(p : tai) : boolean;
+    begin
+      if p.typ<>ait_instruction then
+        result:=false
+      else
+        result:=taicpu(p).condition=C_None;
+    end;
+
+
+  { CPU specific part of the first peephole pass for the entry at p.
+    No AArch64 specific peephole patterns are implemented in this unit, so
+    p is left untouched and the function always returns false. }
+  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
+    begin
+      result := false;
+    end;
+
+
+  { Second peephole pass. The override is currently empty: it executes no
+    statements and does not call the inherited implementation. }
+  procedure TCpuAsmOptimizer.PeepHoleOptPass2;
+    begin
+    end;
+
+{ Unit initialization: assigns the TCpuAsmOptimizer class to casmoptimizer. }
+begin
+  casmoptimizer:=TCpuAsmOptimizer;
+End.
+

+ 134 - 0
compiler/aarch64/aoptcpub.pas

@@ -0,0 +1,134 @@
+ {
+    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
+    Development Team
+
+    This unit contains several types and constants necessary for the
+    optimizer to work on the ARM64 architecture
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit aoptcpub; { Assembler OPTimizer CPU specific Base }
+
+{$i fpcdefs.inc}
+
+{ enable the following define if memory references can have both a base and }
+{ index register in 1 operand                                               }
+
+{ enable the following define if memory references can have a scaled index }
+{ define RefsHaveScale}
+
+{ enable the following define if memory references can have a segment }
+{ override                                                            }
+
+{ define RefsHaveSegment}
+
+Interface
+
+Uses
+  cgbase,aasmtai,
+  cpubase,aasmcpu,AOptBase;
+
+Type
+
+{ type of a normal instruction }
+  TInstr = Taicpu;
+  PInstr = ^TInstr;
+
+{ ************************************************************************* }
+{ **************************** TCondRegs ********************************** }
+{ ************************************************************************* }
+{ Info about the conditional registers                                      }
+  TCondRegs = Object
+    Constructor Init;
+    Destructor Done;
+  End;
+
+{ ************************************************************************* }
+{ **************************** TAoptBaseCpu ******************************* }
+{ ************************************************************************* }
+
+  TAoptBaseCpu = class(TAoptBase)
+    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
+  End;
+
+
+{ ************************************************************************* }
+{ ******************************* Constants ******************************* }
+{ ************************************************************************* }
+Const
+
+{ the maximum number of things (registers, memory, ...) a single instruction }
+{ changes                                                                    }
+
+  MaxCh = 3;
+
+{ the maximum number of operands an instruction has }
+
+  MaxOps = 4;
+
+{Oper index of operand that contains the source (reference) with a load }
+{instruction                                                            }
+
+  LoadSrc = 0;
+
+{Oper index of operand that contains the destination (register) with a load }
+{instruction                                                                }
+
+  LoadDst = 1;
+
+{Oper index of operand that contains the source (register) with a store }
+{instruction                                                            }
+
+  StoreSrc = 0;
+
+{Oper index of operand that contains the destination (reference) with a store }
+{instruction                                                                  }
+
+  StoreDst = 1;
+
+  aopt_uncondjmp = A_B;
+  aopt_condjmp = A_B;
+
+Implementation
+
+{ ************************************************************************* }
+{ **************************** TCondRegs ********************************** }
+{ ************************************************************************* }
+  { Constructor of TCondRegs; the body is empty, no state is set up here. }
+  Constructor TCondRegs.init;
+    Begin
+    End;
+
+
+  { Destructor of TCondRegs; the body is empty, nothing is released here.
+    It is marked inline only when the symbol "inl" is defined. }
+  Destructor TCondRegs.Done; {$ifdef inl} inline; {$endif inl}
+    Begin
+    End;
+
+
+  { Reports whether the instruction p1 writes to the register Reg.
+    p1 is cast to taicpu without a type check, so the caller has to pass an
+    instruction. An operand counts as a modification when it is a register
+    operand equal to Reg and its spilling operation type is operand_write
+    or operand_readwrite. }
+  function TAoptBaseCpu.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
+    var
+      instr : taicpu;
+      opidx : Longint;
+    begin
+      result:=false;
+      instr:=taicpu(p1);
+      for opidx:=0 to instr.ops-1 do
+        begin
+          if instr.oper[opidx]^.typ<>top_reg then
+            continue;
+          if instr.oper[opidx]^.reg<>Reg then
+            continue;
+          if instr.spilling_get_operation_type(opidx) in [operand_write,operand_readwrite] then
+            begin
+              { the first matching operand settles the answer }
+              result:=true;
+              break;
+            end;
+        end;
+    end;
+
+End.

+ 41 - 0
compiler/aarch64/aoptcpud.pas

@@ -0,0 +1,41 @@
+{
+    Copyright (c) 1998-2002 by Jonas Maebe, member of the Free Pascal
+    Development Team
+
+    This unit contains the processor specific implementation of the
+    assembler optimizer data flow analyzer.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit aoptcpud;
+
+{$i fpcdefs.inc}
+
+Interface
+
+uses
+  AOptDA;
+
+Type
+  TAOptDFACpu = class(TAOptDFA)
+  End;
+
+Implementation
+
+
+End.
+

+ 458 - 0
compiler/aarch64/cpubase.pas

@@ -0,0 +1,458 @@
+{
+    Copyright (c) 1998-2012 by Florian Klaempfl and Peter Vreman
+
+    Contains the base types for ARM64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+{ Base unit for processor information. This unit contains
+  enumerations of registers, opcodes, sizes, and other
+  such things which are processor specific.
+}
+unit cpubase;
+
+{$define USEINLINE}
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      cutils,cclasses,
+      globtype,globals,
+      cpuinfo,
+      aasmbase,
+      cgbase
+      ;
+
+
+{*****************************************************************************
+                                Assembler Opcodes
+*****************************************************************************}
+
+    type
+      TAsmOp= {$i a64op.inc}
+
+      { This should define the array of instructions as string }
+      op2strtable=array[tasmop] of string[11];
+
+    const
+      { First value of opcode enumeration }
+      firstop = low(tasmop);
+      { Last value of opcode enumeration  }
+      lastop  = high(tasmop);
+
+{*****************************************************************************
+                                  Registers
+*****************************************************************************}
+
+    type
+      { Number of registers used for indexing in tables }
+      tregisterindex=0..{$i ra64nor.inc}-1;
+
+    const
+      { Available Superregisters }
+      {$i ra64sup.inc}
+
+      R_SUBWHOLE = R_SUBQ;
+
+      { Available Registers }
+      {$i ra64con.inc}
+
+      { Integer Super registers first and last }
+      first_int_supreg = RS_X0;
+      first_int_imreg = $20;
+
+      { FPU Super registers first and last }
+      first_fpu_supreg = RS_S0;
+      first_fpu_imreg = $20;
+
+      { MM Super register first and last }
+      first_mm_supreg    = RS_S0;
+      first_mm_imreg     = $20;
+
+      { Required parameter alignment when calling a routine declared as
+        stdcall and cdecl. The alignment value should be the one defined
+        by GCC or the target ABI.
+
+        The value of this constant is equal to the constant
+        PARM_BOUNDARY / BITS_PER_UNIT in the GCC source.
+      }
+      std_param_align = 4;
+
+      { TODO: Calculate bsstart}
+      regnumber_count_bsstart = 128;
+
+      regnumber_table : array[tregisterindex] of tregister = (
+        {$i ra64num.inc}
+      );
+
+      regstabs_table : array[tregisterindex] of shortint = (
+        {$i ra64sta.inc}
+      );
+
+      regdwarf_table : array[tregisterindex] of shortint = (
+        {$i ra64dwa.inc}
+      );
+      { registers which may be destroyed by calls }
+      VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X29..RS_X30];
+      VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
+
+    type
+      totherregisterset = set of tregisterindex;
+
+{*****************************************************************************
+                          Instruction post fixes
+*****************************************************************************}
+    type
+      { ARM instructions load/store and arithmetic instructions
+        can have several instruction post fixes which are collected
+        in this enumeration
+      }
+      TOpPostfix = (PF_None,
+        { update condition flags }
+        PF_S,
+        { load/store }
+        PF_B,PF_SB,PF_H,PF_SH
+      );
+
+      TOpPostfixes = set of TOpPostfix;
+
+    const
+      oppostfix2str : array[TOpPostfix] of string[2] = ('',
+        's',
+        'b','sb','h','sh');
+
+{*****************************************************************************
+                                Conditions
+*****************************************************************************}
+
+    type
+      TAsmCond=(C_None,
+        C_EQ,C_NE,C_HS,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
+        C_GE,C_LT,C_GT,C_LE,C_AL,C_NV
+      );
+
+      TAsmConds = set of TAsmCond;
+
+    const
+      cond2str : array[TAsmCond] of string[2]=('',
+        'eq','ne','hs','lo','mi','pl','vs','vc','hi','ls',
+        'ge','lt','gt','le','al','nv'
+      );
+
+      { Upper case mnemonic of every TAsmCond value, in declaration order
+        (same order as cond2str). }
+      uppercond2str : array[TAsmCond] of string[2]=('',
+        'EQ','NE','HS','LO','MI','PL','VS','VC','HI','LS',
+        'GE','LT','GT','LE','AL','NV'
+      );
+
+{*****************************************************************************
+                                   Flags
+*****************************************************************************}
+
+    type
+      TResFlags = (F_EQ,F_NE,F_CS,F_CC,F_MI,F_PL,F_VS,F_VC,F_HI,F_LS,
+        F_GE,F_LT,F_GT,F_LE);
+
+{*****************************************************************************
+                                Operands
+*****************************************************************************}
+
+      taddressmode = (AM_OFFSET,AM_PREINDEXED,AM_POSTINDEXED);
+      tshiftmode = (SM_None,SM_LSL,SM_LSR,SM_ASR,SM_ROR);
+
+      tupdatereg = (UR_None,UR_Update);
+
+      pshifterop = ^tshifterop;
+
+      tshifterop = record
+        shiftmode : tshiftmode;
+        shiftimm : byte;
+      end;
+
+      tcpumodeflag = (mfA, mfI, mfF);
+      tcpumodeflags = set of tcpumodeflag;
+
+      tspecialregflag = (srC, srX, srS, srF);
+      tspecialregflags = set of tspecialregflag;
+
+{*****************************************************************************
+                                 Constants
+*****************************************************************************}
+
+    const
+      max_operands = 6;
+
+      maxintregs = 32;
+      maxfpuregs = 32;
+      maxaddrregs = 0;
+
+{*****************************************************************************
+                                Operand Sizes
+*****************************************************************************}
+
+    type
+      topsize = (S_NO,
+        S_B,S_W,S_L,S_BW,S_BL,S_WL,
+        S_IS,S_IL,S_IQ,
+        S_FS,S_FL,S_FX,S_D,S_Q,S_FV,S_FXX
+      );
+
+{*****************************************************************************
+                          Default generic sizes
+*****************************************************************************}
+
+   const
+      { Defines the default address size for a processor, }
+      OS_ADDR = OS_64;
+      { the natural int size for a processor,
+        has to match osuinttype/ossinttype as initialized in psystem }
+      OS_INT = OS_64;
+      OS_SINT = OS_S64;
+      { the maximum float size for a processor,           }
+      OS_FLOAT = OS_F64;
+      { the size of a vector register for a processor     }
+      OS_VECTOR = OS_M128;
+
+{*****************************************************************************
+                          Generic Register names
+*****************************************************************************}
+
+      NR_SP = NR_XZR;
+      RS_SP = RS_XZR;
+      NR_WSP = NR_WZR;
+      RS_WSP = RS_WZR;
+
+      { Stack pointer register }
+      NR_STACK_POINTER_REG = NR_SP;
+      RS_STACK_POINTER_REG = RS_SP;
+      { Frame pointer register (initialized in tarmprocinfo.init_framepointer) }
+      RS_FRAME_POINTER_REG: tsuperregister = RS_X29;
+      NR_FRAME_POINTER_REG: tregister = NR_X29;
+      { Register for addressing absolute data in a position independant way,
+        such as in PIC code. The exact meaning is ABI specific. For
+        further information look at GCC source : PIC_OFFSET_TABLE_REGNUM
+      }
+      NR_PIC_OFFSET_REG = NR_X18;
+      { Results are returned in this register (32-bit values) }
+      NR_FUNCTION_RETURN_REG = NR_X0;
+      RS_FUNCTION_RETURN_REG = RS_X0;
+      { The value returned from a function is available in this register }
+      NR_FUNCTION_RESULT_REG = NR_FUNCTION_RETURN_REG;
+      RS_FUNCTION_RESULT_REG = RS_FUNCTION_RETURN_REG;
+
+      NR_FPU_RESULT_REG = NR_NO;
+
+      NR_MM_RESULT_REG  = NR_D0;
+
+      NR_RETURN_ADDRESS_REG = NR_FUNCTION_RETURN_REG;
+
+      { Offset where the parent framepointer is pushed }
+      PARENT_FRAMEPOINTER_OFFSET = 0;
+
+      NR_DEFAULTFLAGS = NR_NZCV;
+      RS_DEFAULTFLAGS = RS_NZCV;
+
+{*****************************************************************************
+                       GCC /ABI linking information
+*****************************************************************************}
+
+    const
+      { Registers which must be saved when calling a routine declared as
+        cppdecl, cdecl, stdcall, safecall, palmossyscall. The registers
+        saved should be the ones as defined in the target ABI and / or GCC.
+
+        This value can be deduced from the CALLED_USED_REGISTERS array in the
+        GCC source.
+      }
+      saved_standard_registers : array[0..9] of tsuperregister =
+        (RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28);
+
+      { this is only for the generic code which is not used for this architecture }
+      saved_mm_registers : array[0..7] of tsuperregister = (RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15);
+
+{*****************************************************************************
+                                  Helpers
+*****************************************************************************}
+
+    { Returns the tcgsize corresponding with the size of reg.}
+    function reg_cgsize(const reg: tregister) : tcgsize;
+    function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
+    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
+    procedure inverse_flags(var f: TResFlags);
+    function flags_to_cond(const f: TResFlags) : TAsmCond;
+    function findreg_by_number(r:Tregister):tregisterindex;
+    function std_regnum_search(const s:string):Tregister;
+    function std_regname(r:Tregister):string;
+
+    function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+    function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+
+    procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
+
+    function dwarf_reg(r:tregister):shortint;
+
+  implementation
+
+    uses
+      systems,rgBase,verbose;
+
+    const
+      std_regname_table : TRegNameTable = (
+        {$i ra64std.inc}
+      );
+
+      regnumber_index : array[tregisterindex] of tregisterindex = (
+        {$i ra64rni.inc}
+      );
+
+      std_regname_index : array[tregisterindex] of tregisterindex = (
+        {$i ra64sri.inc}
+      );
+
+
+    { Maps a code generator size to the subregister to use for a register of
+      type regtype. Only R_MMREGISTER distinguishes sizes: OS_F32 gives
+      R_SUBFS, OS_F64 gives R_SUBFD and any other size raises internal error
+      2009112701. Every other register type yields R_SUBWHOLE. }
+    function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
+      begin
+        if regtype<>R_MMREGISTER then
+          result:=R_SUBWHOLE
+        else if s=OS_F32 then
+          result:=R_SUBFS
+        else if s=OS_F64 then
+          result:=R_SUBFD
+        else
+          internalerror(2009112701);
+      end;
+
+
+    { Returns the tcgsize corresponding with the size of reg, selected by the
+      register type (and, for multimedia registers, by the subregister).
+      Unknown register types and unknown multimedia subregisters raise an
+      internal error. }
+    function reg_cgsize(const reg: tregister): tcgsize;
+      begin
+        case getregtype(reg) of
+          R_INTREGISTER :
+            { NOTE(review): every integer register is reported as OS_32 although
+              OS_INT is declared as OS_64 in this unit - confirm whether the
+              subregister should be taken into account here }
+            reg_cgsize:=OS_32;
+          R_FPUREGISTER :
+            reg_cgsize:=OS_F80;
+          R_MMREGISTER :
+            begin
+              case getsubreg(reg) of
+                { double precision and whole register are both reported as OS_F64 }
+                R_SUBFD,
+                R_SUBWHOLE:
+                  result:=OS_F64;
+                R_SUBFS:
+                  result:=OS_F32;
+                else
+                  internalerror(2009112903);
+              end;
+            end;
+          else
+            internalerror(200303181);
+          end;
+        end;
+
+
+    { Returns true when opcode o is one of A_B, A_BLR or A_RET.
+      NOTE(review): only these three opcodes are treated as calls/jumps;
+      confirm whether further branching opcodes need to be listed. }
+    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
+      begin
+        case o of
+          A_B,
+          A_BLR,
+          A_RET:
+            result:=true;
+          else
+            result:=false;
+        end;
+      end;
+
+
+    { Replaces f in place by its logical opposite using a lookup table
+      indexed by TResFlags (EQ<->NE, CS<->CC, MI<->PL, VS<->VC, HI<->LS,
+      GE<->LT, GT<->LE). }
+    procedure inverse_flags(var f: TResFlags);
+      const
+        { entry i holds the inverse of the i-th TResFlags value }
+        inv_flags: array[TResFlags] of TResFlags =
+          (F_NE,F_EQ,F_CC,F_CS,F_PL,F_MI,F_VC,F_VS,F_LS,F_HI,
+          F_LT,F_GE,F_LE,F_GT);
+      begin
+        f:=inv_flags[f];
+      end;
+
+
+    { Translates a flags result into the condition code that tests it.
+      The table is indexed by TResFlags in declaration order; F_CS (carry
+      set) corresponds to C_HS and F_CC (carry clear) to C_LO.
+      Raises internal error 200112301 if f lies beyond the table. }
+    function flags_to_cond(const f: TResFlags) : TAsmCond;
+      const
+        flag_2_cond: array[F_EQ..F_LE] of TAsmCond =
+          (C_EQ,C_NE,C_HS,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
+           C_GE,C_LT,C_GT,C_LE);
+      begin
+        if f>high(flag_2_cond) then
+          internalerror(200112301);
+        result:=flag_2_cond[f];
+      end;
+
+
+    { Returns the register table index of r by delegating to
+      rgBase.findreg_by_number_table with this unit's regnumber_index. }
+    function findreg_by_number(r:Tregister):tregisterindex;
+      begin
+        result:=rgBase.findreg_by_number_table(r,regnumber_index);
+      end;
+
+
+    { Looks up the register named s: findreg_by_name_table yields a table
+      index for the name, which is then translated into a register number
+      through regnumber_table. }
+    function std_regnum_search(const s:string):Tregister;
+      begin
+        result:=regnumber_table[findreg_by_name_table(s,std_regname_table,std_regname_index)];
+      end;
+
+
+    { Returns the standard name of register r. When the table lookup yields
+      index 0 the name produced by generic_regname is returned instead
+      (presumably index 0 is the "not found"/invalid entry - TODO confirm). }
+    function std_regname(r:Tregister):string;
+      var
+        regidx : tregisterindex;
+      begin
+        regidx:=findreg_by_number_table(r,regnumber_index);
+        if regidx=0 then
+          result:=generic_regname(r)
+        else
+          result:=std_regname_table[regidx];
+      end;
+
+
+    { Clears the shifter operand so by filling the whole record with zero
+      bytes; this leaves shiftmode at its first enumeration value (SM_None)
+      and shiftimm at 0. }
+    procedure shifterop_reset(var so : tshifterop);{$ifdef USEINLINE}inline;{$endif USEINLINE}
+      begin
+        FillChar(so,sizeof(so),0);
+      end;
+
+
+    { Returns the condition that is true exactly when c is false. The table
+      is indexed by TAsmCond in declaration order and pairs EQ<->NE,
+      HS<->LO, MI<->PL, VS<->VC, HI<->LS, GE<->LT and GT<->LE.
+      C_None, C_AL and C_NV have no inverse and map to C_None. }
+    function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+      const
+        inverse: array[TAsmCond] of TAsmCond=(C_None,
+          C_NE,C_EQ,C_LO,C_HS,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
+          C_LT,C_GE,C_LE,C_GT,C_None,C_None
+        );
+      begin
+        result := inverse[c];
+      end;
+
+
+    { Returns true when both conditions are the same enumeration value. }
+    function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
+      begin
+        result := c1 = c2;
+      end;
+
+
+    { Returns the regdwarf_table entry for register r. A table entry of -1
+      raises internal error 200603251. }
+    function dwarf_reg(r:tregister):shortint;
+      var
+        regidx : tregisterindex;
+      begin
+        regidx:=findreg_by_number(r);
+        result:=regdwarf_table[regidx];
+        if result=-1 then
+          internalerror(200603251);
+      end;
+
+
+end.

+ 102 - 0
compiler/aarch64/cpuinfo.pas

@@ -0,0 +1,102 @@
+{
+    Copyright (c) 1998-2002 by the Free Pascal development team
+
+    Basic Processor information for AArch64
+
+    See the file COPYING.FPC, included in this distribution,
+    for details about the copyright.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+ **********************************************************************}
+
+{ Declarations describing the AArch64 target: floating point type aliases,
+  the cpu/fpu/controller enumerations with their name tables, the set of
+  accepted calling conventions and the optimizer switch sets.  The unit has
+  no executable code. }
+Unit CPUInfo;
+
+Interface
+
+  uses
+    globtype;
+
+Type
+   { floating point types; ts80real and ts128real are both declared as
+     distinct types derived from extended }
+   bestreal = double;
+   ts32real = single;
+   ts64real = double;
+   ts80real = type extended;
+   ts128real = type extended;
+   ts64comp = comp;
+
+   pbestreal=^bestreal;
+
+   { possible supported processors for this target }
+   tcputype =
+      (cpu_none,
+       cpu_armv8
+      );
+
+Type
+   { floating point unit variants selectable for this target }
+   tfputype =
+     (fpu_none,
+      fpu_vfp
+     );
+
+   { controller types; only the "none" value is declared }
+   tcontrollertype =
+     (ct_none
+     );
+
+
+Const
+   {# Size of native extended floating point type }
+   extended_size = 8;
+   {# Size of a multimedia register               }
+   mmreg_size = 16;
+   { target cpu string (used by compiler options) }
+   target_cpu_string = 'aarch64';
+
+   { calling conventions supported by the code generator }
+   supported_calling_conventions : tproccalloptions = [
+     pocall_internproc,
+     pocall_safecall,
+     pocall_stdcall,
+     { same as stdcall only different name mangling }
+     pocall_cdecl,
+     { same as stdcall only different name mangling }
+     pocall_cppdecl,
+     { same as stdcall but floating point numbers are handled like equal sized integers }
+     pocall_softfloat,
+     { same as stdcall (requires that all const records are passed by
+       reference, but that's already done for stdcall) }
+     pocall_mwpascal,
+     { used for interrupt handling }
+     pocall_interrupt
+   ];
+
+   { printable names, indexed by tcputype; cpu_none has an empty name }
+   cputypestr : array[tcputype] of string[8] = ('',
+     'ARMV8'
+   );
+
+   { printable names, indexed by tfputype; fpu_none has an empty name }
+   fputypestr : array[tfputype] of string[9] = ('',
+     'VFP'
+   );
+
+
+   { Supported optimizations, only used for information }
+   supported_optimizerswitches = genericlevel1optimizerswitches+
+                                 genericlevel2optimizerswitches+
+                                 genericlevel3optimizerswitches-
+                                 { no need to write info about those }
+                                 [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
+                                 [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
+				  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
+
+   { switch sets per optimization level; each level includes the set of the
+     level below it (cs_opt_loopunroll is commented out of level 3) }
+   level1optimizerswitches = genericlevel1optimizerswitches;
+   level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
+     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
+   level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
+
+Implementation
+
+end.
+

+ 716 - 0
compiler/aarch64/cpupara.pas

@@ -0,0 +1,716 @@
+{
+    Copyright (c) 2003-2012 by Florian Klaempfl and others
+
+    AArch64 specific calling conventions
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ****************************************************************************
+}
+{ AArch64 specific calling conventions are handled by this unit
+}
+unit cpupara;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+       globtype,globals,
+       aasmtai,aasmdata,
+       cpuinfo,cpubase,cgbase,cgutils,
+       symconst,symbase,symtype,symdef,parabase,paramgr;
+
+    type
+       { Parameter manager for AArch64: overrides the tparamanager methods
+         that report volatile registers, decide by-reference passing and
+         result-in-parameter returns, and compute parameter and function
+         result locations. }
+       taarch64paramanager = class(tparamanager)
+          function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
+          function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
+          function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
+          function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
+          function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
+          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
+          function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
+          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+         private
+          { resets the register and stack allocation state to its start values }
+          procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
+          { shared worker of create_paraloc_info and create_varargs_paraloc_info;
+            continues from, and updates, the allocation state passed in }
+          function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
+            var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
+       end;
+
+  implementation
+
+    uses
+       verbose,systems,cutils,
+       rgobj,
+       defutil,symsym,symtable;
+
+
+    { Integer registers reported as volatile; calloption is not consulted. }
+    function taarch64paramanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
+      begin
+        get_volatile_registers_int:=VOLATILE_INTREGISTERS;
+      end;
+
+
+    { Always reports an empty set of volatile FPU registers; calloption is
+      not consulted. }
+    function taarch64paramanager.get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;
+      begin
+        get_volatile_registers_fpu:=[];
+      end;
+
+
+    { Multimedia registers reported as volatile; calloption is not
+      consulted. }
+    function taarch64paramanager.get_volatile_registers_mm(calloption: tproccalloption): tcpuregisterset;
+      begin
+        get_volatile_registers_mm:=VOLATILE_MMREGISTERS;
+      end;
+
+
+    { Fills cgpara with the location of the nr-th (1-based) parameter of pd,
+      treated as an integer parameter.  Parameters 1..8 are placed in the
+      integer registers starting at RS_X0; later ones are placed on the
+      stack in slots of eight bytes relative to the stack pointer.
+      nr<1 triggers internal error 2002070801. }
+    procedure taarch64paramanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+      var
+        loc_entry : pcgparalocation;
+        paratype : tdef;
+      begin
+        if nr<1 then
+          internalerror(2002070801);
+        paratype:=tparavarsym(pd.paras[nr-1]).vardef;
+
+        { describe the parameter as a whole }
+        cgpara.reset;
+        cgpara.size:=def_cgsize(paratype);
+        cgpara.intsize:=tcgsize2size[cgpara.size];
+        cgpara.alignment:=std_param_align;
+        cgpara.def:=paratype;
+
+        { a single location entry holds it }
+        loc_entry:=cgpara.add_location;
+        loc_entry^.size:=OS_INT;
+        if nr>8 then
+          begin
+            { ninth and later parameters live on the stack }
+            loc_entry^.loc:=LOC_REFERENCE;
+            loc_entry^.reference.index:=NR_STACK_POINTER_REG;
+            loc_entry^.reference.offset:=(nr-9)*8;
+          end
+        else
+          begin
+            { the first eight parameters are passed in registers }
+            loc_entry^.loc:=LOC_REGISTER;
+            loc_entry^.register:=newreg(R_INTREGISTER,RS_X0+nr-1,R_SUBWHOLE);
+          end;
+      end;
+
+
+    { Placeholder: currently answers false for every type; p is not
+      inspected. }
+    function Is_HFA(p : tdef) : boolean;
+      begin
+        Is_HFA:=false;
+      end;
+
+
+    { Chooses the kind of location (integer register, multimedia register or
+      memory reference) used to pass a parameter of type p.  The decision
+      depends only on p; calloption and isvariadic are not consulted by the
+      body.  A type kind without a branch triggers internal error
+      2002071001. }
+    function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+      begin
+         { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
+           if push_addr_param for the def is true
+         }
+         case p.typ of
+            orddef:
+              getparaloc:=LOC_REGISTER;
+            floatdef:
+              { the statement separator was missing here, which made the
+                following case label a syntax error }
+              getparaloc:=LOC_MMREGISTER;
+            enumdef:
+              getparaloc:=LOC_REGISTER;
+            pointerdef:
+              getparaloc:=LOC_REGISTER;
+            formaldef:
+              getparaloc:=LOC_REGISTER;
+            classrefdef:
+              getparaloc:=LOC_REGISTER;
+            recorddef:
+              getparaloc:=LOC_REGISTER;
+            objectdef:
+              getparaloc:=LOC_REGISTER;
+            stringdef:
+              { short and long strings are passed in memory }
+              if is_shortstring(p) or is_longstring(p) then
+                getparaloc:=LOC_REFERENCE
+              else
+                getparaloc:=LOC_REGISTER;
+            procvardef:
+              getparaloc:=LOC_REGISTER;
+            filedef:
+              getparaloc:=LOC_REGISTER;
+            arraydef:
+              getparaloc:=LOC_REFERENCE;
+            setdef:
+              { only small sets fit into a register }
+              if is_smallset(p) then
+                getparaloc:=LOC_REGISTER
+              else
+                getparaloc:=LOC_REFERENCE;
+            variantdef:
+              getparaloc:=LOC_REGISTER;
+            { avoid problems with erroneous definitions }
+            errordef:
+              getparaloc:=LOC_REGISTER;
+            else
+              internalerror(2002071001);
+         end;
+      end;
+
+
+    { Decides whether a parameter of type def declared with varspez is
+      passed by address (true) or by value (false).  var, out and constref
+      parameters are always passed by address; for everything else the
+      decision depends on the type kind, and kinds without a branch keep
+      the default of false.  calloption is not consulted by the body. }
+    function taarch64paramanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
+      begin
+        result:=false;
+        if varspez in [vs_var,vs_out,vs_constref] then
+          begin
+            result:=true;
+            exit;
+          end;
+        case def.typ of
+          objectdef:
+            { the original expression had unbalanced parentheses and could
+              not be parsed; this is the reading in which a homogeneous
+              floating point aggregate is never passed by address and other
+              objects are when they are const or empty.
+              NOTE(review): confirm that this grouping is the intended one }
+            result:=not(Is_HFA(def)) and
+                    (is_object(def) and ((varspez=vs_const) or (def.size=0)));
+          recorddef:
+            { note: should this ever be changed, make sure that const records
+                are always passed by reference for calloption=pocall_mwpascal }
+            result:=(varspez=vs_const) or (def.size=0);
+          variantdef,
+          formaldef:
+            result:=true;
+          arraydef:
+            result:=(tarraydef(def).highrange>=tarraydef(def).lowrange) or
+                             is_open_array(def) or
+                             is_array_of_const(def) or
+                             is_array_constructor(def);
+          setdef :
+            { sets larger than 16 bytes are passed by address }
+            result:=def.size>16;
+          stringdef :
+            result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
+        end;
+      end;
+
+
+    { Decides whether a function result of type def is returned through a
+      hidden result parameter (true) instead of in registers (false). }
+    function taarch64paramanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
+      var
+        idx: longint;
+        member: tsym;
+        fld: tfieldvarsym;
+      begin
+        { system independent rule: safecall routines (on targets using
+          safecall exceptions) and record constructors always return their
+          result in a parameter }
+        if ((tf_safecall_exceptions in target_info.flags) and
+            (pd.proccalloption=pocall_safecall)) or
+           ((pd.proctypeoption=potype_constructor) and is_record(def)) then
+          begin
+            result:=true;
+            exit;
+          end;
+        case def.typ of
+          recorddef:
+            begin
+              { records larger than four bytes always go through a parameter }
+              result:=def.size>4;
+              { The remaining check follows the rules of the old ARM ABI
+                (APCS): a struct is returned in a register only when it is
+                "simple".  Per the APCS documentation a non-simple type is
+                any non-floating-point type larger than one word (including
+                structures containing only floating-point fields) and
+                certain single-word structured types; a structure of at most
+                one word is "integer-like", and thus simple, when the offset
+                of each addressable subfield is zero.
+
+                An addressable subfield is one whose address can be taken,
+                in practice any non-bitfield.  Pascal cannot express the C
+                difference between "char" and "int :8", so a field whose
+                type is defined inside the record itself (such as
+                "a: 0..255;") is treated as a bitfield while a field using
+                a separately declared type (such as "a: byte;") is not. }
+              if (not result) and
+                 (target_info.abi in [abi_default,abi_armeb]) then
+                for idx:=0 to trecorddef(def).symtable.SymList.count-1 do
+                  begin
+                    member:=tsym(trecorddef(def).symtable.SymList[idx]);
+                    if member.typ<>fieldvarsym then
+                      continue;
+                    fld:=tfieldvarsym(member);
+                    { bitfield -> ignore }
+                    if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
+                       (fld.vardef.typ in [orddef,enumdef]) and
+                       (fld.vardef.owner.defowner=def) then
+                      continue;
+                    { any other field must sit at offset zero and must not
+                      be a floating point field, otherwise the record is
+                      returned by reference }
+                    if (fld.fieldoffset<>0) or
+                       (fld.vardef.typ=floatdef) then
+                      begin
+                        result:=true;
+                        exit;
+                      end;
+                  end;
+            end;
+          procvardef:
+            { only address-only procedural variables fit into a register }
+            result:=not tprocvardef(def).is_addressonly;
+          else
+            result:=inherited ret_in_param(def,pd);
+        end;
+      end;
+
+
+    { Puts the parameter allocation state into its start configuration:
+      first integer, floating point and multimedia register, stack offset
+      zero and no spare single precision register. }
+    procedure taarch64paramanager.init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
+      begin
+        { nothing on the stack yet, no half-used register pending }
+        cur_stack_offset:=0;
+        sparesinglereg:=NR_NO;
+        { allocation starts at the first register of each class }
+        curintreg:=RS_R0;
+        curfloatreg:=RS_F0;
+        curmmreg:=RS_D0;
+      end;
+
+
+    { Assigns a location to every parameter in paras for the given side.
+      Allocation continues from the register/stack state passed in through
+      curintreg, curfloatreg, curmmreg, cur_stack_offset and sparesinglereg;
+      the updated state is written back to these variables (sparesinglereg
+      is updated in place) and the final stack offset is returned.
+      NOTE(review): the register limits (RS_R3, RS_F3), the 4-byte stack
+      slots and the splitting of 64-bit values into 32-bit pieces differ
+      from getintparaloc in this unit, which uses eight registers starting
+      at RS_X0 and 8-byte slots.  This looks carried over from a 32-bit
+      target -- confirm before relying on it. }
+    function taarch64paramanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
+        var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
+
+      var
+        nextintreg,nextfloatreg,nextmmreg : tsuperregister;
+        paradef : tdef;
+        paraloc : pcgparalocation;
+        stack_offset : aword;
+        hp : tparavarsym;
+        loc : tcgloc;
+        paracgsize   : tcgsize;
+        paralen : longint;
+        i : integer;
+        firstparaloc: boolean;
+
+      { Places the current location entry (paraloc) in the next integer
+        register if one is left, otherwise in a 4-byte stack slot. }
+      procedure assignintreg;
+        begin
+          { In case of po_delphi_nested_cc, the parent frame pointer
+            is always passed on the stack. }
+           if (nextintreg<=RS_R3) and
+              (not(vo_is_parentfp in hp.varoptions) or
+               not(po_delphi_nested_cc in p.procoptions)) then
+             begin
+               paraloc^.loc:=LOC_REGISTER;
+               paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
+               inc(nextintreg);
+             end
+           else
+             begin
+               paraloc^.loc:=LOC_REFERENCE;
+               paraloc^.reference.index:=NR_STACK_POINTER_REG;
+               paraloc^.reference.offset:=stack_offset;
+               inc(stack_offset,4);
+            end;
+        end;
+
+
+      begin
+        result:=0;
+        { work on local copies; the var parameters are updated at the end }
+        nextintreg:=curintreg;
+        nextfloatreg:=curfloatreg;
+        nextmmreg:=curmmreg;
+        stack_offset:=cur_stack_offset;
+
+        for i:=0 to paras.count-1 do
+          begin
+            hp:=tparavarsym(paras[i]);
+            paradef:=hp.vardef;
+
+            hp.paraloc[side].reset;
+
+            { currently only support C-style array of const,
+              there should be no location assigned to the vararg array itself }
+            if (p.proccalloption in cstylearrayofconst) and
+               is_array_of_const(paradef) then
+              begin
+                paraloc:=hp.paraloc[side].add_location;
+                { hack: the paraloc must be valid, but is not actually used }
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_R0;
+                paraloc^.size:=OS_ADDR;
+                { no parameter after the array of const is processed }
+                break;
+              end;
+
+            { determine kind of location, code generator size and length }
+            if push_addr_param(hp.varspez,paradef,p.proccalloption) then
+              begin
+                { passed by address: the parameter becomes a pointer }
+                paradef:=getpointerdef(paradef);
+                loc:=LOC_REGISTER;
+                paracgsize := OS_ADDR;
+                paralen := tcgsize2size[OS_ADDR];
+              end
+            else
+              begin
+                if not is_special_array(paradef) then
+                  paralen := paradef.size
+                else
+                  paralen := tcgsize2size[def_cgsize(paradef)];
+                loc := getparaloc(p.proccalloption,paradef,isvariadic);
+                if (paradef.typ in [objectdef,arraydef,recorddef]) and
+                  not is_special_array(paradef) and
+                  (hp.varspez in [vs_value,vs_const]) then
+                  paracgsize := int_cgsize(paralen)
+                else
+                  begin
+                    paracgsize:=def_cgsize(paradef);
+                    { for things like formaldef }
+                    if (paracgsize=OS_NO) then
+                      begin
+                        paracgsize:=OS_ADDR;
+                        paralen:=tcgsize2size[OS_ADDR];
+                        paradef:=voidpointertype;
+                      end;
+                  end
+              end;
+
+             hp.paraloc[side].size:=paracgsize;
+             hp.paraloc[side].Alignment:=std_param_align;
+             hp.paraloc[side].intsize:=paralen;
+             hp.paraloc[side].def:=paradef;
+             firstparaloc:=true;
+
+{$ifdef EXTDEBUG}
+             if paralen=0 then
+               internalerror(200410311);
+{$endif EXTDEBUG}
+             { add location entries until the whole parameter is covered }
+             while paralen>0 do
+               begin
+                 paraloc:=hp.paraloc[side].add_location;
+
+                 { size of this piece: floats passed in integer registers and
+                   64-bit/unsized values are handled in 32-bit pieces }
+                 if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
+                   case paracgsize of
+                     OS_F32:
+                       paraloc^.size:=OS_32;
+                     OS_F64:
+                       paraloc^.size:=OS_32;
+                     else
+                       internalerror(2005082901);
+                   end
+                 else if (paracgsize in [OS_NO,OS_64,OS_S64]) then
+                   paraloc^.size := OS_32
+                 else
+                   paraloc^.size:=paracgsize;
+                 case loc of
+                    LOC_REGISTER:
+                      begin
+                        { align registers for eabi }
+                        if (target_info.abi in [abi_eabi,abi_eabihf]) and
+                           firstparaloc and
+                           (paradef.alignment=8) then
+                          begin
+                            if (nextintreg in [RS_R1,RS_R3]) then
+                              inc(nextintreg)
+                            else if nextintreg>RS_R3 then
+                              stack_offset:=align(stack_offset,8);
+                          end;
+                        { this is not abi compliant
+                          why? (FK) }
+                        if nextintreg<=RS_R3 then
+                          begin
+                            paraloc^.loc:=LOC_REGISTER;
+                            paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
+                            inc(nextintreg);
+                          end
+                        else
+                          begin
+                            { LOC_REFERENCE always contains everything that's left }
+                            paraloc^.loc:=LOC_REFERENCE;
+                            paraloc^.size:=int_cgsize(paralen);
+                            if (side=callerside) then
+                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
+                            paraloc^.reference.offset:=stack_offset;
+                            inc(stack_offset,align(paralen,4));
+                            paralen:=0;
+                         end;
+                      end;
+                    { NOTE(review): neither getparaloc in this unit nor the
+                      by-address path above produces LOC_FPUREGISTER, so
+                      this branch appears unreachable from here -- confirm }
+                    LOC_FPUREGISTER:
+                      begin
+                        if nextfloatreg<=RS_F3 then
+                          begin
+                            paraloc^.loc:=LOC_FPUREGISTER;
+                            paraloc^.register:=newreg(R_FPUREGISTER,nextfloatreg,R_SUBWHOLE);
+                            inc(nextfloatreg);
+                          end
+                        else
+                          begin
+                            paraloc^.loc:=LOC_REFERENCE;
+                            paraloc^.reference.index:=NR_STACK_POINTER_REG;
+                            paraloc^.reference.offset:=stack_offset;
+                            case paraloc^.size of
+                              OS_F32:
+                                inc(stack_offset,4);
+                              OS_F64:
+                                inc(stack_offset,8);
+                              OS_F80:
+                                inc(stack_offset,10);
+                              OS_F128:
+                                inc(stack_offset,16);
+                              else
+                                internalerror(200403201);
+                            end;
+                          end;
+                      end;
+                    LOC_MMREGISTER:
+                      begin
+                        { a register is usable while registers up to RS_D7
+                          are left, or, for a single, while a spare single
+                          precision register is pending }
+                        if (nextmmreg<=RS_D7) or
+                           ((paraloc^.size = OS_F32) and
+                            (sparesinglereg<>NR_NO)) then
+                          begin
+                            paraloc^.loc:=LOC_MMREGISTER;
+                            case paraloc^.size of
+                              OS_F32:
+                                if sparesinglereg = NR_NO then 
+                                  begin     
+                                    { take a new register and remember a
+                                      second single register for the next
+                                      single precision parameter }
+                                    paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFS);
+                                    sparesinglereg:=newreg(R_MMREGISTER,nextmmreg-RS_S0+RS_S1,R_SUBFS);
+                                    inc(nextmmreg);
+                                  end
+                                else
+                                  begin
+                                    { consume the remembered spare register }
+                                    paraloc^.register:=sparesinglereg;
+                                    sparesinglereg := NR_NO;
+                                  end;
+                              OS_F64:
+                                begin
+                                  paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFD);
+                                  inc(nextmmreg);
+                                end;
+                              else
+                                internalerror(2012031601);
+                            end;
+                          end
+                        else
+                          begin
+                            { once a floating point parameters has been placed
+                            on the stack we must not pass any more in vfp regs
+                            even if there is a single precision register still
+                            free}
+                            sparesinglereg := NR_NO;
+                            { LOC_REFERENCE always contains everything that's left }
+                            paraloc^.loc:=LOC_REFERENCE;
+                            paraloc^.size:=int_cgsize(paralen);
+                            if (side=callerside) then
+                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
+                            paraloc^.reference.offset:=stack_offset;
+                            inc(stack_offset,align(paralen,4));
+                            paralen:=0;
+                         end;
+                      end;
+                    LOC_REFERENCE:
+                      begin
+                        if push_addr_param(hp.varspez,paradef,p.proccalloption) then
+                          begin
+                            paraloc^.size:=OS_ADDR;
+                            assignintreg
+                          end
+                        else
+                          begin
+                            { align stack for eabi }
+                            if (target_info.abi in [abi_eabi,abi_eabihf]) and
+                               firstparaloc and
+                               (paradef.alignment=8) then
+                              stack_offset:=align(stack_offset,8);
+
+                             paraloc^.size:=paracgsize;
+                             paraloc^.loc:=LOC_REFERENCE;
+                             paraloc^.reference.index:=NR_STACK_POINTER_REG;
+                             paraloc^.reference.offset:=stack_offset;
+                             inc(stack_offset,align(paralen,4));
+                             paralen:=0
+                          end;
+                      end;
+                    else
+                      internalerror(2002071002);
+                 end;
+                 { on the callee side stack locations are addressed relative
+                   to the frame pointer instead of the stack pointer }
+                 if side=calleeside then
+                   begin
+                     if paraloc^.loc=LOC_REFERENCE then
+                       begin
+                         paraloc^.reference.index:=NR_FRAME_POINTER_REG;
+                         { on non-Darwin, the framepointer contains the value
+                           of the stack pointer on entry. On Darwin, the
+                           framepointer points to the previously saved
+                           framepointer (which is followed only by the saved
+                           return address -> framepointer + 4 = stack pointer
+                           on entry)
+                           NOTE(review): the code adds 4 on non-Darwin and 8
+                           on Darwin, which does not obviously match this
+                           description -- confirm }
+                         if not(target_info.system in systems_darwin) then
+                           inc(paraloc^.reference.offset,4)
+                         else
+                           inc(paraloc^.reference.offset,8);
+                       end;
+                   end;
+                 { account for the piece just placed; paralen was already set
+                   to 0 where the rest of the parameter went to the stack }
+                 dec(paralen,tcgsize2size[paraloc^.size]);
+                 firstparaloc:=false
+               end;
+          end;
+        { hand the updated allocation state back to the caller }
+        curintreg:=nextintreg;
+        curfloatreg:=nextfloatreg;
+        curmmreg:=nextmmreg;
+        cur_stack_offset:=stack_offset;
+        result:=cur_stack_offset;
+      end;
+
+
+    { Computes the location in which the function result of p is returned.
+      When set_common_funcretloc_info reports that it has already handled
+      the result, nothing more is done.  Otherwise floating point results
+      go to a multimedia register, to integer registers or to the FPU
+      result register depending on ABI and settings, and all other results
+      go to integer registers, with 64-bit values split into two 32-bit
+      halves.  side is not consulted by the body.
+      NOTE(review): fpu_vfpv2, fpu_vfpv3, fpu_vfpv3_d16 and fpu_fpv4_s16 are
+      not members of the tfputype declared by this target's cpuinfo unit
+      (which lists only fpu_none and fpu_vfp), and the 32-bit register pairs
+      look carried over from a 32-bit target -- confirm. }
+    function  taarch64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
+      var
+        paraloc : pcgparalocation;
+        retcgsize  : tcgsize;
+      begin
+         if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
+           exit;
+
+        paraloc:=result.add_location;
+        { Return in FPU register? }
+        if result.def.typ=floatdef then
+          begin
+            if target_info.abi = abi_eabihf then 
+              begin
+                { hard-float ABI: result in a multimedia register }
+                paraloc^.loc:=LOC_MMREGISTER;
+                case retcgsize of
+                  OS_64,
+                  OS_F64:
+                    begin
+                      paraloc^.register:=NR_MM_RESULT_REG;
+                    end;
+                  OS_32,
+                  OS_F32:
+                    begin
+                      paraloc^.register:=NR_S0;
+                    end;
+                  else
+                    internalerror(2012032501);
+                end;
+                paraloc^.size:=retcgsize;
+              end
+            else if (p.proccalloption in [pocall_softfloat]) or
+               (cs_fp_emulation in current_settings.moduleswitches) or
+               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
+              begin
+                { floating point result returned in integer registers }
+                case retcgsize of
+                  OS_64,
+                  OS_F64:
+                    begin
+                      { two 32-bit halves; which register holds the first
+                        half depends on the target endianness }
+                      paraloc^.loc:=LOC_REGISTER;
+                      if target_info.endian = endian_big then
+                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
+                      else
+                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                      paraloc^.size:=OS_32;
+                      paraloc:=result.add_location;
+                      paraloc^.loc:=LOC_REGISTER;
+                      if target_info.endian = endian_big then
+                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
+                      else
+                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                      paraloc^.size:=OS_32;
+                    end;
+                  OS_32,
+                  OS_F32:
+                    begin
+                      paraloc^.loc:=LOC_REGISTER;
+                      paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                      paraloc^.size:=OS_32;
+                    end;
+                  else
+                    internalerror(2005082603);
+                end;
+              end
+            else
+              begin
+                paraloc^.loc:=LOC_FPUREGISTER;
+                paraloc^.register:=NR_FPU_RESULT_REG;
+                paraloc^.size:=retcgsize;
+              end;
+          end
+          { Return in register }
+        else
+          begin
+            if retcgsize in [OS_64,OS_S64] then
+              begin
+                { 64-bit result: two 32-bit halves ordered by endianness }
+                paraloc^.loc:=LOC_REGISTER;
+                if target_info.endian = endian_big then
+                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
+                else
+                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
+                paraloc^.size:=OS_32;
+                paraloc:=result.add_location;
+                paraloc^.loc:=LOC_REGISTER;
+                if target_info.endian = endian_big then
+                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
+                else
+                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
+                paraloc^.size:=OS_32;
+              end
+            else
+              begin
+                paraloc^.loc:=LOC_REGISTER;
+                paraloc^.register:=NR_FUNCTION_RETURN_REG;
+                { a result of three bytes is returned as a 32-bit value }
+                if (result.intsize<>3) then
+                  paraloc^.size:=retcgsize
+                else
+                  paraloc^.size:=OS_32;
+              end;
+          end;
+      end;
+
+
+    { Computes the locations of all declared parameters of p for the given
+      side, starting from a fresh allocation state, then sets up the
+      function result location.  Returns the value delivered by
+      create_paraloc_info_intern. }
+    function taarch64paramanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
+      var
+        intreg, floatreg, mmreg: tsuperregister;
+        stackoffs: aword;
+        sparesingle: tregister;
+      begin
+        init_values(intreg,floatreg,mmreg,stackoffs,sparesingle);
+
+        create_paraloc_info:=create_paraloc_info_intern(p,side,p.paras,
+          intreg,floatreg,mmreg,stackoffs,sparesingle,false);
+
+        create_funcretloc_info(p,side);
+     end;
+
+
+    { Computes caller side locations for a call with variable arguments:
+      first the declared parameters of p, then the entries of varargspara,
+      continuing with the same allocation state.  Only calling conventions
+      in cstylearrayofconst are accepted; any other one triggers internal
+      error 200410231. }
+    function taarch64paramanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
+      var
+        intreg, floatreg, mmreg: tsuperregister;
+        stackoffs: aword;
+        sparesingle: tregister;
+      begin
+        init_values(intreg,floatreg,mmreg,stackoffs,sparesingle);
+
+        result:=create_paraloc_info_intern(p,callerside,p.paras,
+          intreg,floatreg,mmreg,stackoffs,sparesingle,true);
+        if not(p.proccalloption in cstylearrayofconst) then
+          internalerror(200410231)
+        else
+          { just continue loading the parameters in the registers }
+          result:=create_paraloc_info_intern(p,callerside,varargspara,
+            intreg,floatreg,mmreg,stackoffs,sparesingle,true);
+      end;
+
+{ unit initialization: create the AArch64 parameter manager and store it in
+  the paramanager variable }
+begin
+   paramanager:=taarch64paramanager.create;
+end.

+ 93 - 0
compiler/aarch64/itcpugas.pas

@@ -0,0 +1,93 @@
+{
+    Copyright (c) 1998-2012 by Florian Klaempfl and others
+
+    This unit contains the ARM64 GAS instruction tables
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit itcpugas;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    cpubase,cgbase;
+
+
+  const
+    { Standard opcode string table (for each tasmop enumeration). The
+      opcode strings should conform to the names as defined by the
+      processor manufacturer.
+    }
+    gas_op2str : op2strtable = {$i a64att.inc}
+
+    function gas_regnum_search(const s:string):Tregister;
+    function gas_regname(r:Tregister):string;
+
+
+implementation
+
+    uses
+      cutils,verbose,rgbase;
+
+    const
+      gas_regname_table : TRegNameTable = ( { register names as listed in ra64std.inc }
+        {$i ra64std.inc}
+      );
+      { positions in gas_regname_table ordered by register name; findreg_by_gasname binary-searches through this index }
+      gas_regname_index : array[tregisterindex] of tregisterindex = (
+        {$i ra64sri.inc}
+      );
+
+    function findreg_by_gasname(const s:string):tregisterindex;
+      var
+        i,p : tregisterindex;
+      begin
+        { Binary search through gas_regname_index: p is the furthest position accepted so far (its name compares <= s), i is the step width that is halved each round; regnumber_count_bsstart is presumably a power of two - TODO confirm. }
+        p:=0;
+        i:=regnumber_count_bsstart;
+        repeat
+          if (p+i<=high(tregisterindex)) and (gas_regname_table[gas_regname_index[p+i]]<=s) then
+            p:=p+i;
+          i:=i shr 1;
+        until i=0;
+        if gas_regname_table[gas_regname_index[p]]=s then { only an exact name match counts }
+          findreg_by_gasname:=gas_regname_index[p]
+        else
+          findreg_by_gasname:=0; { name not found }
+      end;
+
+
+    function gas_regnum_search(const s:string):Tregister;
+      begin
+        result:=regnumber_table[findreg_by_gasname(s)]; { an unknown name gives index 0, so entry 0 of regnumber_table is returned in that case }
+      end;
+
+
+    function gas_regname(r:Tregister):string;
+      var
+        p : tregisterindex;
+      begin
+        p:=findreg_by_number(r);
+        if p<>0 then { index 0 is treated as "no table entry for r" }
+          result:=gas_regname_table[p]
+        else
+          result:=generic_regname(r); { fall back to the generic name for registers without a table entry }
+      end;
+
+end.

+ 227 - 0
compiler/aarch64/ra64con.inc

@@ -0,0 +1,227 @@
+{ don't edit, this file is generated from a64reg.dat }
+NR_NO = tregister($00000000);
+NR_W0 = tregister($01040000);
+NR_X0 = tregister($01050000);
+NR_W1 = tregister($01040001);
+NR_X1 = tregister($01050001);
+NR_W2 = tregister($01040002);
+NR_X2 = tregister($01050002);
+NR_W3 = tregister($01040003);
+NR_X3 = tregister($01050003);
+NR_W4 = tregister($01040004);
+NR_X4 = tregister($01050004);
+NR_W5 = tregister($01040005);
+NR_X5 = tregister($01050005);
+NR_W6 = tregister($01040006);
+NR_X6 = tregister($01050006);
+NR_W7 = tregister($01040007);
+NR_X7 = tregister($01050007);
+NR_W8 = tregister($01040008);
+NR_X8 = tregister($01050008);
+NR_W9 = tregister($01040009);
+NR_X9 = tregister($01050009);
+NR_W10 = tregister($0104000A);
+NR_X10 = tregister($0105000A);
+NR_W11 = tregister($0104000B);
+NR_X11 = tregister($0105000B);
+NR_W12 = tregister($0104000C);
+NR_X12 = tregister($0105000C);
+NR_W13 = tregister($0104000D);
+NR_X13 = tregister($0105000D);
+NR_W14 = tregister($0104000E);
+NR_X14 = tregister($0105000E);
+NR_W15 = tregister($0104000F);
+NR_X15 = tregister($0105000F);
+NR_W16 = tregister($01040010);
+NR_X16 = tregister($01050010);
+NR_W17 = tregister($01040011);
+NR_X17 = tregister($01050011);
+NR_W18 = tregister($01040012);
+NR_X18 = tregister($01050012);
+NR_W19 = tregister($01040013);
+NR_X19 = tregister($01050013);
+NR_W20 = tregister($01040014);
+NR_X20 = tregister($01050014);
+NR_W21 = tregister($01040015);
+NR_X21 = tregister($01050015);
+NR_W22 = tregister($01040016);
+NR_X22 = tregister($01050016);
+NR_W23 = tregister($01040017);
+NR_X23 = tregister($01050017);
+NR_W24 = tregister($01040018);
+NR_X24 = tregister($01050018);
+NR_W25 = tregister($01040019);
+NR_X25 = tregister($01050019);
+NR_W26 = tregister($0104001A);
+NR_X26 = tregister($0105001A);
+NR_W27 = tregister($0104001B);
+NR_X27 = tregister($0105001B);
+NR_W28 = tregister($0104001C);
+NR_X28 = tregister($0105001C);
+NR_W29 = tregister($0104001D);
+NR_X29 = tregister($0105001D);
+NR_W30 = tregister($0104001E);
+NR_X30 = tregister($0105001E);
+NR_WZR = tregister($0104001F);
+NR_XZR = tregister($0105001F);
+NR_B0 = tregister($04010000);
+NR_H0 = tregister($04030000);
+NR_S0 = tregister($04090000);
+NR_D0 = tregister($040a0000);
+NR_Q0 = tregister($04050000);
+NR_B1 = tregister($04010001);
+NR_H1 = tregister($04030001);
+NR_S1 = tregister($04090001);
+NR_D1 = tregister($040a0001);
+NR_Q1 = tregister($04050001);
+NR_B2 = tregister($04010002);
+NR_H2 = tregister($04030002);
+NR_S2 = tregister($04090002);
+NR_D2 = tregister($040a0002);
+NR_Q2 = tregister($04050002);
+NR_B3 = tregister($04010003);
+NR_H3 = tregister($04030003);
+NR_S3 = tregister($04090003);
+NR_D3 = tregister($040a0003);
+NR_Q3 = tregister($04050003);
+NR_B4 = tregister($04010004);
+NR_H4 = tregister($04030004);
+NR_S4 = tregister($04090004);
+NR_D4 = tregister($040a0004);
+NR_Q4 = tregister($04050004);
+NR_B5 = tregister($04010005);
+NR_H5 = tregister($04030005);
+NR_S5 = tregister($04090005);
+NR_D5 = tregister($040a0005);
+NR_Q5 = tregister($04050005);
+NR_B6 = tregister($04010006);
+NR_H6 = tregister($04030006);
+NR_S6 = tregister($04090006);
+NR_D6 = tregister($040a0006);
+NR_Q6 = tregister($04050006);
+NR_B7 = tregister($04010007);
+NR_H7 = tregister($04030007);
+NR_S7 = tregister($04090007);
+NR_D7 = tregister($040a0007);
+NR_Q7 = tregister($04050007);
+NR_B8 = tregister($04010008);
+NR_H8 = tregister($04030008);
+NR_S8 = tregister($04090008);
+NR_D8 = tregister($040a0008);
+NR_Q8 = tregister($04050008);
+NR_B9 = tregister($04010009);
+NR_H9 = tregister($04030009);
+NR_S9 = tregister($04090009);
+NR_D9 = tregister($040a0009);
+NR_Q9 = tregister($04050009);
+NR_B10 = tregister($0401000A);
+NR_H10 = tregister($0403000A);
+NR_S10 = tregister($0409000A);
+NR_D10 = tregister($040a000A);
+NR_Q10 = tregister($0405000A);
+NR_B11 = tregister($0401000B);
+NR_H11 = tregister($0403000B);
+NR_S11 = tregister($0409000B);
+NR_D11 = tregister($040a000B);
+NR_Q11 = tregister($0405000B);
+NR_B12 = tregister($0401000C);
+NR_H12 = tregister($0403000C);
+NR_S12 = tregister($0409000C);
+NR_D12 = tregister($040a000C);
+NR_Q12 = tregister($0405000C);
+NR_B13 = tregister($0401000D);
+NR_H13 = tregister($0403000D);
+NR_S13 = tregister($0409000D);
+NR_D13 = tregister($040a000D);
+NR_Q13 = tregister($0405000D);
+NR_B14 = tregister($0401000E);
+NR_H14 = tregister($0403000E);
+NR_S14 = tregister($0409000E);
+NR_D14 = tregister($040a000E);
+NR_Q14 = tregister($0405000E);
+NR_B15 = tregister($0401000F);
+NR_H15 = tregister($0403000F);
+NR_S15 = tregister($0409000F);
+NR_D15 = tregister($040a000F);
+NR_Q15 = tregister($0405000F);
+NR_B16 = tregister($04010010);
+NR_H16 = tregister($04030010);
+NR_S16 = tregister($04090010);
+NR_D16 = tregister($040a0010);
+NR_Q16 = tregister($04050010);
+NR_B17 = tregister($04010011);
+NR_H17 = tregister($04030011);
+NR_S17 = tregister($04090011);
+NR_D17 = tregister($040a0011);
+NR_Q17 = tregister($04050011);
+NR_B18 = tregister($04010012);
+NR_H18 = tregister($04030012);
+NR_S18 = tregister($04090012);
+NR_D18 = tregister($040a0012);
+NR_Q18 = tregister($04050012);
+NR_B19 = tregister($04010013);
+NR_H19 = tregister($04030013);
+NR_S19 = tregister($04090013);
+NR_D19 = tregister($040a0013);
+NR_Q19 = tregister($04050013);
+NR_B20 = tregister($04010014);
+NR_H20 = tregister($04030014);
+NR_S20 = tregister($04090014);
+NR_D20 = tregister($040a0014);
+NR_Q20 = tregister($04050014);
+NR_B21 = tregister($04010015);
+NR_H21 = tregister($04030015);
+NR_S21 = tregister($04090015);
+NR_D21 = tregister($040a0015);
+NR_Q21 = tregister($04050015);
+NR_B22 = tregister($04010016);
+NR_H22 = tregister($04030016);
+NR_S22 = tregister($04090016);
+NR_D22 = tregister($040a0016);
+NR_Q22 = tregister($04050016);
+NR_B23 = tregister($04010017);
+NR_H23 = tregister($04030017);
+NR_S23 = tregister($04090017);
+NR_D23 = tregister($040a0017);
+NR_Q23 = tregister($04050017);
+NR_B24 = tregister($04010018);
+NR_H24 = tregister($04030018);
+NR_S24 = tregister($04090018);
+NR_D24 = tregister($040a0018);
+NR_Q24 = tregister($04050018);
+NR_B25 = tregister($04010019);
+NR_H25 = tregister($04030019);
+NR_S25 = tregister($04090019);
+NR_D25 = tregister($040a0019);
+NR_Q25 = tregister($04050019);
+NR_B26 = tregister($0401001A);
+NR_H26 = tregister($0403001A);
+NR_S26 = tregister($0409001A);
+NR_D26 = tregister($040a001A);
+NR_Q26 = tregister($0405001A);
+NR_B27 = tregister($0401001B);
+NR_H27 = tregister($0403001B);
+NR_S27 = tregister($0409001B);
+NR_D27 = tregister($040a001B);
+NR_Q27 = tregister($0405001B);
+NR_B28 = tregister($0401001C);
+NR_H28 = tregister($0403001C);
+NR_S28 = tregister($0409001C);
+NR_D28 = tregister($040a001C);
+NR_Q28 = tregister($0405001C);
+NR_B29 = tregister($0401001D);
+NR_H29 = tregister($0403001D);
+NR_S29 = tregister($0409001D);
+NR_D29 = tregister($040a001D);
+NR_Q29 = tregister($0405001D);
+NR_B30 = tregister($0401001E);
+NR_H30 = tregister($0403001E);
+NR_S30 = tregister($0409001E);
+NR_D30 = tregister($040a001E);
+NR_Q30 = tregister($0405001E);
+NR_B31 = tregister($0401001F);
+NR_H31 = tregister($0403001F);
+NR_S31 = tregister($0409001F);
+NR_D31 = tregister($040a001F);
+NR_Q31 = tregister($0405001F);
+NR_NZCV = tregister($05000000);

+ 227 - 0
compiler/aarch64/ra64dwa.inc

@@ -0,0 +1,227 @@
+{ don't edit, this file is generated from a64reg.dat }
+-1,
+0,
+0,
+1,
+1,
+2,
+2,
+3,
+3,
+4,
+4,
+5,
+5,
+6,
+6,
+7,
+7,
+8,
+8,
+9,
+9,
+10,
+10,
+11,
+11,
+12,
+12,
+13,
+13,
+14,
+14,
+15,
+15,
+16,
+16,
+17,
+17,
+18,
+18,
+19,
+19,
+20,
+20,
+21,
+21,
+22,
+22,
+23,
+23,
+24,
+24,
+25,
+25,
+26,
+26,
+27,
+27,
+28,
+28,
+29,
+29,
+30,
+30,
+31,
+31,
+0,
+0,
+0,
+0,
+0,
+1,
+1,
+1,
+1,
+1,
+2,
+2,
+2,
+2,
+2,
+3,
+3,
+3,
+3,
+3,
+4,
+4,
+4,
+4,
+4,
+5,
+5,
+5,
+5,
+5,
+6,
+6,
+6,
+6,
+6,
+7,
+7,
+7,
+7,
+7,
+8,
+8,
+8,
+8,
+8,
+9,
+9,
+9,
+9,
+9,
+10,
+10,
+10,
+10,
+10,
+11,
+11,
+11,
+11,
+11,
+12,
+12,
+12,
+12,
+12,
+13,
+13,
+13,
+13,
+13,
+14,
+14,
+14,
+14,
+14,
+15,
+15,
+15,
+15,
+15,
+16,
+16,
+16,
+16,
+16,
+17,
+17,
+17,
+17,
+17,
+18,
+18,
+18,
+18,
+18,
+19,
+19,
+19,
+19,
+19,
+20,
+20,
+20,
+20,
+20,
+21,
+21,
+21,
+21,
+21,
+22,
+22,
+22,
+22,
+22,
+23,
+23,
+23,
+23,
+23,
+24,
+24,
+24,
+24,
+24,
+25,
+25,
+25,
+25,
+25,
+26,
+26,
+26,
+26,
+26,
+27,
+27,
+27,
+27,
+27,
+28,
+28,
+28,
+28,
+28,
+29,
+29,
+29,
+29,
+29,
+30,
+30,
+30,
+30,
+30,
+31,
+31,
+31,
+31,
+31,
+0

+ 2 - 0
compiler/aarch64/ra64nor.inc

@@ -0,0 +1,2 @@
+{ don't edit, this file is generated from a64reg.dat }
+226

+ 227 - 0
compiler/aarch64/ra64num.inc

@@ -0,0 +1,227 @@
+{ don't edit, this file is generated from a64reg.dat }
+tregister($00000000),
+tregister($01040000),
+tregister($01050000),
+tregister($01040001),
+tregister($01050001),
+tregister($01040002),
+tregister($01050002),
+tregister($01040003),
+tregister($01050003),
+tregister($01040004),
+tregister($01050004),
+tregister($01040005),
+tregister($01050005),
+tregister($01040006),
+tregister($01050006),
+tregister($01040007),
+tregister($01050007),
+tregister($01040008),
+tregister($01050008),
+tregister($01040009),
+tregister($01050009),
+tregister($0104000A),
+tregister($0105000A),
+tregister($0104000B),
+tregister($0105000B),
+tregister($0104000C),
+tregister($0105000C),
+tregister($0104000D),
+tregister($0105000D),
+tregister($0104000E),
+tregister($0105000E),
+tregister($0104000F),
+tregister($0105000F),
+tregister($01040010),
+tregister($01050010),
+tregister($01040011),
+tregister($01050011),
+tregister($01040012),
+tregister($01050012),
+tregister($01040013),
+tregister($01050013),
+tregister($01040014),
+tregister($01050014),
+tregister($01040015),
+tregister($01050015),
+tregister($01040016),
+tregister($01050016),
+tregister($01040017),
+tregister($01050017),
+tregister($01040018),
+tregister($01050018),
+tregister($01040019),
+tregister($01050019),
+tregister($0104001A),
+tregister($0105001A),
+tregister($0104001B),
+tregister($0105001B),
+tregister($0104001C),
+tregister($0105001C),
+tregister($0104001D),
+tregister($0105001D),
+tregister($0104001E),
+tregister($0105001E),
+tregister($0104001F),
+tregister($0105001F),
+tregister($04010000),
+tregister($04030000),
+tregister($04090000),
+tregister($040a0000),
+tregister($04050000),
+tregister($04010001),
+tregister($04030001),
+tregister($04090001),
+tregister($040a0001),
+tregister($04050001),
+tregister($04010002),
+tregister($04030002),
+tregister($04090002),
+tregister($040a0002),
+tregister($04050002),
+tregister($04010003),
+tregister($04030003),
+tregister($04090003),
+tregister($040a0003),
+tregister($04050003),
+tregister($04010004),
+tregister($04030004),
+tregister($04090004),
+tregister($040a0004),
+tregister($04050004),
+tregister($04010005),
+tregister($04030005),
+tregister($04090005),
+tregister($040a0005),
+tregister($04050005),
+tregister($04010006),
+tregister($04030006),
+tregister($04090006),
+tregister($040a0006),
+tregister($04050006),
+tregister($04010007),
+tregister($04030007),
+tregister($04090007),
+tregister($040a0007),
+tregister($04050007),
+tregister($04010008),
+tregister($04030008),
+tregister($04090008),
+tregister($040a0008),
+tregister($04050008),
+tregister($04010009),
+tregister($04030009),
+tregister($04090009),
+tregister($040a0009),
+tregister($04050009),
+tregister($0401000A),
+tregister($0403000A),
+tregister($0409000A),
+tregister($040a000A),
+tregister($0405000A),
+tregister($0401000B),
+tregister($0403000B),
+tregister($0409000B),
+tregister($040a000B),
+tregister($0405000B),
+tregister($0401000C),
+tregister($0403000C),
+tregister($0409000C),
+tregister($040a000C),
+tregister($0405000C),
+tregister($0401000D),
+tregister($0403000D),
+tregister($0409000D),
+tregister($040a000D),
+tregister($0405000D),
+tregister($0401000E),
+tregister($0403000E),
+tregister($0409000E),
+tregister($040a000E),
+tregister($0405000E),
+tregister($0401000F),
+tregister($0403000F),
+tregister($0409000F),
+tregister($040a000F),
+tregister($0405000F),
+tregister($04010010),
+tregister($04030010),
+tregister($04090010),
+tregister($040a0010),
+tregister($04050010),
+tregister($04010011),
+tregister($04030011),
+tregister($04090011),
+tregister($040a0011),
+tregister($04050011),
+tregister($04010012),
+tregister($04030012),
+tregister($04090012),
+tregister($040a0012),
+tregister($04050012),
+tregister($04010013),
+tregister($04030013),
+tregister($04090013),
+tregister($040a0013),
+tregister($04050013),
+tregister($04010014),
+tregister($04030014),
+tregister($04090014),
+tregister($040a0014),
+tregister($04050014),
+tregister($04010015),
+tregister($04030015),
+tregister($04090015),
+tregister($040a0015),
+tregister($04050015),
+tregister($04010016),
+tregister($04030016),
+tregister($04090016),
+tregister($040a0016),
+tregister($04050016),
+tregister($04010017),
+tregister($04030017),
+tregister($04090017),
+tregister($040a0017),
+tregister($04050017),
+tregister($04010018),
+tregister($04030018),
+tregister($04090018),
+tregister($040a0018),
+tregister($04050018),
+tregister($04010019),
+tregister($04030019),
+tregister($04090019),
+tregister($040a0019),
+tregister($04050019),
+tregister($0401001A),
+tregister($0403001A),
+tregister($0409001A),
+tregister($040a001A),
+tregister($0405001A),
+tregister($0401001B),
+tregister($0403001B),
+tregister($0409001B),
+tregister($040a001B),
+tregister($0405001B),
+tregister($0401001C),
+tregister($0403001C),
+tregister($0409001C),
+tregister($040a001C),
+tregister($0405001C),
+tregister($0401001D),
+tregister($0403001D),
+tregister($0409001D),
+tregister($040a001D),
+tregister($0405001D),
+tregister($0401001E),
+tregister($0403001E),
+tregister($0409001E),
+tregister($040a001E),
+tregister($0405001E),
+tregister($0401001F),
+tregister($0403001F),
+tregister($0409001F),
+tregister($040a001F),
+tregister($0405001F),
+tregister($05000000)

+ 227 - 0
compiler/aarch64/ra64rni.inc

@@ -0,0 +1,227 @@
+{ don't edit, this file is generated from a64reg.dat }
+0,
+1,
+3,
+5,
+7,
+9,
+11,
+13,
+15,
+17,
+19,
+21,
+23,
+25,
+27,
+29,
+31,
+33,
+35,
+37,
+39,
+41,
+43,
+45,
+47,
+49,
+51,
+53,
+55,
+57,
+59,
+61,
+63,
+2,
+4,
+6,
+8,
+10,
+12,
+14,
+16,
+18,
+20,
+22,
+24,
+26,
+28,
+30,
+32,
+34,
+36,
+38,
+40,
+42,
+44,
+46,
+48,
+50,
+52,
+54,
+56,
+58,
+60,
+62,
+64,
+65,
+70,
+75,
+80,
+85,
+90,
+95,
+100,
+105,
+110,
+115,
+120,
+125,
+130,
+135,
+140,
+145,
+150,
+155,
+160,
+165,
+170,
+175,
+180,
+185,
+190,
+195,
+200,
+205,
+210,
+215,
+220,
+66,
+71,
+76,
+81,
+86,
+91,
+96,
+101,
+106,
+111,
+116,
+121,
+126,
+131,
+136,
+141,
+146,
+151,
+156,
+161,
+166,
+171,
+176,
+181,
+186,
+191,
+196,
+201,
+206,
+211,
+216,
+221,
+69,
+74,
+79,
+84,
+89,
+94,
+99,
+104,
+109,
+114,
+119,
+124,
+129,
+134,
+139,
+144,
+149,
+154,
+159,
+164,
+169,
+174,
+179,
+184,
+189,
+194,
+199,
+204,
+209,
+214,
+219,
+224,
+67,
+72,
+77,
+82,
+87,
+92,
+97,
+102,
+107,
+112,
+117,
+122,
+127,
+132,
+137,
+142,
+147,
+152,
+157,
+162,
+167,
+172,
+177,
+182,
+187,
+192,
+197,
+202,
+207,
+212,
+217,
+222,
+68,
+73,
+78,
+83,
+88,
+93,
+98,
+103,
+108,
+113,
+118,
+123,
+128,
+133,
+138,
+143,
+148,
+153,
+158,
+163,
+168,
+173,
+178,
+183,
+188,
+193,
+198,
+203,
+208,
+213,
+218,
+223,
+225

+ 227 - 0
compiler/aarch64/ra64sri.inc

@@ -0,0 +1,227 @@
+{ don't edit, this file is generated from a64reg.dat }
+0,
+65,
+70,
+115,
+120,
+125,
+130,
+135,
+140,
+145,
+150,
+155,
+160,
+75,
+165,
+170,
+175,
+180,
+185,
+190,
+195,
+200,
+205,
+210,
+80,
+215,
+220,
+85,
+90,
+95,
+100,
+105,
+110,
+68,
+73,
+118,
+123,
+128,
+133,
+138,
+143,
+148,
+153,
+158,
+163,
+78,
+168,
+173,
+178,
+183,
+188,
+193,
+198,
+203,
+208,
+213,
+83,
+218,
+223,
+88,
+93,
+98,
+103,
+108,
+113,
+66,
+71,
+116,
+121,
+126,
+131,
+136,
+141,
+146,
+151,
+156,
+161,
+76,
+166,
+171,
+176,
+181,
+186,
+191,
+196,
+201,
+206,
+211,
+81,
+216,
+221,
+86,
+91,
+96,
+101,
+106,
+111,
+225,
+69,
+74,
+119,
+124,
+129,
+134,
+139,
+144,
+149,
+154,
+159,
+164,
+79,
+169,
+174,
+179,
+184,
+189,
+194,
+199,
+204,
+209,
+214,
+84,
+219,
+224,
+89,
+94,
+99,
+104,
+109,
+114,
+67,
+72,
+117,
+122,
+127,
+132,
+137,
+142,
+147,
+152,
+157,
+162,
+77,
+167,
+172,
+177,
+182,
+187,
+192,
+197,
+202,
+207,
+212,
+82,
+217,
+222,
+87,
+92,
+97,
+102,
+107,
+112,
+1,
+3,
+21,
+23,
+25,
+27,
+29,
+31,
+33,
+35,
+37,
+39,
+5,
+41,
+43,
+45,
+47,
+49,
+51,
+53,
+55,
+57,
+59,
+7,
+61,
+9,
+11,
+13,
+15,
+17,
+19,
+63,
+2,
+4,
+22,
+24,
+26,
+28,
+30,
+32,
+34,
+36,
+38,
+40,
+6,
+42,
+44,
+46,
+48,
+50,
+52,
+54,
+56,
+58,
+60,
+8,
+62,
+10,
+12,
+14,
+16,
+18,
+20,
+64

+ 227 - 0
compiler/aarch64/ra64sta.inc

@@ -0,0 +1,227 @@
+{ don't edit, this file is generated from a64reg.dat }
+-1,
+0,
+0,
+1,
+1,
+2,
+2,
+3,
+3,
+4,
+4,
+5,
+5,
+6,
+6,
+7,
+7,
+8,
+8,
+9,
+9,
+10,
+10,
+11,
+11,
+12,
+12,
+13,
+13,
+14,
+14,
+15,
+15,
+16,
+16,
+17,
+17,
+18,
+18,
+19,
+19,
+20,
+20,
+21,
+21,
+22,
+22,
+23,
+23,
+24,
+24,
+25,
+25,
+26,
+26,
+27,
+27,
+28,
+28,
+29,
+29,
+30,
+30,
+31,
+31,
+0,
+0,
+0,
+0,
+0,
+1,
+1,
+1,
+1,
+1,
+2,
+2,
+2,
+2,
+2,
+3,
+3,
+3,
+3,
+3,
+4,
+4,
+4,
+4,
+4,
+5,
+5,
+5,
+5,
+5,
+6,
+6,
+6,
+6,
+6,
+7,
+7,
+7,
+7,
+7,
+8,
+8,
+8,
+8,
+8,
+9,
+9,
+9,
+9,
+9,
+10,
+10,
+10,
+10,
+10,
+11,
+11,
+11,
+11,
+11,
+12,
+12,
+12,
+12,
+12,
+13,
+13,
+13,
+13,
+13,
+14,
+14,
+14,
+14,
+14,
+15,
+15,
+15,
+15,
+15,
+16,
+16,
+16,
+16,
+16,
+17,
+17,
+17,
+17,
+17,
+18,
+18,
+18,
+18,
+18,
+19,
+19,
+19,
+19,
+19,
+20,
+20,
+20,
+20,
+20,
+21,
+21,
+21,
+21,
+21,
+22,
+22,
+22,
+22,
+22,
+23,
+23,
+23,
+23,
+23,
+24,
+24,
+24,
+24,
+24,
+25,
+25,
+25,
+25,
+25,
+26,
+26,
+26,
+26,
+26,
+27,
+27,
+27,
+27,
+27,
+28,
+28,
+28,
+28,
+28,
+29,
+29,
+29,
+29,
+29,
+30,
+30,
+30,
+30,
+30,
+31,
+31,
+31,
+31,
+31,
+0

+ 227 - 0
compiler/aarch64/ra64std.inc

@@ -0,0 +1,227 @@
+{ don't edit, this file is generated from a64reg.dat }
+'INVALID',
+'w0',
+'x0',
+'w1',
+'x1',
+'w2',
+'x2',
+'w3',
+'x3',
+'w4',
+'x4',
+'w5',
+'x5',
+'w6',
+'x6',
+'w7',
+'x7',
+'w8',
+'x8',
+'w9',
+'x9',
+'w10',
+'x10',
+'w11',
+'x11',
+'w12',
+'x12',
+'w13',
+'x13',
+'w14',
+'x14',
+'w15',
+'x15',
+'w16',
+'x16',
+'w17',
+'x17',
+'w18',
+'x18',
+'w19',
+'x19',
+'w20',
+'x20',
+'w21',
+'x21',
+'w22',
+'x22',
+'w23',
+'x23',
+'w24',
+'x24',
+'w25',
+'x25',
+'w26',
+'x26',
+'w27',
+'x27',
+'w28',
+'x28',
+'w29',
+'x29',
+'w30',
+'x30',
+'wzr',
+'xzr',
+'b0',
+'h0',
+'s0',
+'d0',
+'q0',
+'b1',
+'h1',
+'s1',
+'d1',
+'q1',
+'b2',
+'h2',
+'s2',
+'d2',
+'q2',
+'b3',
+'h3',
+'s3',
+'d3',
+'q3',
+'b4',
+'h4',
+'s4',
+'d4',
+'q4',
+'b5',
+'h5',
+'s5',
+'d5',
+'q5',
+'b6',
+'h6',
+'s6',
+'d6',
+'q6',
+'b7',
+'h7',
+'s7',
+'d7',
+'q7',
+'b8',
+'h8',
+'s8',
+'d8',
+'q8',
+'b9',
+'h9',
+'s9',
+'d9',
+'q9',
+'b10',
+'h10',
+'s10',
+'d10',
+'q10',
+'b11',
+'h11',
+'s11',
+'d11',
+'q11',
+'b12',
+'h12',
+'s12',
+'d12',
+'q12',
+'b13',
+'h13',
+'s13',
+'d13',
+'q13',
+'b14',
+'h14',
+'s14',
+'d14',
+'q14',
+'b15',
+'h15',
+'s15',
+'d15',
+'q15',
+'b16',
+'h16',
+'s16',
+'d16',
+'q16',
+'b17',
+'h17',
+'s17',
+'d17',
+'q17',
+'b18',
+'h18',
+'s18',
+'d18',
+'q18',
+'b19',
+'h19',
+'s19',
+'d19',
+'q19',
+'b20',
+'h20',
+'s20',
+'d20',
+'q20',
+'b21',
+'h21',
+'s21',
+'d21',
+'q21',
+'b22',
+'h22',
+'s22',
+'d22',
+'q22',
+'b23',
+'h23',
+'s23',
+'d23',
+'q23',
+'b24',
+'h24',
+'s24',
+'d24',
+'q24',
+'b25',
+'h25',
+'s25',
+'d25',
+'q25',
+'b26',
+'h26',
+'s26',
+'d26',
+'q26',
+'b27',
+'h27',
+'s27',
+'d27',
+'q27',
+'b28',
+'h28',
+'s28',
+'d28',
+'q28',
+'b29',
+'h29',
+'s29',
+'d29',
+'q29',
+'b30',
+'h30',
+'s30',
+'d30',
+'q30',
+'b31',
+'h31',
+'s31',
+'d31',
+'q31',
+'nzcv'

+ 227 - 0
compiler/aarch64/ra64sup.inc

@@ -0,0 +1,227 @@
+{ don't edit, this file is generated from a64reg.dat }
+RS_NO = $00;
+RS_W0 = $00;
+RS_X0 = $00;
+RS_W1 = $01;
+RS_X1 = $01;
+RS_W2 = $02;
+RS_X2 = $02;
+RS_W3 = $03;
+RS_X3 = $03;
+RS_W4 = $04;
+RS_X4 = $04;
+RS_W5 = $05;
+RS_X5 = $05;
+RS_W6 = $06;
+RS_X6 = $06;
+RS_W7 = $07;
+RS_X7 = $07;
+RS_W8 = $08;
+RS_X8 = $08;
+RS_W9 = $09;
+RS_X9 = $09;
+RS_W10 = $0A;
+RS_X10 = $0A;
+RS_W11 = $0B;
+RS_X11 = $0B;
+RS_W12 = $0C;
+RS_X12 = $0C;
+RS_W13 = $0D;
+RS_X13 = $0D;
+RS_W14 = $0E;
+RS_X14 = $0E;
+RS_W15 = $0F;
+RS_X15 = $0F;
+RS_W16 = $10;
+RS_X16 = $10;
+RS_W17 = $11;
+RS_X17 = $11;
+RS_W18 = $12;
+RS_X18 = $12;
+RS_W19 = $13;
+RS_X19 = $13;
+RS_W20 = $14;
+RS_X20 = $14;
+RS_W21 = $15;
+RS_X21 = $15;
+RS_W22 = $16;
+RS_X22 = $16;
+RS_W23 = $17;
+RS_X23 = $17;
+RS_W24 = $18;
+RS_X24 = $18;
+RS_W25 = $19;
+RS_X25 = $19;
+RS_W26 = $1A;
+RS_X26 = $1A;
+RS_W27 = $1B;
+RS_X27 = $1B;
+RS_W28 = $1C;
+RS_X28 = $1C;
+RS_W29 = $1D;
+RS_X29 = $1D;
+RS_W30 = $1E;
+RS_X30 = $1E;
+RS_WZR = $1F;
+RS_XZR = $1F;
+RS_B0 = $00;
+RS_H0 = $00;
+RS_S0 = $00;
+RS_D0 = $00;
+RS_Q0 = $00;
+RS_B1 = $01;
+RS_H1 = $01;
+RS_S1 = $01;
+RS_D1 = $01;
+RS_Q1 = $01;
+RS_B2 = $02;
+RS_H2 = $02;
+RS_S2 = $02;
+RS_D2 = $02;
+RS_Q2 = $02;
+RS_B3 = $03;
+RS_H3 = $03;
+RS_S3 = $03;
+RS_D3 = $03;
+RS_Q3 = $03;
+RS_B4 = $04;
+RS_H4 = $04;
+RS_S4 = $04;
+RS_D4 = $04;
+RS_Q4 = $04;
+RS_B5 = $05;
+RS_H5 = $05;
+RS_S5 = $05;
+RS_D5 = $05;
+RS_Q5 = $05;
+RS_B6 = $06;
+RS_H6 = $06;
+RS_S6 = $06;
+RS_D6 = $06;
+RS_Q6 = $06;
+RS_B7 = $07;
+RS_H7 = $07;
+RS_S7 = $07;
+RS_D7 = $07;
+RS_Q7 = $07;
+RS_B8 = $08;
+RS_H8 = $08;
+RS_S8 = $08;
+RS_D8 = $08;
+RS_Q8 = $08;
+RS_B9 = $09;
+RS_H9 = $09;
+RS_S9 = $09;
+RS_D9 = $09;
+RS_Q9 = $09;
+RS_B10 = $0A;
+RS_H10 = $0A;
+RS_S10 = $0A;
+RS_D10 = $0A;
+RS_Q10 = $0A;
+RS_B11 = $0B;
+RS_H11 = $0B;
+RS_S11 = $0B;
+RS_D11 = $0B;
+RS_Q11 = $0B;
+RS_B12 = $0C;
+RS_H12 = $0C;
+RS_S12 = $0C;
+RS_D12 = $0C;
+RS_Q12 = $0C;
+RS_B13 = $0D;
+RS_H13 = $0D;
+RS_S13 = $0D;
+RS_D13 = $0D;
+RS_Q13 = $0D;
+RS_B14 = $0E;
+RS_H14 = $0E;
+RS_S14 = $0E;
+RS_D14 = $0E;
+RS_Q14 = $0E;
+RS_B15 = $0F;
+RS_H15 = $0F;
+RS_S15 = $0F;
+RS_D15 = $0F;
+RS_Q15 = $0F;
+RS_B16 = $10;
+RS_H16 = $10;
+RS_S16 = $10;
+RS_D16 = $10;
+RS_Q16 = $10;
+RS_B17 = $11;
+RS_H17 = $11;
+RS_S17 = $11;
+RS_D17 = $11;
+RS_Q17 = $11;
+RS_B18 = $12;
+RS_H18 = $12;
+RS_S18 = $12;
+RS_D18 = $12;
+RS_Q18 = $12;
+RS_B19 = $13;
+RS_H19 = $13;
+RS_S19 = $13;
+RS_D19 = $13;
+RS_Q19 = $13;
+RS_B20 = $14;
+RS_H20 = $14;
+RS_S20 = $14;
+RS_D20 = $14;
+RS_Q20 = $14;
+RS_B21 = $15;
+RS_H21 = $15;
+RS_S21 = $15;
+RS_D21 = $15;
+RS_Q21 = $15;
+RS_B22 = $16;
+RS_H22 = $16;
+RS_S22 = $16;
+RS_D22 = $16;
+RS_Q22 = $16;
+RS_B23 = $17;
+RS_H23 = $17;
+RS_S23 = $17;
+RS_D23 = $17;
+RS_Q23 = $17;
+RS_B24 = $18;
+RS_H24 = $18;
+RS_S24 = $18;
+RS_D24 = $18;
+RS_Q24 = $18;
+RS_B25 = $19;
+RS_H25 = $19;
+RS_S25 = $19;
+RS_D25 = $19;
+RS_Q25 = $19;
+RS_B26 = $1A;
+RS_H26 = $1A;
+RS_S26 = $1A;
+RS_D26 = $1A;
+RS_Q26 = $1A;
+RS_B27 = $1B;
+RS_H27 = $1B;
+RS_S27 = $1B;
+RS_D27 = $1B;
+RS_Q27 = $1B;
+RS_B28 = $1C;
+RS_H28 = $1C;
+RS_S28 = $1C;
+RS_D28 = $1C;
+RS_Q28 = $1C;
+RS_B29 = $1D;
+RS_H29 = $1D;
+RS_S29 = $1D;
+RS_D29 = $1D;
+RS_Q29 = $1D;
+RS_B30 = $1E;
+RS_H30 = $1E;
+RS_S30 = $1E;
+RS_D30 = $1E;
+RS_Q30 = $1E;
+RS_B31 = $1F;
+RS_H31 = $1F;
+RS_S31 = $1F;
+RS_D31 = $1F;
+RS_Q31 = $1F;
+RS_NZCV = $00;

+ 5 - 2
compiler/aasmbase.pas

@@ -48,7 +48,11 @@ interface
            the address of this code label is taken somewhere in the code
            so it must be taken care of it when creating pic
          }
-         AT_ADDR
+         AT_ADDR,
+         { Thread-local symbol (ELF targets) }
+         AT_TLS,
+         { GNU indirect function (ELF targets) }
+         AT_GNU_IFUNC
          );
 
        { is the label only there for getting an DataOffset (e.g. for i/o
@@ -204,7 +208,6 @@ interface
 implementation
 
     uses
-      SysUtils,
       verbose;
 
 

+ 7 - 5
compiler/aasmdata.pas

@@ -33,11 +33,10 @@ interface
 
     uses
        cutils,cclasses,
-       globtype,globals,systems,
-       cpuinfo,cpubase,
-       cgbase,cgutils,
+       globtype,systems,
+       cgbase,
        symtype,
-       aasmbase,ogbase;
+       aasmbase;
 
     type
       { Type of AsmLists. The order is important for the layout of the
@@ -484,7 +483,10 @@ implementation
     procedure TAsmData.getlabel(out l : TAsmLabel;alt:TAsmLabeltype);
       begin
         if (target_info.system in (systems_linux + systems_bsd)) and
-           (cs_create_smart in current_settings.moduleswitches) and
+           { the next condition was
+             (cs_create_smart in current_settings.moduleswitches) and
+             but if we create_smartlink_sections, this is useless }
+           (create_smartlink_library) and
            (alt = alt_dbgline) then
           l:=TAsmLabel.createglobal(AsmSymbolDict,name,FNextLabelNr[alt],alt)
         else

+ 212 - 26
compiler/aasmtai.pas

@@ -85,7 +85,10 @@ interface
 {$endif m68k}
 {$ifdef arm}
           ait_thumb_func,
+          ait_thumb_set,
 {$endif arm}
+          ait_set,
+          ait_weak,
           { used to split into tiny assembler files }
           ait_cutobject,
           ait_regalloc,
@@ -94,11 +97,13 @@ interface
           ait_marker,
           { used to describe a new location of a variable }
           ait_varloc,
-          { SEH directives used in ARM,MIPS and x86_64 COFF targets }
-          ait_seh_directive,
+{$ifdef JVM}
           { JVM only }
           ait_jvar,    { debug information for a local variable }
-          ait_jcatch   { exception catch clause }
+          ait_jcatch,  { exception catch clause }
+{$endif JVM}
+          { SEH directives used in ARM,MIPS and x86_64 COFF targets }
+          ait_seh_directive
           );
 
         taiconst_type = (
@@ -129,14 +134,20 @@ interface
           aitconst_darwin_dwarf_delta64,
           aitconst_darwin_dwarf_delta32,
           { ARM Thumb-2 only }
-          aitconst_half16bit { used for table jumps. The actual value is the 16bit value shifted left once }
+          aitconst_half16bit, { used for table jumps. The actual value is the 16bit value shifted left once }
+          { for use by dwarf debugger information }
+          aitconst_16bit_unaligned,
+          aitconst_32bit_unaligned,
+          aitconst_64bit_unaligned
         );
 
     const
 {$ifdef cpu64bitaddr}
        aitconst_ptr = aitconst_64bit;
+       aitconst_ptr_unaligned = aitconst_64bit_unaligned;
 {$else cpu64bitaddr}
        aitconst_ptr = aitconst_32bit;
+       aitconst_ptr_unaligned = aitconst_32bit_unaligned;
 {$endif cpu64bitaddr}
 
 {$ifdef cpu64bitalu}
@@ -181,15 +192,20 @@ interface
 {$endif m68k}
 {$ifdef arm}
           'thumb_func',
+          'thumb_set',
 {$endif arm}
+          'set',
+          'weak',
           'cut',
           'regalloc',
           'tempalloc',
           'marker',
           'varloc',
-          'seh_directive',
+{$ifdef JVM}
           'jvar',
-          'jcatch'
+          'jcatch',
+{$endif JVM}
+          'seh_directive'
           );
 
     type
@@ -198,10 +214,13 @@ interface
 {$ifdef arm}
        { ARM only }
        ,top_regset
-       ,top_shifterop
        ,top_conditioncode
        ,top_modeflags
+       ,top_specialreg
 {$endif arm}
+{$if defined(arm) or defined(aarch64)}
+       ,top_shifterop
+{$endif defined(arm) or defined(aarch64)}
 {$ifdef m68k}
        { m68k only }
        ,top_regset
@@ -241,11 +260,14 @@ interface
           { local varsym that will be inserted in pass_generate_code }
           top_local  : (localoper:plocaloper);
       {$ifdef arm}
-          top_regset : (regset:^tcpuregisterset; regtyp: tregistertype; subreg: tsubregister);
-          top_shifterop : (shifterop : pshifterop);
+          top_regset : (regset:^tcpuregisterset; regtyp: tregistertype; subreg: tsubregister; usermode: boolean);
           top_conditioncode : (cc : TAsmCond);
           top_modeflags : (modeflags : tcpumodeflags);
+          top_specialreg : (specialreg:tregister; specialflags:tspecialregflags);
       {$endif arm}
+      {$if defined(arm) or defined(aarch64)}
+          top_shifterop : (shifterop : pshifterop);
+      {$endif defined(arm) or defined(aarch64)}
       {$ifdef m68k}
           top_regset : (regset:^tcpuregisterset);
       {$endif m68k}
@@ -265,10 +287,13 @@ interface
         a new ait type!                                                              }
       SkipInstr = [ait_comment, ait_symbol,ait_section
                    ,ait_stab, ait_function_name, ait_force_line
-                   ,ait_regalloc, ait_tempalloc, ait_symbol_end 
+                   ,ait_regalloc, ait_tempalloc, ait_symbol_end
 				   ,ait_ent, ait_ent_end, ait_directive
-                   ,ait_varloc,ait_seh_directive
-                   ,ait_jvar, ait_jcatch];
+                   ,ait_varloc,
+{$ifdef JVM}
+                   ait_jvar, ait_jcatch,
+{$endif JVM}
+                   ait_seh_directive];
 
       { ait_* types which do not have line information (and hence which are of type
         tai, otherwise, they are of type tailineinfo }
@@ -280,11 +305,15 @@ interface
 					 ait_ent, ait_ent_end,
 {$ifdef arm}
                      ait_thumb_func,
+                     ait_thumb_set,
 {$endif arm}
+                     ait_set,ait_weak,
                      ait_real_32bit,ait_real_64bit,ait_real_80bit,ait_comp_64bit,ait_real_128bit,
                      ait_symbol,
-                     ait_seh_directive,
-                     ait_jvar,ait_jcatch
+{$ifdef JVM}
+                     ait_jvar, ait_jcatch,
+{$endif JVM}
+                     ait_seh_directive
                     ];
 
 
@@ -426,13 +455,13 @@ interface
           procedure derefimpl;override;
        end;
 
-	   tai_ent = class(tai)
-	      Name : string;
+       tai_ent = class(tai)
+          Name : string;
           Constructor Create (const ProcName : String);
        end;
 
        tai_ent_end = class(tai)
-	      Name : string;
+          Name : string;
           Constructor Create (const ProcName : String);
        end;
 
@@ -446,8 +475,13 @@ interface
 
        { Generates an assembler label }
        tai_label = class(tai)
-          is_global : boolean;
           labsym    : tasmlabel;
+          is_global : boolean;
+{$ifdef arm}
+          { set to true when the label has been moved by insertpcrelativedata to the correct location
+            so one label can be used multiple times }
+          moved     : boolean;
+{$endif arm}
           constructor Create(_labsym : tasmlabel);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
@@ -513,6 +547,9 @@ interface
           constructor Create_64bit(_value : int64);
           constructor Create_32bit(_value : longint);
           constructor Create_16bit(_value : word);
+          constructor Create_64bit_unaligned(_value : int64);
+          constructor Create_32bit_unaligned(_value : longint);
+          constructor Create_16bit_unaligned(_value : word);
           constructor Create_8bit(_value : byte);
           constructor Create_char(size: integer; _value: dword);
           constructor Create_sleb128bit(_value : int64);
@@ -644,6 +681,8 @@ interface
        tai_regalloc = class(tai)
           reg     : tregister;
           ratype  : TRegAllocType;
+          { tells BuildLabelTableAndFixRegAlloc that the deallocation should be kept }
+          keep    : boolean;
           { reg(de)alloc belongs to this instruction, this
             is only used for automatic inserted (de)alloc for
             imaginary register and required for spilling code }
@@ -731,11 +770,16 @@ interface
         tai_align_class = class of tai_align_abstract;
 
         tai_varloc = class(tai)
+           oldlocation,
+           oldlocationhi,
            newlocation,
            newlocationhi : tregister;
            varsym : tsym;
            constructor create(sym : tsym;loc : tregister);
            constructor create64(sym : tsym;loc,lochi : tregister);
+{$ifdef cpu64bitalu}
+           constructor create128(sym : tsym;loc,lochi : tregister);
+{$endif cpu64bitalu}
            constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
            procedure ppuwrite(ppufile:tcompilerppufile);override;
            procedure buildderefimpl;override;
@@ -767,6 +811,7 @@ interface
         end;
         tai_seh_directive_class=class of tai_seh_directive;
 
+{$ifdef JVM}
         { JVM variable live range description }
         tai_jvar = class(tai)
           stackslot: longint;
@@ -791,6 +836,30 @@ interface
           procedure ppuwrite(ppufile:tcompilerppufile);override;
         end;
         tai_jcatch_class = class of tai_jcatch;
+{$endif JVM}
+
+        tai_set = class(tai)  { symbol assignment entry; the GNU assembler writer emits it as ".set sym, value" }
+          sym,
+          value: pshortstring;  { both strings are allocated with stringdup and released in destroy }
+          constructor create(const asym, avalue: string);
+          destructor destroy;override;
+          constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;  { reads sym, then value }
+          procedure ppuwrite(ppufile:tcompilerppufile);override;  { writes sym, then value }
+        end;
+
+{$ifdef arm}
+        tai_thumb_set = class(tai_set)  { ARM-only tai_set variant tagged ait_thumb_set; the GNU writer emits ".thumb_set sym, value" }
+          constructor create(const asym, avalue: string);
+        end;
+{$endif arm}
+
+        tai_weak = class(tai)  { weak-symbol entry; the GNU assembler writer emits it as ".weak sym" }
+          sym: pshortstring;  { allocated with stringdup, released in destroy }
+          constructor create(const asym: string);
+          destructor destroy;override;
+          constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;  { reads sym }
+          procedure ppuwrite(ppufile:tcompilerppufile);override;  { writes sym }
+        end;
 
     var
       { array with all class types for tais }
@@ -819,8 +888,7 @@ implementation
     uses
       SysUtils,
       verbose,
-      globals,
-      fmodule;
+      globals;
 
     const
       pputaimarker = 254;
@@ -905,6 +973,69 @@ implementation
       end;
 
 
+    constructor tai_weak.create(const asym: string);  { creates an ait_weak entry for the symbol name asym }
+      begin
+        inherited create;
+        typ:=ait_weak;
+        sym:=stringdup(asym);  { stored as pshortstring; released with stringdispose in destroy }
+      end;
+
+    destructor tai_weak.destroy;  { releases the symbol name set up by create/ppuload, then runs the inherited destructor }
+      begin
+        stringdispose(sym);
+        inherited destroy;
+      end;
+
+    constructor tai_weak.ppuload(t: taitype; ppufile: tcompilerppufile);  { restores the entry from a ppu stream; counterpart of ppuwrite }
+      begin
+        inherited ppuload(t,ppufile);
+        sym:=stringdup(ppufile.getstring);  { one string is read here: the symbol name }
+      end;
+
+    procedure tai_weak.ppuwrite(ppufile: tcompilerppufile);  { stores the entry in a ppu stream; counterpart of ppuload }
+      begin
+        inherited ppuwrite(ppufile);
+        ppufile.putstring(sym^);  { the symbol name is the only field written here }
+      end;
+
+{$ifdef arm}
+    constructor tai_thumb_set.create(const asym, avalue: string);  { same data as tai_set, but tagged ait_thumb_set }
+      begin
+        inherited create(asym, avalue);
+        typ:=ait_thumb_set;  { replaces the ait_set tag assigned by tai_set.create }
+      end;
+{$endif arm}
+
+    constructor tai_set.create(const asym, avalue: string);  { creates an ait_set entry pairing symbol name asym with the expression text avalue }
+      begin
+        inherited create;
+        typ:=ait_set;
+        sym:=stringdup(asym);  { both strings are released with stringdispose in destroy }
+        value:=stringdup(avalue);
+      end;
+
+    destructor tai_set.destroy;  { releases sym and value, then runs the inherited destructor }
+      begin
+        stringdispose(sym);
+        stringdispose(value);
+        inherited destroy;
+      end;
+
+    constructor tai_set.ppuload(t: taitype; ppufile: tcompilerppufile);  { restores the entry from a ppu stream; counterpart of ppuwrite }
+      begin
+        inherited ppuload(t,ppufile);
+        sym:=stringdup(ppufile.getstring);  { read order must match ppuwrite: sym first, then value }
+        value:=stringdup(ppufile.getstring);
+      end;
+
+    procedure tai_set.ppuwrite(ppufile: tcompilerppufile);  { stores the entry in a ppu stream; counterpart of ppuload }
+      begin
+        inherited ppuwrite(ppufile);
+        ppufile.putstring(sym^);  { write order must match ppuload: sym first, then value }
+        ppufile.putstring(value^);
+      end;
+
+
     constructor tai_varloc.create(sym: tsym; loc: tregister);
       begin
         inherited Create;
@@ -912,10 +1043,11 @@ implementation
         newlocation:=loc;
         newlocationhi:=NR_NO;
         varsym:=sym;
+        oldlocationhi:=NR_NO;
       end;
 
 
-    constructor tai_varloc.create64(sym: tsym; loc: tregister;lochi : tregister);
+    constructor tai_varloc.create64(sym: tsym; loc, lochi: tregister);
       begin
         inherited Create;
         typ:=ait_varloc;
@@ -925,6 +1057,18 @@ implementation
       end;
 
 
+{$ifdef cpu64bitalu}
+    constructor tai_varloc.create128(sym: tsym; loc, lochi: tregister);  { ait_varloc entry for sym with a register pair; only compiled when cpu64bitalu is defined }
+      begin
+        inherited Create;
+        typ:=ait_varloc;
+        newlocation:=loc;  { presumably the low half of the value - TODO confirm against callers }
+        newlocationhi:=lochi;
+        varsym:=sym;  { oldlocation/oldlocationhi are not assigned here }
+      end;
+{$endif cpu64bitalu}
+
+
     constructor tai_varloc.ppuload(t: taitype; ppufile: tcompilerppufile);
       begin
         inherited ppuload(t, ppufile);
@@ -1341,6 +1485,38 @@ implementation
          endsym:=nil;
       end;
 
+    constructor tai_const.Create_64bit_unaligned(_value : int64);  { plain integer constant tagged aitconst_64bit_unaligned (size reports 8) }
+      begin
+         inherited Create;
+         typ:=ait_const;
+         consttype:=aitconst_64bit_unaligned;
+         value:=_value;
+         sym:=nil;  { no symbol or end symbol is attached to this constant }
+         endsym:=nil;
+      end;
+
+
+    constructor tai_const.Create_32bit_unaligned(_value : longint);  { plain integer constant tagged aitconst_32bit_unaligned (size reports 4) }
+      begin
+         inherited Create;
+         typ:=ait_const;
+         consttype:=aitconst_32bit_unaligned;
+         value:=_value;
+         sym:=nil;  { no symbol or end symbol is attached to this constant }
+         endsym:=nil;
+      end;
+
+
+    constructor tai_const.Create_16bit_unaligned(_value : word);  { plain integer constant tagged aitconst_16bit_unaligned (size reports 2) }
+      begin
+         inherited Create;
+         typ:=ait_const;
+         consttype:=aitconst_16bit_unaligned;
+         value:=_value;
+         sym:=nil;  { no symbol or end symbol is attached to this constant }
+         endsym:=nil;
+      end;
+
 
     constructor tai_const.Create_8bit(_value : byte);
       begin
@@ -1518,11 +1694,13 @@ implementation
         case consttype of
           aitconst_8bit :
             result:=1;
-          aitconst_16bit :
+          aitconst_16bit,aitconst_16bit_unaligned :
             result:=2;
-          aitconst_32bit,aitconst_darwin_dwarf_delta32:
+          aitconst_32bit,aitconst_darwin_dwarf_delta32,
+	  aitconst_32bit_unaligned:
             result:=4;
-          aitconst_64bit,aitconst_darwin_dwarf_delta64:
+          aitconst_64bit,aitconst_darwin_dwarf_delta64,
+	  aitconst_64bit_unaligned:
             result:=8;
           aitconst_secrel32_symbol,
           aitconst_rva_symbol :
@@ -2102,6 +2280,7 @@ implementation
         inherited ppuload(t,ppufile);
         ppufile.getdata(reg,sizeof(Tregister));
         ratype:=tregalloctype(ppufile.getbyte);
+        keep:=boolean(ppufile.getbyte);
       end;
 
 
@@ -2110,6 +2289,7 @@ implementation
         inherited ppuwrite(ppufile);
         ppufile.putdata(reg,sizeof(Tregister));
         ppufile.putbyte(byte(ratype));
+        ppufile.putbyte(byte(keep));
       end;
 
 
@@ -2285,6 +2465,10 @@ implementation
                 begin
                   new(ref);
                   ref^:=o.ref^;
+{$ifdef x86}
+                  if (ref^.segment<>NR_NO) and (ref^.segment<>NR_DS) then
+                    segprefix:=ref^.segment;
+{$endif x86}
                   if assigned(add_reg_instruction_hook) then
                     begin
                       add_reg_instruction_hook(self,ref^.base);
@@ -2389,14 +2573,14 @@ implementation
         { When the generic RA is used this needs to be overridden, we don't use
           virtual;abstract; to prevent a lot of warnings of unimplemented abstract methods
           when tai_cpu is created (PFV) }
-        internalerror(200404091);
+        internalerror(2004040901);
         result:=false;
       end;
 
 
     function tai_cpu_abstract.spilling_get_operation_type(opnr: longint): topertype;
       begin
-        internalerror(200404091);
+        internalerror(2004040902);
         result:=operand_readwrite;
       end;
 
@@ -2748,6 +2932,7 @@ implementation
       begin
       end;
 
+{$ifdef JVM}
 
 {****************************************************************************
                               tai_jvar
@@ -2838,6 +3023,7 @@ implementation
         ppufile.putasmsymbol(handlerlab);
       end;
 
+{$endif JVM}
 
 begin
 {$push}{$warnings off}

+ 142 - 16
compiler/aggas.pas

@@ -29,9 +29,8 @@ unit aggas;
 interface
 
     uses
-      cclasses,
       globtype,globals,
-      aasmbase,aasmtai,aasmdata,aasmcpu,
+      aasmbase,aasmtai,aasmdata,
       assemble;
 
     type
@@ -54,6 +53,7 @@ interface
         procedure WriteWeakSymbolDef(s: tasmsymbol); virtual;
         procedure WriteAixStringConst(hp: tai_string);
         procedure WriteAixIntConst(hp: tai_const);
+        procedure WriteUnalignedIntConst(hp: tai_const);
         procedure WriteDirectiveName(dir: TAsmDirective); virtual;
        public
         function MakeCmdLine: TCmdStr; override;
@@ -104,8 +104,14 @@ implementation
     uses
       SysUtils,
       cutils,cfileutl,systems,
-      fmodule,finput,verbose,
-      itcpugas,cpubase;
+      fmodule,verbose,
+{$ifdef TEST_WIN64_SEH}
+      itcpugas,
+{$endif TEST_WIN64_SEH}
+{$ifdef m68k}
+      cpuinfo,aasmcpu,
+{$endif m68k}
+      cpubase;
 
     const
       line_length = 70;
@@ -197,12 +203,38 @@ implementation
 
 
     const
-      ait_const2str : array[aitconst_128bit..aitconst_half16bit] of string[20]=(
+      ait_const2str : array[aitconst_128bit..aitconst_64bit_unaligned] of string[20]=(
         #9'.fixme128'#9,#9'.quad'#9,#9'.long'#9,#9'.short'#9,#9'.byte'#9,
         #9'.sleb128'#9,#9'.uleb128'#9,
-        #9'.rva'#9,#9'.secrel32'#9,#9'.quad'#9,#9'.long'#9,#9'.short'#9
+        #9'.rva'#9,#9'.secrel32'#9,#9'.quad'#9,#9'.long'#9,#9'.short'#9,
+        #9'.short'#9,#9'.long'#9,#9'.quad'#9
       );
 
+      ait_unaligned_consts = [aitconst_16bit_unaligned..aitconst_64bit_unaligned];  { const types for which a target-specific directive table below may be used }
+
+      { Sparc type of unaligned pseudo-instructions }
+      use_ua_sparc_systems = [system_sparc_linux];
+      ait_ua_sparc_const2str : array[aitconst_16bit_unaligned..aitconst_64bit_unaligned]
+        of string[20]=(
+          #9'.uahalf'#9,#9'.uaword'#9,#9'.uaxword'#9  { indexed by const type, 16-bit entry first and 64-bit entry last }
+        );
+
+      { Alpha type of unaligned pseudo-instructions }
+      use_ua_alpha_systems = [system_alpha_linux];
+      ait_ua_alpha_const2str : array[aitconst_16bit_unaligned..aitconst_64bit_unaligned]
+        of string[20]=(
+          #9'.uword'#9,#9'.ulong'#9,#9'.uquad'#9  { indexed by const type, 16-bit entry first and 64-bit entry last }
+        );
+
+      { Generic unaligned pseudo-instructions, seems ELF specific }
+      use_ua_elf_systems = [system_mipsel_linux,system_mipseb_linux];
+      ait_ua_elf_const2str : array[aitconst_16bit_unaligned..aitconst_64bit_unaligned]
+        of string[20]=(
+          #9'.2byte'#9,#9'.4byte'#9,#9'.8byte'#9  { indexed by const type, 16-bit entry first and 64-bit entry last }
+        );
+
+
+
 {****************************************************************************}
 {                          GNU Assembler writer                              }
 {****************************************************************************}
@@ -489,8 +521,7 @@ implementation
         case target_info.system of
          system_i386_OS2,
          system_i386_EMX,
-         system_m68k_amiga,  { amiga has old GNU AS (2.14), which blews up from .section (KB) }
-         system_m68k_linux: ;
+         system_m68k_amiga: ; { amiga has old GNU AS (2.14), which blews up from .section (KB) }
          system_powerpc_darwin,
          system_i386_darwin,
          system_i386_iphonesim,
@@ -529,9 +560,9 @@ implementation
                   AsmWriteln('__IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5');
                 system_arm_darwin:
                   if (cs_create_pic in current_settings.moduleswitches) then
-                    AsmWriteln('.section __TEXT,__picsymbolstub4,symbol_stubs,none,16')
+                    AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
                   else
-                    AsmWriteln('.section __TEXT,__symbol_stub4,symbol_stubs,none,12')
+                    AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
                 { darwin/x86-64 uses RIP-based GOT addressing, no symbol stubs }
                 else
                   internalerror(2006031101);
@@ -608,15 +639,45 @@ implementation
         end;
 
 
-      procedure doalign(alignment: byte; use_op: boolean; fillop: byte; out last_align: longint);
+      procedure doalign(alignment: byte; use_op: boolean; fillop: byte; out last_align: longint;lasthp:tai);
         var
           i: longint;
+{$ifdef m68k}
+          instr : string;
+{$endif}
         begin
           last_align:=alignment;
           if alignment>1 then
             begin
               if not(target_info.system in (systems_darwin+systems_aix)) then
                 begin
+{$ifdef m68k}
+                  if assigned(lasthp) and  { nil guard has to cover both alternatives: "and" binds tighter than "or" }
+                      (
+                        (
+                          (lasthp.typ=ait_instruction) and
+                          (taicpu(lasthp).opcode<>A_JMP)
+                        ) or
+                        (lasthp.typ=ait_label)
+                      ) then
+                    begin
+                      if ispowerof2(alignment,i) then
+                        begin
+                          { the Coldfire manual suggests the TBF instruction for
+                            alignments, but somehow QEMU does not interpret that
+                            correctly... }
+                          {if current_settings.cputype=cpu_coldfire then
+                            instr:='0x51fc'
+                          else}
+                            instr:='0x4e71';  { m68k NOP opcode word, used as the .balignw fill pattern }
+                          AsmWrite(#9'.balignw '+tostr(alignment)+','+instr);
+                        end
+                      else
+                        internalerror(2012102101);
+                    end
+                  else
+                    begin
+{$endif m68k}
                   AsmWrite(#9'.balign '+tostr(alignment));
                   if use_op then
                     AsmWrite(','+tostr(fillop))
@@ -625,6 +686,9 @@ implementation
                   else if LastSecType=sec_code then
                     AsmWrite(',0x90');
 {$endif x86}
+{$ifdef m68k}
+                    end;
+{$endif m68k}
                 end
               else
                 begin
@@ -640,6 +704,7 @@ implementation
 
     var
       ch       : char;
+      lasthp,
       hp       : tai;
       constdef : taiconst_type;
       s,t      : string;
@@ -667,6 +732,7 @@ implementation
       do_line:=(cs_asm_source in current_settings.globalswitches) or
                ((cs_lineinfo in current_settings.moduleswitches)
                  and (p=current_asmdata.asmlists[al_procedures]));
+      lasthp:=nil;
       hp:=tai(p.first);
       while assigned(hp) do
        begin
@@ -715,7 +781,7 @@ implementation
 
            ait_align :
              begin
-               doalign(tai_align_abstract(hp).aligntype,tai_align_abstract(hp).use_op,tai_align_abstract(hp).fillop,last_align);
+               doalign(tai_align_abstract(hp).aligntype,tai_align_abstract(hp).use_op,tai_align_abstract(hp).fillop,last_align,lasthp);
              end;
 
            ait_section :
@@ -781,9 +847,9 @@ implementation
                      begin
                        asmwrite(#9'.lcomm ');
                        asmwrite(ReplaceForbiddenAsmSymbolChars(tai_datablock(hp).sym.name));
-                       asmwrite(',_data.bss_[RW],');
+                       asmwrite(',');
                        asmwrite(tostr(tai_datablock(hp).size)+',');
-                       asmwriteln(tostr(last_align));
+                       asmwrite('_data.bss_');
                      end;
                  end
                else
@@ -903,7 +969,10 @@ implementation
                  aitconst_secrel32_symbol,
                  aitconst_darwin_dwarf_delta32,
                  aitconst_darwin_dwarf_delta64,
-                 aitconst_half16bit:
+                 aitconst_half16bit,
+                 aitconst_16bit_unaligned,
+                 aitconst_32bit_unaligned,
+                 aitconst_64bit_unaligned:
                    begin
                      { the AIX assembler (and for compatibility, the GNU
                        assembler when targeting AIX) automatically aligns
@@ -930,7 +999,16 @@ implementation
                        end
                      else
                        begin
-                         if not(target_info.system in systems_aix) or
+                         if (constdef in ait_unaligned_consts) and
+                            (target_info.system in use_ua_sparc_systems) then
+                           AsmWrite(ait_ua_sparc_const2str[constdef])
+                         else if (constdef in ait_unaligned_consts) and
+                            (target_info.system in use_ua_alpha_systems) then
+                           AsmWrite(ait_ua_alpha_const2str[constdef])
+                         else if (constdef in ait_unaligned_consts) and
+                                 (target_info.system in use_ua_elf_systems) then
+                           AsmWrite(ait_ua_elf_const2str[constdef])
+                          else if not(target_info.system in systems_aix) or
                             (constdef<>aitconst_64bit) then
                            AsmWrite(ait_const2str[constdef])
                          else
@@ -1246,7 +1324,19 @@ implementation
              begin
                AsmWriteLn(#9'.thumb_func');
              end;
+           ait_thumb_set:
+             begin
+               AsmWriteLn(#9'.thumb_set '+tai_thumb_set(hp).sym^+', '+tai_thumb_set(hp).value^);
+             end;
 {$endif arm}
+           ait_set:
+             begin
+               AsmWriteLn(#9'.set '+tai_set(hp).sym^+', '+tai_set(hp).value^);
+             end;
+           ait_weak:
+             begin
+               AsmWriteLn(#9'.weak '+tai_weak(hp).sym^);
+             end;
            ait_ent:
              begin
                AsmWrite(#9'.ent'#9);
@@ -1384,6 +1474,7 @@ implementation
            else
              internalerror(2006012201);
          end;
+         lasthp:=hp;
          hp:=tai(hp.next);
        end;
     end;
@@ -1517,6 +1608,41 @@ implementation
         end;
       end;
 
+    procedure TGNUAssembler.WriteUnalignedIntConst(hp: tai_const);
+      var
+        pos, size: longint;
+      begin
+        { Writes hp.value as one ".byte b, b, ..." line, one byte at a time,
+          in the byte order of the target; the line is terminated once below }
+        size:=tai_const(hp).size;
+        AsmWrite(#9'.byte'#9);
+        if target_info.endian=endian_big then
+          begin
+            { most significant byte first }
+            pos:=size-1;
+            while pos>=0 do
+              begin
+                AsmWrite(tostr((tai_const(hp).value shr (pos*8)) and $ff));
+                dec(pos);
+                if pos>=0 then
+                  AsmWrite(', ');
+              end;
+          end
+        else
+          begin
+            { least significant byte first }
+            pos:=0;
+            while pos<size do
+              begin
+                AsmWrite(tostr((tai_const(hp).value shr (pos*8)) and $ff));
+                inc(pos);
+                if pos<size then
+                  AsmWrite(', ');
+              end;
+          end;
+        AsmLn;
+      end;
+
 
     procedure TGNUAssembler.WriteDirectiveName(dir: TAsmDirective);
     begin

+ 5 - 4
compiler/agjasmin.pas

@@ -796,22 +796,22 @@ implementation
 
     function TJasminAssembler.ConstAssignmentValue(csym: tconstsym): ansistring;
       begin
+        result:='';
         { nil is the default value -> don't write explicitly }
         case csym.consttyp of
           constpointer:
             begin
               if csym.value.valueordptr<>0 then
                 internalerror(2011021206);
-              result:='';
             end;
           constnil:
-            result:='';
+            ;
         else
           begin
             { enums and sets are initialized as typed constants }
             if not assigned(csym.constdef) or
                not(csym.constdef.typ in [enumdef,setdef]) then
-              result:=' = '+ConstValue(csym)
+              result:=' = '+ConstValue(csym);
           end;
         end;
       end;
@@ -890,7 +890,7 @@ implementation
                 odt_javaclass:
                   kindname:='class static ';
                 odt_interfacejava:
-                  kindname:='interface ';
+                  kindname:='interface static abstract ';
                 else
                   internalerror(2011021702);
               end;
@@ -1228,6 +1228,7 @@ implementation
          flags : [];
          labelprefix : 'L';
          comment : ' ; ';
+         dollarsign : '$';
        );
 
 

+ 1 - 0
compiler/alpha/cgcpu.pas

@@ -162,6 +162,7 @@ end;
 procedure create_codegen;
   begin
     cg:=tcgalpha.create;
+    cg128:=tcg128.create;
   end;
 
 end.

+ 81 - 8
compiler/aopt.pas

@@ -25,6 +25,8 @@ Unit aopt;
 
 {$i fpcdefs.inc}
 
+{ $define DEBUG_OPTALLOC}
+
   Interface
 
     Uses
@@ -49,9 +51,18 @@ Unit aopt;
       End;
       TAsmOptimizerClass = class of TAsmOptimizer;
 
+      TAsmScheduler = class(TAoptObj)
+        { _AsmL is the TAsmList that has to be re-scheduled }
+        Constructor Create(_AsmL: TAsmList); virtual; reintroduce;
+        Procedure Optimize;  { entry point: runs SchedulerPass1 block by block, skipping assembler blocks }
+        function SchedulerPass1Cpu(var p: tai): boolean; virtual; abstract;  { CPU-specific hook; when it returns true, SchedulerPass1 does not advance p itself }
+        procedure SchedulerPass1;  { calls SchedulerPass1Cpu for the entries from BlockStart up to BlockEnd }
+      end;
+      TAsmSchedulerClass = class of TAsmScheduler;
+
     var
       casmoptimizer : TAsmOptimizerClass;
-      cpreregallocscheduler : TAsmOptimizerClass;
+      cpreregallocscheduler : TAsmSchedulerClass;
 
     procedure Optimize(AsmL:TAsmList);
     procedure PreRegallocSchedule(AsmL:TAsmList);
@@ -69,7 +80,7 @@ Unit aopt;
     Constructor TAsmOptimizer.create(_AsmL: TAsmList);
       Begin
         inherited create(_asml,nil,nil,nil);
-      {setup labeltable, always necessary}
+        { setup labeltable, always necessary }
         New(LabelInfo);
       End;
 
@@ -174,10 +185,11 @@ Unit aopt;
                         ExcludeRegFromUsedRegs(tai_regalloc(p).Reg,Regs);
                         hp1 := p;
                         hp2 := nil;
-                        While Not(FindRegAlloc(tai_regalloc(p).Reg, tai(hp1.Next))) And
+                        While Not(assigned(FindRegAlloc(tai_regalloc(p).Reg, tai(hp1.Next)))) And
                               GetNextInstruction(hp1, hp1) And
                               RegInInstruction(tai_regalloc(p).Reg, hp1) Do
                           hp2 := hp1;
+                        { move deallocations }
                         If hp2 <> nil Then
                           Begin
                             hp1 := tai(p.previous);
@@ -186,13 +198,23 @@ Unit aopt;
 {$endif DEBUG_OPTALLOC}
                             AsmL.Remove(p);
                             InsertLLItem(hp2, tai(hp2.Next), p);
+                            { don't remove this deallocation later on when merging dealloc/alloc pairs because
+                              it marks an independent use of a register
+
+                              This could also be achieved by separate passes for merging first and then later
+                              moving, but I did not choose this solution because it takes more time and code (FK) }
+                            tai_regalloc(p).keep:=true;
 {$ifdef DEBUG_OPTALLOC}
                             AsmL.InsertAfter(tai_comment.Create(strpnew('Moved deallocation of '+std_regname(tai_regalloc(p).Reg)+' here')),hp2);
 {$endif DEBUG_OPTALLOC}
                             p := hp1;
                           End
-                        else if findregalloc(tai_regalloc(p).reg, tai(p.next))
-                          and getnextinstruction(p,hp1) then
+                        { merge allocations/deallocations }
+                        else if assigned(findregalloc(tai_regalloc(p).reg, tai(p.next)))
+                          and getnextinstruction(p,hp1) and
+                          { don't merge deallocations/allocation which mark a new use of register, this
+                            enables more possibilities for the peephole optimizer }
+                          not(tai_regalloc(p).keep) then
                           begin
                             hp1 := tai(p.previous);
 {$ifdef DEBUG_OPTALLOC}
@@ -201,8 +223,6 @@ Unit aopt;
                             AsmL.remove(p);
                             p.free;
                             p := hp1;
-      //                      don't include here, since then the allocation will be removed when it's processed
-      //                      include(usedregs,supreg);
                           end;
                       End
                   End
@@ -303,6 +323,59 @@ Unit aopt;
       End;
 
 
+    constructor TAsmScheduler.Create(_AsmL: TAsmList);  { unlike TAsmOptimizer.create, no LabelInfo table is allocated here }
+      begin
+        inherited create(_asml,nil,nil,nil);  { NOTE(review): the nil arguments are presumably block start, block end and label info - confirm against TAoptObj.create }
+      end;
+
+
+    procedure TAsmScheduler.SchedulerPass1;
+      var
+        p : tai;
+      begin
+        p:=BlockStart;  { walk the current block up to, but not including, BlockEnd }
+        while p<>BlockEnd Do
+          begin
+            if SchedulerPass1Cpu(p) then  { the CPU hook handled the entry and repositioned p itself }
+              continue;
+            p:=tai(p.next);
+          end;
+      end;
+
+
+    procedure TAsmScheduler.Optimize;
+      Var
+        HP: tai;
+      Begin
+        { Runs SchedulerPass1 starting at the head of AsmL, then resumes at
+          BlockEnd, skipping any assembler blocks found at that position }
+        BlockStart := tai(AsmL.First);
+        While Assigned(BlockStart) Do
+          Begin
+            { scheduling pass over the current block }
+            SchedulerPass1;
+            { continue where we left off, BlockEnd is either the start of an }
+            { assembler block or nil}
+            BlockStart:=BlockEnd;
+            While Assigned(BlockStart) And
+                  (BlockStart.typ = ait_Marker) And
+                  (tai_Marker(BlockStart).Kind = mark_AsmBlockStart) Do
+              Begin
+                { we stopped at an assembler block, so skip it    }
+                While GetNextInstruction(BlockStart, BlockStart) And
+                      ((BlockStart.Typ <> Ait_Marker) Or
+                       (tai_Marker(Blockstart).Kind <> mark_AsmBlockEnd)) Do;
+                { blockstart now contains a tai_marker(mark_AsmBlockEnd) }
+                If not(GetNextInstruction(BlockStart, HP) And
+                   ((HP.typ <> ait_Marker) Or
+                    (Tai_Marker(HP).Kind <> mark_AsmBlockStart))) Then
+                  { skip the next assembler block }
+                  blockStart := hp;
+              End
+          End;
+      End;
+
+
     procedure Optimize(AsmL:TAsmList);
       var
         p : TAsmOptimizer;
@@ -315,7 +388,7 @@ Unit aopt;
 
     procedure PreRegallocSchedule(AsmL:TAsmList);
       var
-        p : TAsmOptimizer;
+        p : TAsmScheduler;
       begin
         p:=cpreregallocscheduler.Create(AsmL);
         p.Optimize;

+ 29 - 0
compiler/aoptbase.pas

@@ -89,6 +89,11 @@ unit aoptbase;
         { create a paicpu Object that loads the contents of reg1 into reg2 }
         Function a_load_reg_reg(reg1, reg2: TRegister): taicpu; Virtual; Abstract;
 
+        { returns true if reg is used by any instruction between p1 and p2 }
+        Function RegUsedBetween(reg: TRegister; p1, p2: tai): Boolean;
+
+        { returns true if reg is modified by any instruction between p1 and p2 }
+        function RegModifiedBetween(reg: TRegister; p1, p2: tai): Boolean;
     end;
 
     function labelCanBeSkipped(p: tai_label): boolean;
@@ -241,6 +246,30 @@ unit aoptbase;
   End;
 
 
+  Function TAOptBase.RegUsedBetween(reg : TRegister;p1,p2 : tai) : Boolean;
+  Begin
+    { steps from p1 with GetNextInstruction until p2 is reached or the walk
+      ends; p1 and p2 themselves are not examined }
+    Result:=false;
+    while assigned(p1) and assigned(p2) and GetNextInstruction(p1,p1) and (p1<>p2) do
+      begin
+        Result:=RegInInstruction(reg,p1);
+        if Result then
+          exit;
+      end;
+  end;
+
+
+  Function TAOptBase.RegModifiedBetween(reg : TRegister;p1,p2 : tai) : Boolean;
+  Begin
+    { steps from p1 with GetNextInstruction until p2 is reached or the walk
+      ends; p1 and p2 themselves are not examined }
+    Result:=false;
+    while assigned(p1) and assigned(p2) and GetNextInstruction(p1,p1) and (p1<>p2) do
+      begin
+        Result:=RegModifiedByInstruction(reg,p1);
+        if Result then
+          exit;
+      end;
+  end;
+
+
   { ******************* Processor dependent stuff *************************** }
 
   Function TAOptBase.RegMaxSize(Reg: TRegister): TRegister;

+ 54 - 12
compiler/aoptobj.pas

@@ -80,7 +80,7 @@ Unit AoptObj;
         Procedure Clear;
         { update the info with the pairegalloc objects coming after
           p                                                         }
-        Procedure Update(p: Tai);
+        procedure Update(p: Tai; IgnoreNewAllocs: Boolean=false);
         { is Reg currently in use }
         Function IsUsed(Reg: TRegister): Boolean;
         { get all the currently used registers }
@@ -289,10 +289,18 @@ Unit AoptObj;
         { returns true if the operands o1 and o2 are completely equal }
         Function OpsEqual(const o1,o2:toper): Boolean;
 
-        { Returns true if a ait_alloc object for Reg is found in the block
+        { Returns the next ait_alloc object with ratype ra_alloc for
+          Reg which is found in the block
           of Tai's starting with StartPai and ending with the next "real"
-          instruction                                                      }
-        Function FindRegAlloc(Reg: TRegister; StartPai: Tai): Boolean;
+          instruction. If none is found, it returns
+          nil                                                                        }
+        Function FindRegAlloc(Reg: TRegister; StartPai: Tai): tai_regalloc;
+
+        { Returns the next ait_alloc object with ratype ra_dealloc
+          for Reg which is found in the block of Tai's starting with StartPai
+          and ending with the next "real" instruction. If none is found, it returns
+          nil                                                                        }
+        Function FindRegDeAlloc(Reg: TRegister; StartPai: Tai): tai_regalloc;
 
         { reg used after p? }
         function RegUsedAfterInstruction(reg: Tregister; p: tai; var AllUsedRegs: TAllUsedRegs): Boolean;
@@ -357,7 +365,7 @@ Unit AoptObj;
     {
       updates UsedRegs with the RegAlloc Information coming after P
     }
-    Procedure TUsedRegs.Update(p: Tai);
+    Procedure TUsedRegs.Update(p: Tai;IgnoreNewAllocs : Boolean = false);
       Begin
         { this code is normally not used because updating the register allocation information is done in
           TAOptObj.UpdateUsedRegs for speed reasons }
@@ -376,7 +384,8 @@ Unit AoptObj;
                 begin
                   case tai_regalloc(p).ratype of
                     ra_alloc :
-                      Include(UsedRegs, getsupreg(tai_regalloc(p).reg));
+                      if not(IgnoreNewAllocs) then
+                        Include(UsedRegs, getsupreg(tai_regalloc(p).reg));
                     ra_dealloc :
                       Exclude(UsedRegs, getsupreg(tai_regalloc(p).reg));
                   end;
@@ -920,7 +929,7 @@ Unit AoptObj;
       Begin
         TempP := hp;
         While Assigned(TempP) and
-             (TempP.typ In SkipInstr + [ait_label]) Do
+             (TempP.typ In SkipInstr + [ait_label,ait_align]) Do
           If (TempP.typ <> ait_Label) Or
              (Tai_label(TempP).labsym <> L)
             Then GetNextInstruction(TempP, TempP)
@@ -999,9 +1008,10 @@ Unit AoptObj;
           OpsEqual := False;
       End;
 
-      Function TAOptObj.FindRegAlloc(Reg: TRegister; StartPai: Tai): Boolean;
+
+      Function TAOptObj.FindRegAlloc(Reg: TRegister; StartPai: Tai): tai_regalloc;
       Begin
-        FindRegAlloc:=False;
+        Result:=nil;
         Repeat
           While Assigned(StartPai) And
                 ((StartPai.typ in (SkipInstr - [ait_regAlloc])) Or
@@ -1011,9 +1021,11 @@ Unit AoptObj;
           If Assigned(StartPai) And
              (StartPai.typ = ait_regAlloc) Then
             Begin
-              if (tai_regalloc(StartPai).ratype=ra_alloc) and (getsupreg(tai_regalloc(StartPai).Reg) = getsupreg(Reg)) then
+              if (tai_regalloc(StartPai).ratype=ra_alloc) and
+                (getregtype(tai_regalloc(StartPai).Reg) = getregtype(Reg)) and
+                (getsupreg(tai_regalloc(StartPai).Reg) = getsupreg(Reg)) then
                begin
-                 FindRegAlloc:=true;
+                 Result:=tai_regalloc(StartPai);
                  exit;
                end;
               StartPai := Tai(StartPai.Next);
@@ -1024,6 +1036,33 @@ Unit AoptObj;
       End;
 
 
+      function TAOptObj.FindRegDeAlloc(Reg: TRegister; StartPai: Tai): tai_regalloc; // returns the first ra_dealloc entry for Reg reachable from StartPai, or nil
+      Begin
+         Result:=nil; // stays nil on every exit path that does not find a match
+         Repeat
+           While Assigned(StartPai) And // skip entries that are in SkipInstr (regallocs excepted) ...
+                 ((StartPai.typ in (SkipInstr - [ait_regAlloc])) Or
+                  ((StartPai.typ = ait_label) and // ... and labels whose symbol is not used
+                   Not(Tai_Label(StartPai).labsym.Is_Used))) Do
+             StartPai := Tai(StartPai.Next);
+           If Assigned(StartPai) And
+              (StartPai.typ = ait_regAlloc) Then
+             Begin
+               if (tai_regalloc(StartPai).ratype=ra_dealloc) and // must be a deallocation ...
+                 (getregtype(tai_regalloc(StartPai).Reg) = getregtype(Reg)) and // ... of the same register type ...
+                 (getsupreg(tai_regalloc(StartPai).Reg) = getsupreg(Reg)) then // ... and the same super register
+                begin
+                  Result:=tai_regalloc(StartPai);
+                  exit;
+                end;
+               StartPai := Tai(StartPai.Next); // non-matching regalloc entry: keep scanning
+             End
+           else
+             exit; // end of list or an entry that is neither skipped nor a regalloc: give up
+         Until false;
+       End;
+
+
       function TAOptObj.RegUsedAfterInstruction(reg: Tregister; p: tai;
        var AllUsedRegs: TAllUsedRegs): Boolean;
        begin
@@ -1203,7 +1242,10 @@ Unit AoptObj;
               InsertLLItem(tai(p.Previous),p,tai_comment.create(strpnew(GetAllocationString(UsedRegs))));
 {$endif DEBUG_OPTALLOC}
             if PeepHoleOptPass1Cpu(p) then
-              continue;
+              begin
+                UpdateUsedRegs(p);
+                continue;
+              end;
             case p.Typ Of
               ait_instruction:
                 begin

+ 144 - 12
compiler/arm/aasmcpu.pas

@@ -26,7 +26,7 @@ unit aasmcpu;
 interface
 
 uses
-  cclasses,globtype,globals,verbose,
+  globtype,globals,verbose,
   aasmbase,aasmtai,aasmdata,aasmsym,
   ogbase,
   symtype,
@@ -161,9 +161,10 @@ uses
          wideformat : boolean;
          roundingmode : troundingmode;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
-         procedure loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset);
+         procedure loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset; ausermode: boolean=false);
          procedure loadconditioncode(opidx:longint;const cond:tasmcond);
          procedure loadmodeflags(opidx:longint;const flags:tcpumodeflags);
+         procedure loadspecialreg(opidx:longint;const areg:tregister; const aflags:tspecialregflags);
          constructor op_none(op : tasmop);
 
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -174,6 +175,7 @@ uses
          constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
 
+         constructor op_regset(op:tasmop; regtype: tregistertype; subreg: tsubregister; _op1: tcpuregisterset);
          constructor op_ref_regset(op:tasmop; _op1: treference; regtype: tregistertype; subreg: tsubregister; _op2: tcpuregisterset);
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
@@ -192,6 +194,9 @@ uses
          constructor op_modeflags(op: tasmop; flags: tcpumodeflags);
          constructor op_modeflags_const(op: tasmop; flags: tcpumodeflags; a: aint);
 
+         { MSR }
+         constructor op_specialreg_reg(op: tasmop; specialreg: tregister; specialregflags: tspecialregflags; _op2: tregister);
+
          { *M*LL }
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
 
@@ -265,7 +270,7 @@ uses
 implementation
 
   uses
-    cutils,rgobj,itcpugas;
+    itcpugas,aoptcpu;
 
 
     procedure taicpu.loadshifterop(opidx:longint;const so:tshifterop);
@@ -286,7 +291,7 @@ implementation
       end;
 
 
-    procedure taicpu.loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset);
+    procedure taicpu.loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset; ausermode: boolean);
       var
         i : byte;
       begin
@@ -301,6 +306,7 @@ implementation
            regset^:=s;
            regtyp:=regsetregtype;
            subreg:=regsetsubregtype;
+           usermode:=ausermode;
            typ:=top_regset;
            case regsetregtype of
              R_INTREGISTER:
@@ -345,6 +351,19 @@ implementation
          end;
       end;
 
+    procedure taicpu.loadspecialreg(opidx: longint; const areg: tregister; const aflags: tspecialregflags); // turns operand opidx into a top_specialreg operand holding areg and aflags
+      begin
+        allocate_oper(opidx+1); // presumably makes sure operand slots 0..opidx exist - confirm against allocate_oper
+        with oper[opidx]^ do
+         begin
+           if typ<>top_specialreg then // operand currently has a different kind
+             clearop(opidx); // presumably releases whatever the previous operand kind owned - confirm against clearop
+           specialreg:=areg;
+           specialflags:=aflags;
+           typ:=top_specialreg;
+         end;
+      end;
+
 {*****************************************************************************
                                  taicpu Constructors
 *****************************************************************************}
@@ -397,6 +416,13 @@ implementation
          loadconst(1,aint(_op2));
       end;
 
+    constructor taicpu.op_regset(op: tasmop; regtype: tregistertype; subreg: tsubregister; _op1: tcpuregisterset); // creates instruction op with a single register-set operand
+      begin
+        inherited create(op);
+        ops:=1; // exactly one operand
+        loadregset(0,regtype,subreg,_op1); // ausermode is left at its declared default (false)
+      end;
+
 
     constructor taicpu.op_ref_regset(op:tasmop; _op1: treference; regtype: tregistertype; subreg: tsubregister; _op2: tcpuregisterset);
       begin
@@ -460,8 +486,8 @@ implementation
     constructor taicpu.op_cond(op: tasmop; cond: tasmcond);
       begin
         inherited create(op);
-        ops:=0;
-        condition := cond;
+        ops:=1;
+        loadconditioncode(0, cond);
       end;
 
     constructor taicpu.op_modeflags(op: tasmop; flags: tcpumodeflags);
@@ -479,6 +505,13 @@ implementation
         loadconst(1,a);
       end;
 
+    constructor taicpu.op_specialreg_reg(op: tasmop; specialreg: tregister; specialregflags: tspecialregflags; _op2: tregister); // creates instruction op with a special-register operand followed by a register operand (declared under the "MSR" heading)
+      begin
+        inherited create(op);
+        ops:=2; // two operands
+        loadspecialreg(0,specialreg,specialregflags); // operand 0: special register plus its flag set
+        loadreg(1,_op2); // operand 1: plain register
+      end;
 
      constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
        begin
@@ -645,7 +678,7 @@ implementation
       begin
         case opcode of
           A_ADC,A_ADD,A_AND,A_BIC,
-          A_EOR,A_CLZ,
+          A_EOR,A_CLZ,A_RBIT,
           A_LDR,A_LDRB,A_LDRBT,A_LDRH,A_LDRSB,
           A_LDRSH,A_LDRT,
           A_MOV,A_MVN,A_MLA,A_MUL,
@@ -702,7 +735,7 @@ implementation
               { check for pre/post indexed }
               result := operand_read;
           //Thumb2
-          A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV,A_MOVT:
+          A_LSL, A_LSR, A_ROR, A_ASR, A_SDIV, A_UDIV, A_MOVW, A_MOVT, A_MLS:
             if opnr in [0] then
               result:=operand_write
             else
@@ -818,6 +851,7 @@ implementation
         penalty,
         lastinspos,
         { increased for every data element > 4 bytes inserted }
+        currentsize,
         extradataoffset,
         limit: longint;
         curop : longint;
@@ -851,8 +885,9 @@ implementation
                           curdatatai:=tai(taicpu(curtai).oper[curop]^.ref^.symboldata);
                           if assigned(curdatatai) and
                             { move only if we're at the first reference of a label }
-                            (taicpu(curtai).oper[curop]^.ref^.offset=0) then
+                            not(tai_label(curdatatai).moved) then
                             begin
+                              tai_label(curdatatai).moved:=true;
                               { check if symbol already used. }
                               { if yes, reuse the symbol }
                               hp:=tai(curdatatai.next);
@@ -1063,13 +1098,110 @@ implementation
           end;
       end;
 
-    procedure finalizearmcode(list, listtoinsert: TAsmList);
+
+    function getMergedInstruction(FirstOp,LastOp:TAsmOp;InvertLast:boolean) : TAsmOp; // returns the single IT opcode whose then/else pattern is FirstOp's pattern followed by LastOp's (optionally inverted) pattern
+      const
+        opTable: array[A_IT..A_ITTTT] of string = // then/else pattern per IT opcode; assumes the enum order A_IT..A_ITTTT matches this list - confirm against armop.inc
+          ('T','TE','TT','TEE','TTE','TET','TTT',
+           'TEEE','TTEE','TETE','TTTE',
+           'TEET','TTET','TETT','TTTT');
+        invertedOpTable: array[A_IT..A_ITTTT] of string = // same entries as opTable with every T and E swapped
+          ('E','ET','EE','ETT','EET','ETE','EEE',
+           'ETTT','EETT','ETET','EEET',
+           'ETTE','EETE','ETEE','EEEE');
+      var
+        resStr : string;
+        i : TAsmOp;
       begin
-        insertpcrelativedata(list, listtoinsert);
+        if InvertLast then
+          resStr := opTable[FirstOp]+invertedOpTable[LastOp] // second block tests the opposite condition, so its T/E roles flip
+        else
+          resStr := opTable[FirstOp]+opTable[LastOp];
+        if length(resStr) > 4 then // combined pattern is longer than any entry in opTable
+          internalerror(2012100805);
+
+        for i := low(opTable) to high(opTable) do // look the combined pattern up again to get its opcode
+          if opTable[i] = resStr then
+            exit(i);
+
+        internalerror(2012100806); // not expected to be reached: opTable lists all 15 patterns of length 1..4 that start with 'T'
+      end;
+
+    procedure foldITInstructions(list: TAsmList); // walks list and merges an IT instruction with the IT instruction that directly follows its block when conditions and lengths allow
+      var
+        curtai,hp1 : tai;
+        levels,i : LongInt;
+      begin
+        curtai:=tai(list.First);
+        while assigned(curtai) do
+          begin
+            case curtai.typ of
+              ait_instruction:
+                if IsIT(taicpu(curtai).opcode) then
+                  begin
+                    levels := GetITLevels(taicpu(curtai).opcode); // presumably the number of instructions this IT opcode covers - confirm against GetITLevels
+                    if levels < 4 then // a block that already has 4 levels cannot absorb another one
+                      begin
+                        i:=levels;
+                        hp1:=tai(curtai.Next);
+                        while assigned(hp1) and // step over the next "levels" instructions; other entries are passed without counting
+                          (i > 0) do
+                          begin
+                            if hp1.typ=ait_instruction then
+                              begin
+                                dec(i);
+                                if (i = 0) and
+                                  mustbelast(hp1) then // presumably: this instruction has to stay the final one of its IT block - confirm against mustbelast
+                                  begin
+                                    hp1:=nil; // nil makes the merge below get skipped
+                                    break;
+                                  end;
+                              end;
+                            hp1:=tai(hp1.Next);
+                          end;
+
+                        if assigned(hp1) then
+                          begin
+                            // hp1 is the entry following the last instruction of the IT block; advance to the next instruction
+                            while assigned(hp1) and
+                              (hp1.typ<>ait_instruction) do
+                                hp1:=tai(hp1.Next);
+
+                            if assigned(hp1) and
+                              (hp1.typ=ait_instruction) and
+                              IsIT(taicpu(hp1).opcode) then
+                              begin
+                                if (levels+GetITLevels(taicpu(hp1).opcode) <= 4) and // combined block must not exceed 4 levels
+                                  ((taicpu(curtai).oper[0]^.cc=taicpu(hp1).oper[0]^.cc) or // same condition ...
+                                   (taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc))) then // ... or exactly the inverse condition
+                                  begin
+                                    taicpu(curtai).opcode:=getMergedInstruction(taicpu(curtai).opcode,
+                                                                                taicpu(hp1).opcode,
+                                                                                taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc));
+
+                                    list.Remove(hp1); // the second IT is now covered by the merged opcode in curtai
+                                    hp1.Free;
+                                  end;
+                              end;
+                          end;
+                      end;
+                  end;
+            end;
 
+            curtai:=tai(curtai.Next); // a merged IT is not re-examined for a further merge
+          end;
+      end;
+
+    procedure finalizearmcode(list, listtoinsert: TAsmList);
+      begin
         { Do Thumb-2 16bit -> 32bit transformations }
         if current_settings.cputype in cpu_thumb2 then
-          ensurethumb2encodings(list);
+          begin
+            ensurethumb2encodings(list);
+            foldITInstructions(list);
+          end;
+
+        insertpcrelativedata(list, listtoinsert);
       end;
 
     procedure InsertPData;

+ 47 - 10
compiler/arm/agarmgas.pas

@@ -30,9 +30,9 @@ unit agarmgas;
 
     uses
        globtype,
-       aasmtai,aasmdata,
+       aasmtai,
        aggas,
-       cpubase;
+       cpubase,cpuinfo;
 
     type
       TARMGNUAssembler=class(TGNUassembler)
@@ -54,13 +54,33 @@ unit agarmgas;
       gas_shiftmode2str : array[tshiftmode] of string[3] = (
         '','lsl','lsr','asr','ror','rrx');
 
+    const 
+      cputype_to_gas_march : array[tcputype] of string = (
+        '', // cpu_none
+        'armv3',
+        'armv4',
+        'armv4t',
+        'armv5',
+        'armv5t',
+        'armv5te',
+        'armv5tej',
+        'armv6',
+        'armv6k',
+        'armv6t2',
+        'armv6z',
+        'armv7',
+        'armv7-a',
+        'armv7-r',
+        'armv7-m',
+        'armv7e-m');
+
   implementation
 
     uses
        cutils,globals,verbose,
        systems,
        assemble,
-       cpuinfo,aasmcpu,
+       aasmcpu,
        itcpugas,
        cgbase,cgutils;
 
@@ -86,13 +106,14 @@ unit agarmgas;
           result:='-mfpu=vfpv3 '+result;
         if (current_settings.fputype = fpu_vfpv3_d16) then
           result:='-mfpu=vfpv3-d16 '+result;
+        if (current_settings.fputype = fpu_fpv4_s16) then
+          result:='-mfpu=fpv4-sp-d16 '+result;
 
         if current_settings.cputype=cpu_armv7m then
           result:='-march=armv7m -mthumb -mthumb-interwork '+result
-        else if current_settings.cputype=cpu_armv6 then
-          result:='-march=armv6 '+result
-        else if current_settings.cputype=cpu_armv7 then
-          result:='-march=armv7-a '+result;
+        // EDSP instructions in RTL require armv5te at least to not generate error
+        else if current_settings.cputype >= cpu_armv5te then
+          result:='-march='+cputype_to_gas_march[current_settings.cputype]+' '+result;
 
         if target_info.abi = abi_eabihf then
           { options based on what gcc uses on debian armhf }
@@ -215,6 +236,8 @@ unit agarmgas;
                     first:=false;
                   end;
               getopstr:=getopstr+'}';
+              if o.usermode then
+                getopstr:=getopstr+'^';
             end;
           top_conditioncode:
             getopstr:=cond2str[o.cc];
@@ -238,6 +261,18 @@ unit agarmgas;
               end
             else
               getopstr:=getreferencestring(o.ref^);
+          top_specialreg:
+            begin
+              getopstr:=gas_regname(o.specialreg);
+              if o.specialflags<>[] then
+                begin
+                  getopstr:=getopstr+'_';
+                  if srC in o.specialflags then getopstr:=getopstr+'c';
+                  if srX in o.specialflags then getopstr:=getopstr+'x';
+                  if srF in o.specialflags then getopstr:=getopstr+'f';
+                  if srS in o.specialflags then getopstr:=getopstr+'s';
+                end;
+            end
           else
             internalerror(2002070604);
         end;
@@ -259,8 +294,10 @@ unit agarmgas;
 
           if taicpu(hp).ops = 0 then
             s:=#9+gas_op2str[op]+' '+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix]
+          else if (taicpu(hp).opcode>=A_VABS) and (taicpu(hp).opcode<=A_VSUB) then
+            s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix]
           else
-            s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix]+postfix+cond2str[taicpu(hp).condition]; // Conditional infixes are deprecated in unified syntax
+            s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix]+cond2str[taicpu(hp).condition]+postfix; // Conditional infixes are deprecated in unified syntax
         end
       else
         s:=#9+gas_op2str[op]+cond2str[taicpu(hp).condition]+oppostfix2str[taicpu(hp).oppostfix];
@@ -318,7 +355,7 @@ unit agarmgas;
             asmbin : 'as';
             asmcmd : '-o $OBJ $ASM';
             supported_targets : [system_arm_linux,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,system_arm_embedded,system_arm_symbian];
-            flags : [af_allowdirect,af_needar,af_smartlink_sections];
+            flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
             comment : '# ';
             dollarsign: '$';
@@ -331,7 +368,7 @@ unit agarmgas;
             asmbin : 'as';
             asmcmd : '-o $OBJ $ASM -arch $ARCH';
             supported_targets : [system_arm_darwin];
-            flags : [af_allowdirect,af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
             labelprefix : 'L';
             comment : '# ';
             dollarsign: '$';

Fișier diff suprimat deoarece este prea mare
+ 714 - 146
compiler/arm/aoptcpu.pas


+ 1 - 1
compiler/arm/aoptcpub.pas

@@ -78,7 +78,7 @@ Const
 
 { the maximum number of operands an instruction has }
 
-  MaxOps = 3;
+  MaxOps = 4;
 
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }

+ 37 - 5
compiler/arm/armatt.inc

@@ -50,6 +50,7 @@
 'mcr',
 'mla',
 'mov',
+'mrc',
 'mrs',
 'msr',
 'mnf',
@@ -206,6 +207,10 @@
 'sel',
 'setend',
 'sev',
+'asr',
+'lsr',
+'lsl',
+'ror',
 'shadd16',
 'shadd8',
 'shasx',
@@ -270,12 +275,8 @@
 'wfe',
 'wfi',
 'yield',
-'asr',
-'lsr',
-'lsl',
 'pop',
 'push',
-'ror',
 'sdiv',
 'udiv',
 'movt',
@@ -295,5 +296,36 @@
 'itett',
 'itttt',
 'tbb',
-'tbh'
+'tbh',
+'movw',
+'cbz',
+'cbnz',
+'vabs',
+'vadd',
+'vcmp',
+'vcmpe',
+'vcvt',
+'vdiv',
+'vldm',
+'vldr',
+'vmov',
+'vmrs',
+'vmsr',
+'vmul',
+'vmla',
+'vmls',
+'vnmla',
+'vnmls',
+'vfma',
+'vfms',
+'vfnma',
+'vfnms',
+'vneg',
+'vnmul',
+'vpop',
+'vpush',
+'vsqrt',
+'vstm',
+'vstr',
+'vsub'
 );

+ 32 - 0
compiler/arm/armatts.inc

@@ -295,5 +295,37 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 45 - 10
compiler/arm/armins.dat

@@ -235,7 +235,7 @@ reg32,imm8,fpureg        \xF0\x02\x01                   FPA
 [LOGcc]
 
 [MCR]
-reg32,mem32         \320\301\1\x13\110            ARM7
+; reg32,mem32         \320\301\1\x13\110            ARM7
 
 [MLAcc]
 reg32,reg32,reg32,reg32  \x15\x00\x20\x90               ARM7
@@ -247,7 +247,7 @@ reg32,reg32,reg32,reg32  \x15\x00\x20\x90               ARM7
 ; reg32,reg32,imm          \xA\x1\xA0                     ARM7
 ; reg32,imm                \xB\x3\xA0                     ARM7
 
-; [MRC]
+[MRC]
 ; reg32,reg32         \321\301\1\x13\110                  ARM7
 
 [MRScc]
@@ -618,6 +618,14 @@ reg32,reg32,reg32,reg32  \x16\x00\x80\x90		 ARM7
 
 [SEVcc]
 
+[ASRcc]
+
+[LSRcc]
+
+[LSLcc]
+
+[RORcc]
+
 [SHADD16cc]
 [SHADD8cc]
 [SHASXcc]
@@ -702,18 +710,10 @@ reg32,reg32,reg32,reg32  \x16\x00\x80\x90		 ARM7
 
 ; Thumb-2
 
-[ASRcc]
-
-[LSRcc]
-
-[LSLcc]
-
 [POP]
 
 [PUSH]
 
-[RORcc]
-
 [SDIVcc]
 
 [UDIVcc]
@@ -752,3 +752,38 @@ reg32,reg32,reg32,reg32  \x16\x00\x80\x90		 ARM7
 
 [TBB]
 [TBH]
+
+[MOVW]
+
+[CBZ]
+[CBNZ]
+
+; FPv4-s16 - ARMv7M floating point
+[VABS]
+[VADD]
+[VCMP]
+[VCMPE]
+[VCVT]
+[VDIV]
+[VLDM]
+[VLDR]
+[VMOV]
+[VMRS]
+[VMSR]
+[VMUL]
+[VMLA]
+[VMLS]
+[VNMLA]
+[VNMLS]
+[VFMA]
+[VFMS]
+[VFNMA]
+[VFNMS]
+[VNEG]
+[VNMUL]
+[VPOP]
+[VPUSH]
+[VSQRT]
+[VSTM]
+[VSTR]
+[VSUB]

+ 1 - 1
compiler/arm/armnop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from armins.dat }
-106;
+105;

+ 37 - 5
compiler/arm/armop.inc

@@ -50,6 +50,7 @@ A_LOG,
 A_MCR,
 A_MLA,
 A_MOV,
+A_MRC,
 A_MRS,
 A_MSR,
 A_MNF,
@@ -206,6 +207,10 @@ A_SBFX,
 A_SEL,
 A_SETEND,
 A_SEV,
+A_ASR,
+A_LSR,
+A_LSL,
+A_ROR,
 A_SHADD16,
 A_SHADD8,
 A_SHASX,
@@ -270,12 +275,8 @@ A_UXTH,
 A_WFE,
 A_WFI,
 A_YIELD,
-A_ASR,
-A_LSR,
-A_LSL,
 A_POP,
 A_PUSH,
-A_ROR,
 A_SDIV,
 A_UDIV,
 A_MOVT,
@@ -295,5 +296,36 @@ A_ITTET,
 A_ITETT,
 A_ITTTT,
 A_TBB,
-A_TBH
+A_TBH,
+A_MOVW,
+A_CBZ,
+A_CBNZ,
+A_VABS,
+A_VADD,
+A_VCMP,
+A_VCMPE,
+A_VCVT,
+A_VDIV,
+A_VLDM,
+A_VLDR,
+A_VMOV,
+A_VMRS,
+A_VMSR,
+A_VMUL,
+A_VMLA,
+A_VMLS,
+A_VNMLA,
+A_VNMLS,
+A_VFMA,
+A_VFMS,
+A_VFNMA,
+A_VFNMS,
+A_VNEG,
+A_VNMUL,
+A_VPOP,
+A_VPUSH,
+A_VSQRT,
+A_VSTM,
+A_VSTR,
+A_VSUB
 );

+ 38 - 1
compiler/arm/armreg.dat

@@ -107,5 +107,42 @@ D30,$04,$07,$1E,d30,0,0
 D31,$04,$07,$1F,d31,0,0
 
 ; special registers
-CPSR_C,$05,$00,$00,cpsr_c,0,0
+CPSR,$05,$00,$00,cpsr,0,0
 FPSCR,$05,$00,$01,fpscr,0,0
+SPSR,$05,$00,$02,spsr,0,0
+APSR_nzcv,$05,$00,$03,apsr_nzcv,0,0
+; coprocessor registers
+CR0,$05,$00,$04,cr0,0,0
+CR1,$05,$00,$05,cr1,0,0
+CR2,$05,$00,$06,cr2,0,0
+CR3,$05,$00,$07,cr3,0,0
+CR4,$05,$00,$08,cr4,0,0
+CR5,$05,$00,$09,cr5,0,0
+CR6,$05,$00,$0A,cr6,0,0
+CR7,$05,$00,$0B,cr7,0,0
+CR8,$05,$00,$0C,cr8,0,0
+CR9,$05,$00,$0D,cr9,0,0
+CR10,$05,$00,$0E,cr10,0,0
+CR11,$05,$00,$0F,cr11,0,0
+CR12,$05,$00,$10,cr12,0,0
+CR13,$05,$00,$11,cr13,0,0
+CR14,$05,$00,$12,cr14,0,0
+CR15,$05,$00,$13,cr15,0,0
+; coprocessors
+p15,$05,$00,$14,p15,0,0
+; Cortex-M3 special registers
+APSR,$05,$00,$15,apsr,0,0
+IPSR,$05,$00,$16,ipsr,0,0
+EPSR,$05,$00,$17,epsr,0,0
+IEPSR,$05,$00,$18,iepsr,0,0
+IAPSR,$05,$00,$19,iapsr,0,0
+EAPSR,$05,$00,$1A,eapsr,0,0
+PSR,$05,$00,$1B,psr,0,0
+MSP,$05,$00,$1C,msp,0,0
+PSP,$05,$00,$1D,psp,0,0
+PRIMASK,$05,$00,$1E,primask,0,0
+BASEPRI,$05,$00,$1F,basepri,0,0
+BASEPRI_MAX,$05,$00,$20,basepri_max,0,0
+FAULTMASK,$05,$00,$21,faultmask,0,0
+CONTROL,$05,$00,$22,control,0,0
+

+ 0 - 7
compiler/arm/armtab.inc

@@ -385,13 +385,6 @@
     code    : #240#2#1;
     flags   : if_fpa
   ),
-  (
-    opcode  : A_MCR;
-    ops     : 2;
-    optypes : (ot_reg32,ot_memory or ot_bits32,ot_none,ot_none);
-    code    : #208#193#1#19#72;
-    flags   : if_arm7
-  ),
   (
     opcode  : A_MLA;
     ops     : 4;

+ 455 - 139
compiler/arm/cgcpu.pas

@@ -32,7 +32,7 @@ unit cgcpu;
        cgbase,cgutils,cgobj,
        aasmbase,aasmcpu,aasmtai,aasmdata,
        parabase,
-       cpubase,cpuinfo,node,cg64f32,rgcpu;
+       cpubase,cpuinfo,cg64f32,rgcpu;
 
 
     type
@@ -114,7 +114,10 @@ unit cgcpu;
         procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
         { Transform unsupported methods into Internal errors }
         procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
-      private
+
+        { tries to generate an optimized 32 bit multiplication by a constant, returns true if code was generated successfully }
+        function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
+
         { clear out potential overflow bits from 8 or 16 bit operations  }
         { the upper 24/16 bits of a register after an operation          }
         procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
@@ -158,6 +161,12 @@ unit cgcpu;
         procedure g_proc_exit(list : TAsmList;parasize : longint;nostackframe:boolean); override;
 
         function handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference; override;
+
+        procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize : tcgsize;reg1, reg2: tregister;shuffle : pmmshuffle); override;
+        procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
+        procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize : tcgsize;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
+        procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize;intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+        procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize;mmreg, intreg: tregister; shuffle : pmmshuffle); override;
       end;
 
       tthumb2cg64farm = class(tcg64farm)
@@ -177,10 +186,10 @@ unit cgcpu;
 
 
     uses
-       globals,verbose,systems,cutils,sysutils,
+       globals,verbose,systems,cutils,
        aopt,aoptcpu,
        fmodule,
-       symconst,symsym,
+       symconst,symsym,symtable,
        tgobj,
        procinfo,cpupi,
        paramgr;
@@ -529,11 +538,10 @@ unit cgcpu;
         branchopcode: tasmop;
       begin
         { check not really correct: should only be used for non-Thumb cpus }
-        if (current_settings.cputype<cpu_armv5) or
-           (current_settings.cputype in cpu_thumb2) then
-          branchopcode:=A_BL
+        if CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype] then
+          branchopcode:=A_BLX
         else
-          branchopcode:=A_BLX;
+          branchopcode:=A_BL;
         if target_info.system<>system_arm_darwin then
           if not weak then
             list.concat(taicpu.op_sym(branchopcode,current_asmdata.RefAsmSymbol(s)))
@@ -554,7 +562,7 @@ unit cgcpu;
     procedure tcgarm.a_call_reg(list : TAsmList;reg: tregister);
       begin
         { check not really correct: should only be used for non-Thumb cpus }
-        if (current_settings.cputype<cpu_armv5) then
+        if not(CPUARM_HAS_BLX in cpu_capabilities[current_settings.cputype]) then
           begin
             list.concat(taicpu.op_reg_reg(A_MOV,NR_R14,NR_PC));
             list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,reg));
@@ -654,6 +662,124 @@ unit cgcpu;
         end
       end;
 
+
+    function tcgarm.try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean; // emits a shift/add or shift/sub sequence computing dst:=src*a; returns false without emitting anything when no sequence fits the effort budget
+      var
+        multiplier : dword;
+        power : longint;
+        shifterop : tshifterop;
+        bitsset : byte;
+        negative : boolean;
+        first : boolean;
+        b,
+        cycles : byte;
+        maxeffort : byte;
+      begin
+        result:=true; // only the final else branch resets this to false
+        cycles:=0;
+        negative:=a<0;
+        shifterop.rs:=NR_NO;
+        shifterop.shiftmode:=SM_LSL;
+        if negative then
+          inc(cycles); // accounts for the trailing RSB that negates the result
+        multiplier:=dword(abs(a));
+        bitsset:=popcnt(multiplier and $fffffffe); // set bits of the magnitude, bit 0 not counted
+
+        { heuristics to estimate how many instructions are reasonable to replace the mul,
+          this is currently based on XScale timings }
+        { in the simplest case, we need a mov to load the constant and a mul to carry out the
+          actual multiplication, this requires min. 1+4 cycles
+
+          because the first shift imm. might cause a stall and because we need more instructions
+          when replacing the mul we generate max. 3 instructions to replace this mul }
+        maxeffort:=3;
+
+        { if the constant is not a shifter op, we need either some mov/mvn/bic/or sequence or
+          a ldr, so generating one more operation to replace this is beneficial }
+        if not(is_shifter_const(dword(a),b)) and not(is_shifter_const(not(dword(a)),b)) then
+          inc(maxeffort);
+
+        { if the upper 5 bits are all set or clear, mul is one cycle faster }
+        if ((dword(a) and $f8000000)=0) or ((dword(a) and $f8000000)=$f8000000) then
+          dec(maxeffort);
+
+        { if the upper 17 bits are all set or clear, mul is another cycle faster }
+        if ((dword(a) and $ffff8000)=0) or ((dword(a) and $ffff8000)=$ffff8000) then
+          dec(maxeffort);
+
+        { most simple cases }
+        if a=1 then
+          a_load_reg_reg(list,OS_32,OS_32,src,dst)
+        else if a=0 then
+          a_load_const_reg(list,OS_32,0,dst)
+        else if a=-1 then
+          a_op_reg_reg(list,OP_NEG,OS_32,src,dst)
+        { add up ?
+
+          basically, one add is needed for each bit being set in the constant factor
+          however, the least significant bit is for free, it can be hidden in the initial
+          instruction
+        }
+        else if (bitsset+cycles<=maxeffort) and
+          (bitsset<=popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)) then // adding must not need more terms than subtracting from the next power of two
+          begin
+            first:=true;
+            while multiplier<>0 do
+              begin
+                shifterop.shiftimm:=BsrDWord(multiplier); // presumably the index of the highest set bit - confirm against the RTL
+                if odd(multiplier) then
+                  begin
+                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,shifterop)); // dst:=src+(src shl shiftimm): covers bit 0 and the top bit at once
+                    dec(multiplier); // bit 0 handled
+                  end
+                else
+                  if first then
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop)) // dst:=src shl shiftimm
+                  else
+                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,src,shifterop)); // dst:=dst+(src shl shiftimm)
+                first:=false;
+                dec(multiplier,1 shl shifterop.shiftimm); // top bit handled
+              end;
+            if negative then
+              list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0)); // dst:=0-dst
+          end
+        { subtract from the next greater power of two? }
+        else if popcnt(dword(nextpowerof2(multiplier,power)-multiplier) and $fffffffe)+cycles+1<=maxeffort then // power is (re)assigned by this nextpowerof2 call
+          begin
+            first:=true;
+            while multiplier<>0 do
+              begin
+                if first then
+                  begin
+                    multiplier:=(1 shl power)-multiplier; // from here on multiplier is the amount to subtract from src shl power
+                    shifterop.shiftimm:=power;
+                  end
+                else
+                  shifterop.shiftimm:=BsrDWord(multiplier);
+
+                if odd(multiplier) then
+                  begin
+                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,shifterop)); // dst:=(src shl shiftimm)-src
+                    dec(multiplier); // bit 0 handled
+                  end
+                else
+                  if first then
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,shifterop)) // dst:=src shl power
+                  else
+                    begin
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_SUB,dst,dst,src,shifterop)); // dst:=dst-(src shl shiftimm)
+                      dec(multiplier,1 shl shifterop.shiftimm);
+                    end;
+                first:=false;
+              end;
+            if negative then
+              list.concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,0)); // dst:=0-dst
+          end
+        else
+          result:=false; // nothing emitted; caller has to fall back to a real multiplication
+      end;
+
+
     procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
       var
         shift : byte;
@@ -716,9 +842,12 @@ unit cgcpu;
                    ));
                 end
               else}
-              list.concat(setoppostfix(
-                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
-              ));
+                begin
+                  if cgsetflags or setflags then
+                    a_reg_alloc(list,NR_DEFAULTFLAGS);
+                  list.concat(setoppostfix(
+                    taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
+                end;
               if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
                 begin
                   ovloc.loc:=LOC_FLAGS;
@@ -763,6 +892,10 @@ unit cgcpu;
                 so.shiftimm:=l1;
                 list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
               end
+            else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
+              begin
+                { nothing to do on success }
+              end
             { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
               Just using mov x, #0 might allow some easier optimizations down the line. }
             else if (op = OP_AND) and (dword(a)=0) then
@@ -856,6 +989,7 @@ unit cgcpu;
                     end
                   else
                     list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
+                  a_reg_alloc(list,NR_DEFAULTFLAGS);
                   if op=OP_IMUL then
                     begin
                       shifterop_reset(so);
@@ -888,9 +1022,12 @@ unit cgcpu;
                 end;
             end;
           else
-            list.concat(setoppostfix(
-                taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
-              ));
+            begin
+              if cgsetflags or setflags then
+                a_reg_alloc(list,NR_DEFAULTFLAGS);
+              list.concat(setoppostfix(
+                taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
+            end;
         end;
         maybeadjustresult(list,op,size,dst);
       end;
@@ -944,51 +1081,7 @@ unit cgcpu;
             )
            ) then
           begin
-            reference_reset(tmpref,4);
-
-            { load symbol }
-            tmpreg:=getintregister(list,OS_INT);
-            if assigned(ref.symbol) then
-              begin
-                current_asmdata.getjumplabel(l);
-                cg.a_label(current_procinfo.aktlocaldata,l);
-                tmpref.symboldata:=current_procinfo.aktlocaldata.last;
-
-                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
-
-                { load consts entry }
-                tmpref.symbol:=l;
-                tmpref.base:=NR_R15;
-                list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
-
-                { in case of LDF/STF, we got rid of the NR_R15 }
-                if is_pc(ref.base) then
-                  ref.base:=NR_NO;
-                if is_pc(ref.index) then
-                  ref.index:=NR_NO;
-              end
-            else
-              a_load_const_reg(list,OS_ADDR,ref.offset,tmpreg);
-
-            if (ref.base<>NR_NO) then
-              begin
-                if ref.index<>NR_NO then
-                  begin
-                    list.concat(taicpu.op_reg_reg_reg(A_ADD,tmpreg,ref.base,tmpreg));
-                    ref.base:=tmpreg;
-                  end
-                else
-                  begin
-                    ref.index:=tmpreg;
-                    ref.shiftimm:=0;
-                    ref.signindex:=1;
-                    ref.shiftmode:=SM_None;
-                  end;
-              end
-            else
-              ref.base:=tmpreg;
-            ref.offset:=0;
-            ref.symbol:=nil;
+            fixref(list,ref);
           end;
 
         { fold if there is base, index and offset, however, don't fold
@@ -1366,6 +1459,7 @@ unit cgcpu;
         tmpreg : tregister;
         b : byte;
       begin
+        a_reg_alloc(list,NR_DEFAULTFLAGS);
         if is_shifter_const(a,b) then
           list.concat(taicpu.op_reg_const(A_CMP,reg,a))
         { CMN reg,0 and CMN reg,$80000000 are different from CMP reg,$ffffffff
@@ -1379,18 +1473,40 @@ unit cgcpu;
             list.concat(taicpu.op_reg_reg(A_CMP,reg,tmpreg));
           end;
         a_jmp_cond(list,cmp_op,l);
+        a_reg_dealloc(list,NR_DEFAULTFLAGS);
       end;
 
 
     procedure tcgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
       begin
-        Comment(V_Error,'tcgarm.a_bit_scan_reg_reg method not implemented');
+        if reverse then  { reverse scan: derive the result from the leading-zero count of src; size is not consulted in this body }
+          begin
+            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,src));  { dst := number of leading zero bits in src }
+            list.Concat(taicpu.op_reg_reg_const(A_RSB,dst,dst,31));  { dst := 31 - dst }
+            list.Concat(taicpu.op_reg_reg_const(A_AND,dst,dst,255));  { keep only the low byte, so 31-32 = -1 (CLZ result 32) becomes 255 }
+          end
+        { whether this code path is used at all is decided when the system unit is compiled,
+          so no additional check for RBIT availability is needed here }
+        else
+          begin
+            list.Concat(taicpu.op_reg_reg(A_RBIT,dst,src));  { dst := src with its bit order reversed }
+            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));  { leading zeros of the reversed value }
+            a_reg_alloc(list,NR_DEFAULTFLAGS);  { flags are allocated around the CMP and the conditional MOV }
+            list.Concat(taicpu.op_reg_const(A_CMP,dst,32));  { count of 32 is the special case replaced by $ff below }
+            if current_settings.cputype in cpu_thumb2 then
+              list.Concat(taicpu.op_cond(A_IT, C_EQ));  { on Thumb-2 targets an IT EQ is emitted before the conditional MOV }
+            list.Concat(setcondition(taicpu.op_reg_const(A_MOV,dst,$ff),C_EQ));  { dst := $ff only when the compare was equal }
+            a_reg_dealloc(list,NR_DEFAULTFLAGS);
+          end;
       end;
 
+
     procedure tcgarm.a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel);
       begin
+        a_reg_alloc(list,NR_DEFAULTFLAGS);  { allocate NR_DEFAULTFLAGS before the CMP that sets the flags }
         list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
         a_jmp_cond(list,cmp_op,l);
+        a_reg_dealloc(list,NR_DEFAULTFLAGS);  { release NR_DEFAULTFLAGS once the conditional jump has been emitted }
       end;
 
 
@@ -1808,7 +1924,7 @@ unit cgcpu;
 
                 if regs=[] then
                   begin
-                    if (current_settings.cputype<cpu_armv5) then
+                    if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
                       list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
                     else
                       list.concat(taicpu.op_reg(A_BX,NR_R14))
@@ -1829,7 +1945,7 @@ unit cgcpu;
                 list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_EA));
               end;
           end
-        else if (current_settings.cputype<cpu_armv5) then
+        else if not(CPUARM_HAS_BX in cpu_capabilities[current_settings.cputype]) then
           list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14))
         else
           list.concat(taicpu.op_reg(A_BX,NR_R14))
@@ -1899,6 +2015,7 @@ unit cgcpu;
         tmpreg : tregister;
         tmpref : treference;
         l : tasmlabel;
+        indirection_done : boolean;
       begin
         { absolute symbols can't be handled directly, we've to store the symbol reference
           in the text segment and access it pc relative
@@ -1916,16 +2033,42 @@ unit cgcpu;
         cg.a_label(current_procinfo.aktlocaldata,l);
         tmpref.symboldata:=current_procinfo.aktlocaldata.last;
 
+        indirection_done:=false;
         if assigned(ref.symbol) then
-          current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
+          begin
+            if (target_info.system=system_arm_darwin) and
+               (ref.symbol.bind in [AB_EXTERNAL,AB_WEAK_EXTERNAL,AB_PRIVATE_EXTERN,AB_COMMON]) then
+              begin
+                tmpreg:=g_indirect_sym_load(list,ref.symbol.name,asmsym2indsymflags(ref.symbol));
+                if ref.offset<>0 then
+                  a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
+                indirection_done:=true;
+              end
+            else
+              current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset))
+          end
         else
           current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(ref.offset));
 
         { load consts entry }
-        tmpreg:=getintregister(list,OS_INT);
-        tmpref.symbol:=l;
-        tmpref.base:=NR_PC;
-        list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
+        if not indirection_done then
+          begin
+            tmpreg:=getintregister(list,OS_INT);
+            tmpref.symbol:=l;
+            tmpref.base:=NR_PC;
+            list.concat(taicpu.op_reg_ref(A_LDR,tmpreg,tmpref));
+          end;
+
+        { This routine can be called with PC as base/index in case the offset
+          was too large to encode in a load/store. In that case, the entire
+          absolute expression has been re-encoded in a new constpool entry, and
+          we have to remove the use of PC from the original reference (the code
+          above made everything relative to the value loaded from the new
+          constpool entry) }
+        if is_pc(ref.base) then
+          ref.base:=NR_NO;
+        if is_pc(ref.index) then
+          ref.index:=NR_NO;
 
         if (ref.base<>NR_NO) then
           begin
@@ -1942,8 +2085,8 @@ unit cgcpu;
                   ref.signindex:=1;
                   ref.shiftmode:=SM_None;
                 end
-                else
-                  ref.base:=tmpreg;
+              else
+                ref.base:=tmpreg;
           end
         else
           ref.base:=tmpreg;
@@ -1955,13 +2098,15 @@ unit cgcpu;
     procedure tcgarm.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
       var
         paraloc1,paraloc2,paraloc3 : TCGPara;
+        pd : tprocdef;
       begin
+        pd:=search_system_proc('MOVE');
         paraloc1.init;
         paraloc2.init;
         paraloc3.init;
-        paramanager.getintparaloc(pocall_default,1,voidpointertype,paraloc1);
-        paramanager.getintparaloc(pocall_default,2,voidpointertype,paraloc2);
-        paramanager.getintparaloc(pocall_default,3,ptrsinttype,paraloc3);
+        paramanager.getintparaloc(pd,1,paraloc1);
+        paramanager.getintparaloc(pd,2,paraloc2);
+        paramanager.getintparaloc(pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -2009,9 +2154,11 @@ unit cgcpu;
           dstref.offset:=size;
           r:=getintregister(list,size2opsize[size]);
           a_load_ref_reg(list,size2opsize[size],size2opsize[size],srcref,r);
+          a_reg_alloc(list,NR_DEFAULTFLAGS);
           list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,countreg,countreg,1),PF_S));
           a_load_reg_ref(list,size2opsize[size],size2opsize[size],r,dstref);
           a_jmp_flags(list,F_NE,l);
+          a_reg_dealloc(list,NR_DEFAULTFLAGS);
           srcref.offset:=1;
           dstref.offset:=1;
           case count mod size of
@@ -2247,6 +2394,7 @@ unit cgcpu;
               hflags:=ovloc.resflags;
               inverse_flags(hflags);
               cg.a_jmp_flags(list,hflags,hl);
+              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
             end;
           else
             internalerror(200409281);
@@ -2713,8 +2861,10 @@ unit cgcpu;
         case op of
           OP_NEG:
             begin
+              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
               list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
               list.concat(taicpu.op_reg_reg_const(A_RSC,regdst.reghi,regsrc.reghi,0));
+              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
             end;
           OP_NOT:
             begin
@@ -2786,11 +2936,15 @@ unit cgcpu;
               OP_ADD:
                 begin
                   if is_shifter_const(lo(value),b) then
-                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
+                    begin
+                      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
+                      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
+                    end
                   else
                     begin
                       tmpreg:=cg.getintregister(list,OS_32);
                       cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
+                      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                       list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                     end;
 
@@ -2806,11 +2960,15 @@ unit cgcpu;
               OP_SUB:
                 begin
                   if is_shifter_const(lo(value),b) then
-                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
+                    begin
+                      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
+                      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,lo(value)),PF_S))
+                    end
                   else
                     begin
                       tmpreg:=cg.getintregister(list,OS_32);
                       cg.a_load_const_reg(list,OS_32,lo(value),tmpreg);
+                      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                       list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                     end;
 
@@ -2849,11 +3007,15 @@ unit cgcpu;
               OP_ADD:
                 begin
                   if is_shifter_const(aint(lo(value)),b) then
-                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
+                    begin
+                      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
+                      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_ADD,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
+                    end
                   else
                     begin
                       tmpreg:=cg.getintregister(list,OS_32);
                       cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
+                      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                       list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                     end;
 
@@ -2869,11 +3031,15 @@ unit cgcpu;
               OP_SUB:
                 begin
                   if is_shifter_const(aint(lo(value)),b) then
-                    list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
+                    begin
+                      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
+                      list.concat(setoppostfix(taicpu.op_reg_reg_const(A_SUB,regdst.reglo,regsrc.reglo,aint(lo(value))),PF_S))
+                    end
                   else
                     begin
                       tmpreg:=cg.getintregister(list,OS_32);
                       cg.a_load_const_reg(list,OS_32,aint(lo(value)),tmpreg);
+                      cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                       list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc.reglo,tmpreg),PF_S));
                     end;
 
@@ -2906,11 +3072,13 @@ unit cgcpu;
             case op of
               OP_ADD:
                 begin
+                  cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                   list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                   list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi),PF_S));
                 end;
               OP_SUB:
                 begin
+                  cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                   list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                   list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi),PF_S));
                 end;
@@ -2939,13 +3107,17 @@ unit cgcpu;
                 end;
               OP_ADD:
                 begin
+                  cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                   list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ADD,regdst.reglo,regsrc1.reglo,regsrc2.reglo),PF_S));
                   list.concat(taicpu.op_reg_reg_reg(A_ADC,regdst.reghi,regsrc1.reghi,regsrc2.reghi));
+                  cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
                 end;
               OP_SUB:
                 begin
+                  cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
                   list.concat(setoppostfix(taicpu.op_reg_reg_reg(A_SUB,regdst.reglo,regsrc2.reglo,regsrc1.reglo),PF_S));
                   list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,regsrc2.reghi,regsrc1.reghi));
+                  cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
                 end;
               else
                 internalerror(2003083101);
@@ -2967,10 +3139,17 @@ unit cgcpu;
           rg[R_INTREGISTER]:=trgintcputhumb2.create(R_INTREGISTER,R_SUBWHOLE,
               [RS_R0,RS_R1,RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,
                RS_R10,RS_R12,RS_R14],first_int_imreg,[]);
-        rg[R_FPUREGISTER]:=trgcputhumb2.create(R_FPUREGISTER,R_SUBNONE,
+        rg[R_FPUREGISTER]:=trgcpu.create(R_FPUREGISTER,R_SUBNONE,
             [RS_F0,RS_F1,RS_F2,RS_F3,RS_F4,RS_F5,RS_F6,RS_F7],first_fpu_imreg,[]);
-        rg[R_MMREGISTER]:=trgcputhumb2.create(R_MMREGISTER,R_SUBNONE,
-            [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
+
+        if current_settings.fputype=fpu_fpv4_s16 then
+          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBFD,
+              [RS_D0,RS_D1,RS_D2,RS_D3,RS_D4,RS_D5,RS_D6,RS_D7,
+               RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15
+              ],first_mm_imreg,[])
+        else
+          rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBNONE,
+              [RS_S0,RS_S1,RS_R2,RS_R3,RS_R4,RS_S31],first_mm_imreg,[]);
       end;
 
 
@@ -3004,24 +3183,12 @@ unit cgcpu;
        begin
           if not(size in [OS_8,OS_S8,OS_16,OS_S16,OS_32,OS_S32]) then
             internalerror(2002090902);
-          if is_shifter_const(a,imm_shift) then
+          if is_thumb_imm(a) then
             list.concat(taicpu.op_reg_const(A_MOV,reg,a))
-          { loading of constants with mov and orr }
-          else if (is_shifter_const(a-byte(a),imm_shift)) then
-            begin
-              list.concat(taicpu.op_reg_const(A_MOV,reg,a-byte(a)));
-              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,byte(a)));
-            end
-          else if (is_shifter_const(a-word(a),imm_shift)) and (is_shifter_const(word(a),imm_shift)) then
-            begin
-              list.concat(taicpu.op_reg_const(A_MOV,reg,a-word(a)));
-              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,word(a)));
-            end
-          else if (is_shifter_const(a-(dword(a) shl 8) shr 8,imm_shift)) and (is_shifter_const((dword(a) shl 8) shr 8,imm_shift)) then
-            begin
-              list.concat(taicpu.op_reg_const(A_MOV,reg,a-(dword(a) shl 8) shr 8));
-              list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,(dword(a) shl 8) shr 8));
-            end
+          else if is_thumb_imm(not(a)) then
+            list.concat(taicpu.op_reg_const(A_MVN,reg,not(a)))
+          else if (a and $FFFF)=a then
+            list.concat(taicpu.op_reg_const(A_MOVW,reg,a))
           else
             begin
                reference_reset(hr,4);
@@ -3032,6 +3199,7 @@ unit cgcpu;
                current_procinfo.aktlocaldata.concat(tai_const.Create_32bit(longint(a)));
 
                hr.symbol:=l;
+               hr.base:=NR_PC;
                list.concat(taicpu.op_reg_ref(A_LDR,reg,hr));
             end;
        end;
@@ -3266,13 +3434,18 @@ unit cgcpu;
                 begin
                   tmpreg:=getintregister(list,size);
                   a_load_const_reg(list, size, a, tmpreg);
-                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
-                   ));
+                  if cgsetflags or setflags then
+                    a_reg_alloc(list,NR_DEFAULTFLAGS);
+                  list.concat(setoppostfix(
+                    taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmop[op],dst,src,tmpreg),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
                 end
               else
-              list.concat(setoppostfix(
-                  taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
-              ));
+                begin
+                  if cgsetflags or setflags then
+                    a_reg_alloc(list,NR_DEFAULTFLAGS);
+                  list.concat(setoppostfix(
+                    taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,a),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
+                end;
               if (cgsetflags or setflags) and (size in [OS_8,OS_16,OS_32]) then
                 begin
                   ovloc.loc:=LOC_FLAGS;
@@ -3307,6 +3480,35 @@ unit cgcpu;
                 so.shiftimm:=l1;
                 list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
               end
+            { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
+            else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
+              begin
+                if l1>32 then{does this ever happen?}
+                  internalerror(201205181);
+                shifterop_reset(so);
+                so.shiftmode:=SM_LSL;
+                so.shiftimm:=l1;
+                list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
+              end
+            else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
+              begin
+                { nothing to do on success }
+              end
+            { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
+              Just using mov x, #0 might allow some easier optimizations down the line. }
+            else if (op = OP_AND) and (dword(a)=0) then
+              list.concat(taicpu.op_reg_const(A_MOV,dst,0))
+            { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
+            else if (op = OP_AND) and (not(dword(a))=0) then
+              list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
+            { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
+              broader range of shifterconstants.}
+            {else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
+              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))}
+            else if (op = OP_AND) and is_thumb_imm(a) then
+              list.concat(taicpu.op_reg_reg_const(A_MOV,dst,src,dword(a)))
+            else if (op = OP_AND) and is_thumb_imm(not(dword(a))) then
+              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
             else
               begin
                 tmpreg:=getintregister(list,size);
@@ -3374,6 +3576,7 @@ unit cgcpu;
                         end
                       else
                         list.concat(taicpu.op_reg_reg_reg_reg(asmop,dst,overflowreg,src2,src1));
+                      a_reg_alloc(list,NR_DEFAULTFLAGS);
                       if op=OP_IMUL then
                         begin
                            shifterop_reset(so);
@@ -3406,9 +3609,12 @@ unit cgcpu;
                    end;
               end;
            else
-              list.concat(setoppostfix(
-                   taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))
-                ));
+             begin
+               if cgsetflags or setflags then
+                 a_reg_alloc(list,NR_DEFAULTFLAGS);
+               list.concat(setoppostfix(
+                 taicpu.op_reg_reg_reg(op_reg_reg_opcg2asmopThumb2[op],dst,src2,src1),toppostfix(ord(cgsetflags or setflags)*ord(PF_S))));
+             end;
         end;
         maybeadjustresult(list,op,size,dst);
       end;
@@ -3587,55 +3793,42 @@ unit cgcpu;
                 inc(stackmisalignment,4);
 
             stackmisalignment:=stackmisalignment mod current_settings.alignment.localalignmax;
-            if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
+
+            LocalSize:=current_procinfo.calc_stackframe_size;
+            if (LocalSize<>0) or
+               ((stackmisalignment<>0) and
+                ((pi_do_call in current_procinfo.flags) or
+                 (po_assembler in current_procinfo.procdef.procoptions))) then
               begin
-                LocalSize:=current_procinfo.calc_stackframe_size;
-                if (LocalSize<>0) or
-                   ((stackmisalignment<>0) and
-                    ((pi_do_call in current_procinfo.flags) or
-                     (po_assembler in current_procinfo.procdef.procoptions))) then
+                localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
+                if not(is_shifter_const(LocalSize,shift)) then
                   begin
-                    localsize:=align(localsize+stackmisalignment,current_settings.alignment.localalignmax)-stackmisalignment;
-                    if not(is_shifter_const(LocalSize,shift)) then
-                      begin
-                        a_reg_alloc(list,NR_R12);
-                        a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
-                        list.concat(taicpu.op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R12));
-                        a_reg_dealloc(list,NR_R12);
-                      end
-                    else
-                      begin
-                        list.concat(taicpu.op_reg_reg_const(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,LocalSize));
-                      end;
-                  end;
-
-                if regs=[] then
-                  list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
+                    a_reg_alloc(list,NR_R12);
+                    a_load_const_reg(list,OS_ADDR,LocalSize,NR_R12);
+                    list.concat(taicpu.op_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_R12));
+                    a_reg_dealloc(list,NR_R12);
+                  end
                 else
                   begin
-                    reference_reset(ref,4);
-                    ref.index:=NR_STACK_POINTER_REG;
-                    ref.addressmode:=AM_PREINDEXED;
-                    list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
+                    a_reg_dealloc(list,NR_R12);
+                    list.concat(taicpu.op_reg_const(A_ADD,NR_STACK_POINTER_REG,LocalSize));
                   end;
-              end
+              end;
+
+            if regs=[] then
+              list.concat(taicpu.op_reg_reg(A_MOV,NR_R15,NR_R14))
             else
               begin
-                { restore int registers and return }
-                list.concat(taicpu.op_reg_reg(A_MOV, NR_STACK_POINTER_REG, NR_FRAME_POINTER_REG));
-                { Add 4 to SP to make it point to an "imaginary PC" which the paramanager assumes is there(for normal ARM) }
-                list.concat(taicpu.op_reg_const(A_ADD, NR_STACK_POINTER_REG, 4));
-
                 reference_reset(ref,4);
                 ref.index:=NR_STACK_POINTER_REG;
-                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_DB));
+                ref.addressmode:=AM_PREINDEXED;
+                list.concat(setoppostfix(taicpu.op_ref_regset(A_LDM,ref,R_INTREGISTER,R_SUBWHOLE,regs),PF_FD));
               end;
           end
         else
           list.concat(taicpu.op_reg_reg(A_MOV,NR_PC,NR_R14));
       end;
 
-
    function Tthumb2cgarm.handle_load_store(list:TAsmList;op: tasmop;oppostfix : toppostfix;reg:tregister;ref: treference):treference;
       var
         tmpreg : tregister;
@@ -3797,6 +3990,127 @@ unit cgcpu;
         Result := ref;
       end;
 
+     procedure Tthumb2cgarm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);  { emits a VMOV from reg1 to reg2; only the OS_F32 to OS_F32 case generates code, shuffle is not used }
+      var
+        instr: taicpu;
+      begin
+        if (fromsize=OS_F32) and
+          (tosize=OS_F32) then
+          begin
+            instr:=setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32);  { destination operand is reg2, source is reg1 }
+            list.Concat(instr);
+            add_move_instruction(instr);  { the emitted VMOV is also passed to add_move_instruction }
+          end
+        else if (fromsize=OS_F64) and
+          (tosize=OS_F64) then
+          begin  { NOTE(review): nothing is emitted for OS_F64 to OS_F64, the candidate code below is disabled - confirm this is intended }
+            //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,tregister(longint(reg2)+1),tregister(longint(reg1)+1)), PF_F32));
+            //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VMOV,reg2,reg1), PF_F32));
+          end
+        else if (fromsize=OS_F32) and
+          (tosize=OS_F64) then
+          //list.Concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,reg2,reg1), PF_F32))
+          begin  { NOTE(review): nothing is emitted for OS_F32 to OS_F64 either, the VCVT above is disabled }
+            //list.concat(nil);
+          end;  { NOTE(review): any other fromsize/tosize pair falls through without code and without an error }
+      end;
+
+     procedure Tthumb2cgarm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);  { emits a VLDR of reg from ref after reducing the reference to a single base register; tosize and shuffle are not used }
+      var
+        href: treference;
+        tmpreg: TRegister;
+        so: tshifterop;
+      begin
+        href:=ref;  { ref is const, so all rewriting is done on this local copy }
+
+        if (href.base<>NR_NO) and
+          (href.index<>NR_NO) then
+          begin  { both base and index are present: combine them into one temporary register with an ADD }
+            tmpreg:=getintregister(list,OS_INT);
+            if href.shiftmode<>SM_None then
+              begin
+                so.rs:=href.index;
+                so.shiftimm:=href.shiftimm;
+                so.shiftmode:=href.shiftmode;
+                list.concat(taicpu.op_reg_reg_shifterop(A_ADD,tmpreg,href.base,so));  { NOTE(review): index is passed only through so.rs, and so is filled without shifterop_reset - confirm the operand form }
+              end
+            else
+              a_op_reg_reg_reg(list,OP_ADD,OS_INT,href.index,href.base,tmpreg);  { unshifted index: plain ADD of index and base into tmpreg }
+
+            reference_reset_base(href,tmpreg,href.offset,0);  { href is rebuilt on tmpreg with the original offset }
+          end;
+
+        if assigned(href.symbol) then
+          begin  { symbolic reference: load its address into a register first }
+            tmpreg:=getintregister(list,OS_INT);
+            a_loadaddr_ref_reg(list,href,tmpreg);
+
+            reference_reset_base(href,tmpreg,0,0);  { href is rebuilt on tmpreg with offset 0 }
+          end;
+
+        if fromsize=OS_F32 then
+          list.Concat(setoppostfix(taicpu.op_reg_ref(A_VLDR,reg,href), PF_F32))
+        else  { every fromsize other than OS_F32 gets the PF_F64 postfix }
+          list.Concat(setoppostfix(taicpu.op_reg_ref(A_VLDR,reg,href), PF_F64));
+      end;
+
+     procedure Tthumb2cgarm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);  { emits a VSTR of reg to ref after reducing the reference to a single base register; tosize and shuffle are not used }
+      var
+        href: treference;
+        so: tshifterop;
+        tmpreg: TRegister;
+      begin
+        href:=ref;  { ref is const, so all rewriting is done on this local copy }
+
+        if (href.base<>NR_NO) and
+          (href.index<>NR_NO) then
+          begin  { both base and index are present: combine them into one temporary register with an ADD }
+            tmpreg:=getintregister(list,OS_INT);
+            if href.shiftmode<>SM_None then
+              begin
+                so.rs:=href.index;
+                so.shiftimm:=href.shiftimm;
+                so.shiftmode:=href.shiftmode;
+                list.concat(taicpu.op_reg_reg_shifterop(A_ADD,tmpreg,href.base,so));  { NOTE(review): index is passed only through so.rs, and so is filled without shifterop_reset - confirm the operand form }
+              end
+            else
+              a_op_reg_reg_reg(list,OP_ADD,OS_INT,href.index,href.base,tmpreg);  { unshifted index: plain ADD of index and base into tmpreg }
+
+            reference_reset_base(href,tmpreg,href.offset,0);  { href is rebuilt on tmpreg with the original offset }
+          end;
+
+        if assigned(href.symbol) then
+          begin  { symbolic reference: load its address into a register first }
+            tmpreg:=getintregister(list,OS_INT);
+            a_loadaddr_ref_reg(list,href,tmpreg);
+
+            reference_reset_base(href,tmpreg,0,0);  { href is rebuilt on tmpreg with offset 0 }
+          end;
+
+        if fromsize=OS_F32 then
+          list.Concat(setoppostfix(taicpu.op_reg_ref(A_VSTR,reg,href), PF_32))  { NOTE(review): the store uses PF_32/PF_64 while the matching load routine uses PF_F32/PF_F64 - confirm this is intended }
+        else  { every fromsize other than OS_F32 gets the PF_64 postfix }
+          list.Concat(setoppostfix(taicpu.op_reg_ref(A_VSTR,reg,href), PF_64));
+      end;
+
+     procedure Tthumb2cgarm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);  { emits a VMOV from intreg into mmreg; only tosize=OS_F32 is accepted, fromsize and shuffle are not checked }
+      begin
+        if //(shuffle=nil) and
+          (tosize=OS_F32) then
+          list.Concat(taicpu.op_reg_reg(A_VMOV,mmreg,intreg))  { destination operand is mmreg, source is intreg }
+        else
+          internalerror(2012100813);  { any other tosize raises this internal error }
+      end;
+
+     procedure Tthumb2cgarm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);  { emits a VMOV from mmreg into intreg; only fromsize=OS_F32 is accepted, tosize and shuffle are not checked }
+      begin
+        if //(shuffle=nil) and
+          (fromsize=OS_F32) then
+          list.Concat(taicpu.op_reg_reg(A_VMOV,intreg,mmreg))  { destination operand is intreg, source is mmreg }
+        else
+          internalerror(2012100814);  { any other fromsize raises this internal error }
+      end;
+
 
     procedure tthumb2cg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
       var tmpreg: tregister;
@@ -3804,10 +4118,12 @@ unit cgcpu;
         case op of
           OP_NEG:
             begin
+              cg.a_reg_alloc(list,NR_DEFAULTFLAGS);
               list.concat(setoppostfix(taicpu.op_reg_reg_const(A_RSB,regdst.reglo,regsrc.reglo,0),PF_S));
               tmpreg:=cg.getintregister(list,OS_32);
               list.concat(taicpu.op_reg_const(A_MOV,tmpreg,0));
               list.concat(taicpu.op_reg_reg_reg(A_SBC,regdst.reghi,tmpreg,regsrc.reghi));
+              cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
             end;
           else
             inherited a_op64_reg_reg(list, op, size, regsrc, regdst);

+ 95 - 7
compiler/arm/cpubase.pas

@@ -25,15 +25,15 @@
 }
 unit cpubase;
 
+{$define USEINLINE}
+
 {$i fpcdefs.inc}
 
   interface
 
     uses
-      cutils,cclasses,
       globtype,globals,
       cpuinfo,
-      aasmbase,
       cgbase
       ;
 
@@ -44,6 +44,9 @@ unit cpubase;
 
     type
       TAsmOp= {$i armop.inc}
+      {This is a bit of a hack, because there are more than 256 ARM Assembly Ops
+       But FPC currently can't handle more than 256 elements in a set.}
+      TCommonAsmOps = Set of A_None .. A_UQASX;
 
       { This should define the array of instructions as string }
       op2strtable=array[tasmop] of string[11];
@@ -134,7 +137,11 @@ unit cpubase;
         { multiple load/store vfp address modes }
         PF_IAD,PF_DBD,PF_FDD,PF_EAD,
         PF_IAS,PF_DBS,PF_FDS,PF_EAS,
-        PF_IAX,PF_DBX,PF_FDX,PF_EAX
+        PF_IAX,PF_DBX,PF_FDX,PF_EAX,
+        { FPv4 postfixes }
+        PF_32,PF_64,PF_F32,PF_F64,
+        PF_F32S32,PF_F32U32,
+        PF_S32F32,PF_U32F32
       );
 
       TOpPostfixes = set of TOpPostfix;
@@ -147,14 +154,17 @@ unit cpubase;
         PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,PF_None,
         PF_S,PF_D,PF_E,PF_None,PF_None);
 
-      oppostfix2str : array[TOpPostfix] of string[3] = ('',
+      oppostfix2str : array[TOpPostfix] of string[8] = ('',
         's',
         'd','e','p','ep',
         'b','sb','bt','h','sh','t',
         'ia','ib','da','db','fd','fa','ed','ea',
         'iad','dbd','fdd','ead',
         'ias','dbs','fds','eas',
-        'iax','dbx','fdx','eax');
+        'iax','dbx','fdx','eax',
+        '.32','.64','.f32','.f64',
+        '.f32.s32','.f32.u32',
+        '.s32.f32','.u32.f32');
 
       roundingmode2str : array[TRoundingMode] of string[1] = ('',
         'p','m','z');
@@ -210,12 +220,15 @@ unit cpubase;
       tcpumodeflag = (mfA, mfI, mfF);
       tcpumodeflags = set of tcpumodeflag;
 
+      tspecialregflag = (srC, srX, srS, srF);
+      tspecialregflags = set of tspecialregflag;
+
 {*****************************************************************************
                                  Constants
 *****************************************************************************}
 
     const
-      max_operands = 4;
+      max_operands = 6;
 
       maxintregs = 15;
       maxfpuregs = 8;
@@ -291,6 +304,9 @@ unit cpubase;
       { Offset where the parent framepointer is pushed }
       PARENT_FRAMEPOINTER_OFFSET = 0;
 
+      NR_DEFAULTFLAGS = NR_CPSR;
+      RS_DEFAULTFLAGS = RS_CPSR;
+
       { Low part of 64bit return value }
       function NR_FUNCTION_RESULT64_LOW_REG: tregister;{$ifdef USEINLINE}inline;{$endif USEINLINE}
       function RS_FUNCTION_RESULT64_LOW_REG: shortint;{$ifdef USEINLINE}inline;{$endif USEINLINE}
@@ -347,9 +363,13 @@ unit cpubase;
     function is_pc(const r : tregister) : boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
     function is_shifter_const(d : aint;var imm_shift : byte) : boolean;
+    function is_thumb_imm(d : aint) : boolean; { Doesn't handle ROR_C detection }
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword):boolean;
     function dwarf_reg(r:tregister):shortint;
 
+    function IsIT(op: TAsmOp) : boolean;
+    function GetITLevels(op: TAsmOp) : longint;
+
   implementation
 
     uses
@@ -357,7 +377,7 @@ unit cpubase;
 
 
     const
-      std_regname_table : array[tregisterindex] of string[7] = (
+      std_regname_table : TRegNameTable = (
         {$i rarmstd.inc}
       );
 
@@ -529,6 +549,43 @@ unit cpubase;
         result:=false;
       end;
 
+    { Returns true when d matches one of the immediate patterns checked below:
+        $000000XY, $00XY00XY, $XY00XY00, $XYXYXYXY,
+      or any value accepted by is_shifter_const.
+      ROR_C detection is not handled (see the interface declaration). }
+    function is_thumb_imm(d: aint): boolean;
+      var
+        imm : byte;
+      begin
+        result:=false;
+        { $000000XY: plain 8-bit value }
+        if (d and $FF) = d then
+          begin
+            result:=true;
+            exit;
+          end;
+        { $00XY00XY: both halfwords equal, odd bytes zero }
+        if ((d and $FF00FF00) = 0) and
+           ((d shr 16)=(d and $FFFF)) then
+          begin
+            result:=true;
+            exit;
+          end;
+        { $XY00XY00: both halfwords equal, even bytes zero }
+        if ((d and $00FF00FF) = 0) and
+           ((d shr 16)=(d and $FFFF)) then
+          begin
+            result:=true;
+            exit;
+          end;
+        { $XYXYXYXY: both halfwords equal and byte 1 equals byte 0.
+          Byte 1 must be masked to 8 bits before the comparison; comparing the
+          unmasked (d shr 8) against a single byte could only ever match d=0. }
+        if ((d shr 16)=(d and $FFFF)) and
+           (((d shr 8) and $FF)=(d and $FF)) then
+          begin
+            result:=true;
+            exit;
+          end;
+        { fall back to the shifter-constant test; imm receives the shift
+          reported by is_shifter_const and is not used further here }
+        if is_shifter_const(d,imm) then
+          begin
+            result:=true;
+            exit;
+          end;
+      end;
+
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword) : boolean;
       var
         d, i, i2: Dword;
@@ -595,4 +652,35 @@ unit cpubase;
         result:=RS_R0;
     end;
 
+    { Tells whether op is one of the A_IT* opcodes (A_IT and all of its
+      T/E suffixed variants listed below). }
+    function IsIT(op: TAsmOp) : boolean;
+      begin
+        { assume "no" and flip only for the listed opcodes }
+        result:=false;
+        case op of
+          A_IT,
+          A_ITE, A_ITT,
+          A_ITEE, A_ITTE, A_ITET, A_ITTT,
+          A_ITEEE, A_ITTEE, A_ITETE, A_ITTTE,
+          A_ITEET, A_ITTET, A_ITETT, A_ITTTT:
+            result:=true;
+        end;
+      end;
+
+    { Returns the level count associated with an A_IT* opcode:
+      1 for A_IT, 2/3/4 for the variants with one/two/three T/E suffix
+      letters, and 0 for every opcode that is not an A_IT* opcode. }
+    function GetITLevels(op: TAsmOp) : longint;
+      begin
+        { default for anything that is not an IT opcode }
+        result:=0;
+        case op of
+          A_IT:
+            result:=1;
+          A_ITE, A_ITT:
+            result:=2;
+          A_ITEE, A_ITTE, A_ITET, A_ITTT:
+            result:=3;
+          A_ITEEE, A_ITTEE, A_ITETE, A_ITTTE,
+          A_ITEET, A_ITTET, A_ITETT, A_ITTTT:
+            result:=4;
+        end;
+      end;
+
 end.

+ 882 - 0
compiler/arm/cpuelf.pas

@@ -0,0 +1,882 @@
+{
+    Copyright (c) 2012 by Sergei Gorelkin
+
+    Includes ELF-related code specific to ARM
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cpuelf;
+
+interface
+
+{$i fpcdefs.inc}
+
+implementation
+
+  uses
+    globtype,cutils,cclasses,
+    verbose, elfbase,
+    systems,aasmbase,ogbase,ogelf,assemble;
+
+  type
+    { ARM specialisation of TElfExeOutput: overrides the PLT writers, the
+      first GOT relocation pass and the relocation fixup with ARM-specific
+      implementations. }
+    TElfExeOutputARM=class(TElfExeOutput)
+    private
+      { writes the GOT slot of objsym (and its dynamic relocation) if it is
+        the next slot to be appended to the GOT section }
+      procedure MaybeWriteGOTEntry(reltyp:byte;relocval:aint;objsym:TObjSymbol);
+    protected
+      procedure WriteFirstPLTEntry;override;
+      procedure WritePLTEntry(exesym:TExeSymbol);override;
+      procedure WriteIndirectPLTEntry(exesym:TExeSymbol);override;
+      procedure GOTRelocPass1(objsec:TObjSection;var idx:longint);override;
+      procedure DoRelocationFixup(objsec:TObjSection);override;
+    end;
+
+  const
+    { Relocation types }
+    R_ARM_NONE  = 0;
+    R_ARM_PC24  = 1;       // deprecated
+    R_ARM_ABS32 = 2;
+    R_ARM_REL32 = 3;
+    R_ARM_LDR_PC_G0 = 4;
+    R_ARM_ABS16 = 5;
+    R_ARM_ABS12 = 6;
+    R_ARM_THM_ABS5 = 7;
+    R_ARM_ABS8  = 8;
+    R_ARM_SBREL32 = 9;
+    R_ARM_THM_CALL = 10;
+    R_ARM_THM_PC8 = 11;
+    R_ARM_BREL_ADJ = 12;
+    R_ARM_TLS_DESC = 13;
+    { 14,15,16 are obsolete }
+    R_ARM_TLS_DTPMOD32 = 17;
+    R_ARM_TLS_DTPOFF32 = 18;
+    R_ARM_TLS_TPOFF32 = 19;
+    R_ARM_COPY = 20;
+    R_ARM_GLOB_DAT = 21;
+    R_ARM_JUMP_SLOT = 22;
+    R_ARM_RELATIVE = 23;
+    R_ARM_GOTOFF32 = 24;
+    R_ARM_BASE_PREL = 25;
+    R_ARM_GOT_BREL = 26;
+    R_ARM_PLT32 = 27;      // deprecated
+    R_ARM_CALL = 28;
+    R_ARM_JUMP24 = 29;
+    R_ARM_THM_JUMP24 = 30;
+    R_ARM_BASE_ABS = 31;
+    { 32,33,34 are obsolete }
+    R_ARM_LDR_SBREL_11_0 = 35;    // deprecated
+    R_ARM_ALU_SBREL_19_12 = 36;   // deprecated
+    R_ARM_ALU_SBREL_27_20 = 37;   // deprecated
+    R_ARM_TARGET1 = 38;
+    R_ARM_SBREL31 = 39;           // deprecated
+    R_ARM_V4BX = 40;
+    R_ARM_TARGET2 = 41;
+    R_ARM_PREL31 = 42;
+    R_ARM_MOVW_ABS_NC = 43;
+    R_ARM_MOVT_ABS = 44;
+    R_ARM_MOVW_PREL_NC = 45;
+    R_ARM_MOVT_PREL = 46;
+    R_ARM_THM_MOVW_ABS_NC = 47;
+    R_ARM_THM_MOVT_ABS    = 48;
+    R_ARM_THM_MOVW_PREL_NC = 49;
+    R_ARM_THM_MOVT_PREL = 50;
+    R_ARM_THM_JUMP19 = 51;
+    R_ARM_THM_JUMP6 = 52;
+    R_ARM_THM_ALU_PREL_11_0 = 53;
+    R_ARM_THM_PC12     = 54;
+    R_ARM_ABS32_NOI    = 55;
+    R_ARM_REL32_NOI    = 56;
+    R_ARM_ALU_PC_G0_NC = 57;
+    R_ARM_ALU_PC_G0    = 58;
+    R_ARM_ALU_PC_G1_NC = 59;
+    R_ARM_ALU_PC_G1    = 60;
+    R_ARM_ALU_PC_G2    = 61;
+    R_ARM_LDR_PC_G1    = 62;
+    R_ARM_LDR_PC_G2    = 63;
+    R_ARM_LDRS_PC_G0   = 64;
+    R_ARM_LDRS_PC_G1   = 65;
+    R_ARM_LDRS_PC_G2   = 66;
+    R_ARM_LDC_PC_G0    = 67;
+    R_ARM_LDC_PC_G1    = 68;
+    R_ARM_LDC_PC_G2    = 69;
+    R_ARM_ALU_SB_G0_NC = 70;
+    R_ARM_ALU_SB_G0    = 71;
+    R_ARM_ALU_SB_G1_NC = 72;
+    R_ARM_ALU_SB_G1    = 73;
+    R_ARM_ALU_SB_G2    = 74;
+    R_ARM_LDR_SB_G0    = 75;
+    R_ARM_LDR_SB_G1    = 76;
+    R_ARM_LDR_SB_G2    = 77;
+    R_ARM_LDRS_SB_G0   = 78;
+    R_ARM_LDRS_SB_G1   = 79;
+    R_ARM_LDRS_SB_G2   = 80;
+    R_ARM_LDC_SB_G0    = 81;
+    R_ARM_LDC_SB_G1    = 82;
+    R_ARM_LDC_SB_G2    = 83;
+    R_ARM_MOVW_BREL_NC = 84;
+    R_ARM_MOVT_BREL    = 85;
+    R_ARM_MOVW_BREL    = 86;
+    R_ARM_THM_MOVW_BREL_NC = 87;
+    R_ARM_THM_MOVT_BREL = 88;
+    R_ARM_THM_MOVW_BREL = 89;
+    R_ARM_TLS_GOTDESC   = 90;
+    R_ARM_TLS_CALL      = 91;
+    R_ARM_TLS_DESCSEQ   = 92;
+    R_ARM_THM_TLS_CALL  = 93;
+    R_ARM_PLT32_ABS     = 94;
+    R_ARM_GOT_ABS = 95;
+    R_ARM_GOT_PREL = 96;
+    R_ARM_GOT_BREL12 = 97;
+    R_ARM_GOTOFF12 = 98;
+    R_ARM_GOTRELAX = 99;
+    R_ARM_GNU_VTENTRY = 100;   // deprecated - old C++ abi
+    R_ARM_GNU_VTINHERIT = 101; // deprecated - old C++ abi
+    R_ARM_THM_JUMP11 = 102;
+    R_ARM_THM_JUMP8  = 103;
+    R_ARM_TLS_GD32   = 104;
+    R_ARM_TLS_LDM32  = 105;
+    R_ARM_TLS_LDO32  = 106;
+    R_ARM_TLS_IE32   = 107;
+    R_ARM_TLS_LE32   = 108;
+    R_ARM_TLS_LDO12  = 109;
+    R_ARM_TLS_LE12   = 110;
+    R_ARM_TLS_IE12GP = 111;
+    { 112-127 are for private experiments }
+    { 128 is obsolete }
+    R_ARM_THM_TLS_DESCSEQ = 129;
+    R_ARM_IRELATIVE = 160;
+
+    { Section types }
+    SHT_ARM_EXIDX          = $70000001;
+    SHT_ARM_PREEMPTMAP     = $70000002;
+    SHT_ARM_ATTRIBUTES     = $70000003;
+    SHT_ARM_DEBUGOVERLAY   = $70000004;
+    SHT_ARM_OVERLAYSECTION = $70000005;
+
+    TCB_SIZE = 8;
+
+  { Using short identifiers to save typing. This ARM thing has more relocations
+    than it has instructions... }
+  { Flag values for TArmRelocProp.flags }
+  const
+    g0=1;       // group field: group 0 (field stores group number + 1)
+    g1=2;       // group field: group 1
+    g2=3;       // group field: group 2
+    gpmask=3;   // mask selecting the group field (bits 0,1); 0 = no group
+    pc=4;       // bit 2: PC-relative
+    nc=8;       // bit 3: unchecked
+    thm=16;     // bit 4: THUMB
+
+  type
+    TArmRelocProp=record
+      name: PChar;
+      flags: byte;      // bits 0,1: group, bit 2: PC-relative, bit 3: unchecked,
+                        // bit 4: THUMB
+    end;
+
+  { Properties of relocation types 0..111, indexed directly by the R_ARM_*
+    relocation number. 'name' is the printable name returned by
+    elf_arm_relocname; 'flags' is a combination of g0/g1/g2, pc, nc and thm.
+    Relocation numbers above 111 are not covered by this table. }
+  const
+    relocprops: array[0..111] of TArmRelocProp = (
+      (name: 'R_ARM_NONE';     flags: 0),                //
+      (name: 'R_ARM_PC24';     flags: pc),               //
+      (name: 'R_ARM_ABS32';    flags: 0),                //
+      (name: 'R_ARM_REL32';    flags: pc),               //
+      (name: 'R_ARM_LDR_PC_G0'; flags: g0+pc),           //
+      (name: 'R_ARM_ABS16';    flags: 0),
+      (name: 'R_ARM_ABS12';    flags: 0),
+      (name: 'R_ARM_THM_ABS5'; flags: thm),
+      (name: 'R_ARM_ABS8';     flags: 0),
+      (name: 'R_ARM_SBREL32';  flags: 0),
+      (name: 'R_ARM_THM_CALL'; flags: thm),
+      (name: 'R_ARM_THM_PC8';  flags: pc+thm),
+      (name: 'R_ARM_BREL_ADJ'; flags: 0),
+      (name: 'R_ARM_TLS_DESC'; flags: 0),
+      (name: 'obsolete(14)';   flags: 0),
+      (name: 'obsolete(15)';   flags: 0),
+      (name: 'obsolete(16)';   flags: 0),
+      (name: 'R_ARM_TLS_DTPMOD32'; flags: 0),
+      (name: 'R_ARM_TLS_DTPOFF32'; flags: 0),
+      (name: 'R_ARM_TLS_TPOFF32';  flags: 0),
+      (name: 'R_ARM_COPY';     flags: 0),
+      (name: 'R_ARM_GLOB_DAT'; flags: 0),
+      (name: 'R_ARM_JUMP_SLOT'; flags: 0),
+      (name: 'R_ARM_RELATIVE'; flags: 0),
+      (name: 'R_ARM_GOTOFF32'; flags: 0),
+      (name: 'R_ARM_BASE_PREL'; flags: pc),              //
+      (name: 'R_ARM_GOT_BREL'; flags: 0),                //
+      (name: 'R_ARM_PLT32';    flags: pc),               //
+      (name: 'R_ARM_CALL';     flags: pc),               //
+      (name: 'R_ARM_JUMP24';   flags: pc),               //
+      (name: 'R_ARM_THM_JUMP24'; flags: thm),
+      (name: 'R_ARM_BASE_ABS'; flags: 0),
+      (name: 'obsolete(32)';   flags: 0),
+      (name: 'obsolete(33)';   flags: 0),
+      (name: 'obsolete(34)';   flags: 0),
+      (name: 'R_ARM_LDR_SBREL_11_0'; flags: g0),
+      (name: 'R_ARM_ALU_SBREL_19_12'; flags: g1),
+      (name: 'R_ARM_ALU_SBREL_27_20'; flags: g2),
+      (name: 'R_ARM_TARGET1';  flags: 0),
+      (name: 'R_ARM_SBREL31';  flags: 0),
+      (name: 'R_ARM_V4BX';     flags: 0),
+      (name: 'R_ARM_TARGET2';  flags: 0),
+      (name: 'R_ARM_PREL31';   flags: 0),
+      (name: 'R_ARM_MOVW_ABS_NC'; flags: nc),
+      (name: 'R_ARM_MOVT_ABS'; flags: 0),
+      (name: 'R_ARM_MOVW_PREL_NC'; flags: nc),
+      (name: 'R_ARM_MOVT_PREL'; flags: 0),
+      (name: 'R_ARM_THM_MOVW_ABS_NC';  flags: nc+thm),
+      (name: 'R_ARM_THM_MOVT_ABS';     flags: thm),
+      (name: 'R_ARM_THM_MOVW_PREL_NC'; flags: nc+thm),
+      (name: 'R_ARM_THM_MOVT_PREL';    flags: thm),
+      (name: 'R_ARM_THM_JUMP19';       flags: thm),
+      (name: 'R_ARM_THM_JUMP6';        flags: thm),
+      (name: 'R_ARM_THM_ALU_PREL_11_0'; flags: thm+pc),
+      (name: 'R_ARM_THM_PC12';         flags: thm+pc),
+      (name: 'R_ARM_ABS32_NOI';    flags: 0),
+      (name: 'R_ARM_REL32_NOI';    flags: pc),
+      (name: 'R_ARM_ALU_PC_G0_NC'; flags: pc+g0+nc),     //
+      (name: 'R_ARM_ALU_PC_G0';    flags: pc+g0),        //
+      (name: 'R_ARM_ALU_PC_G1_NC'; flags: pc+g1+nc),     //
+      (name: 'R_ARM_ALU_PC_G1';    flags: pc+g1),        //
+      (name: 'R_ARM_ALU_PC_G2';    flags: pc+g2),        //
+      (name: 'R_ARM_LDR_PC_G1';    flags: pc+g1),        //
+      (name: 'R_ARM_LDR_PC_G2';    flags: pc+g2),        //
+      (name: 'R_ARM_LDRS_PC_G0';   flags: pc+g0),        //
+      (name: 'R_ARM_LDRS_PC_G1';   flags: pc+g1),        //
+      (name: 'R_ARM_LDRS_PC_G2';   flags: pc+g2),        //
+      (name: 'R_ARM_LDC_PC_G0';    flags: pc+g0),        //
+      (name: 'R_ARM_LDC_PC_G1';    flags: pc+g1),        //
+      (name: 'R_ARM_LDC_PC_G2';    flags: pc+g2),        //
+      (name: 'R_ARM_ALU_SB_G0_NC'; flags: g0+nc),        //
+      (name: 'R_ARM_ALU_SB_G0';    flags: g0),           //
+      (name: 'R_ARM_ALU_SB_G1_NC'; flags: g1+nc),        //
+      (name: 'R_ARM_ALU_SB_G1';    flags: g1),           //
+      (name: 'R_ARM_ALU_SB_G2';    flags: g2),           //
+      (name: 'R_ARM_LDR_SB_G0';    flags: g0),           //
+      (name: 'R_ARM_LDR_SB_G1';    flags: g1),           //
+      (name: 'R_ARM_LDR_SB_G2';    flags: g2),           //
+      (name: 'R_ARM_LDRS_SB_G0';   flags: g0),           //
+      (name: 'R_ARM_LDRS_SB_G1';   flags: g1),           //
+      (name: 'R_ARM_LDRS_SB_G2';   flags: g2),           //
+      (name: 'R_ARM_LDC_SB_G0';    flags: g0),           //
+      (name: 'R_ARM_LDC_SB_G1';    flags: g1),           //
+      (name: 'R_ARM_LDC_SB_G2';    flags: g2),           //
+      (name: 'R_ARM_MOVW_BREL_NC'; flags: nc),
+      (name: 'R_ARM_MOVT_BREL';    flags: 0),
+      (name: 'R_ARM_MOVW_BREL';    flags: 0),
+      (name: 'R_ARM_THM_MOVW_BREL_NC'; flags: nc+thm),
+      (name: 'R_ARM_THM_MOVT_BREL'; flags: thm),
+      (name: 'R_ARM_THM_MOVW_BREL'; flags: thm),
+      (name: 'R_ARM_TLS_GOTDESC';   flags: 0),
+      (name: 'R_ARM_TLS_CALL';      flags: 0),
+      (name: 'R_ARM_TLS_DESCSEQ';   flags: 0),
+      (name: 'R_ARM_THM_TLS_CALL';  flags: 0),
+      (name: 'R_ARM_PLT32_ABS';     flags: 0),
+      (name: 'R_ARM_GOT_ABS';       flags: 0),
+      (name: 'R_ARM_GOT_PREL';      flags: pc),          //
+      (name: 'R_ARM_GOT_BREL12';    flags: 0),
+      (name: 'R_ARM_GOTOFF12';      flags: 0),
+      (name: 'R_ARM_GOTRELAX';      flags: 0),
+      (name: 'R_ARM_GNU_VTENTRY';   flags: 0),
+      (name: 'R_ARM_GNU_VTINHERIT'; flags: 0),
+      (name: 'R_ARM_THM_JUMP11';    flags: thm),
+      (name: 'R_ARM_THM_JUMP8';     flags: thm),
+      (name: 'R_ARM_TLS_GD32';      flags: 0),
+      (name: 'R_ARM_TLS_LDM32';     flags: 0),
+      (name: 'R_ARM_TLS_LDO32';     flags: 0),
+      (name: 'R_ARM_TLS_IE32';      flags: 0),
+      (name: 'R_ARM_TLS_LE32';      flags: 0),
+      (name: 'R_ARM_TLS_LDO12';     flags: 0),
+      (name: 'R_ARM_TLS_LE12';      flags: 0),
+      (name: 'R_ARM_TLS_IE12GP';    flags: 0)
+    );
+
+{****************************************************************************
+                              ELF Target methods
+****************************************************************************}
+
+  { Maps the generic relocation kind stored in objrel.typ to its R_ARM_*
+    number. Only RELOC_NONE, RELOC_ABSOLUTE and RELOC_RELATIVE are known;
+    any other kind sets the result to 0 and calls InternalError(2012110602). }
+  function elf_arm_encodereloc(objrel:TObjRelocation):byte;
+    begin
+      case objrel.typ of
+        RELOC_ABSOLUTE:
+          result:=R_ARM_ABS32;
+        RELOC_RELATIVE:
+          result:=R_ARM_REL32;
+        RELOC_NONE:
+          result:=R_ARM_NONE;
+      else
+        begin
+          result:=0;
+          InternalError(2012110602);
+        end;
+      end;
+    end;
+
+  { Produces a printable name for relocation number reltyp: the relocprops
+    entry when the number is inside the table, a synthesized name for the
+    private range 112..127, the two named relocations beyond the table, and
+    'unknown (<number>)' for everything else. }
+  function elf_arm_relocname(reltyp:byte):string;
+    begin
+      { numbers covered by the relocprops table }
+      if reltyp<=high(relocprops) then
+        begin
+          result:=relocprops[reltyp].name;
+          exit;
+        end;
+
+      { numbers past the end of the table }
+      case reltyp of
+        R_ARM_IRELATIVE:
+          result:='R_ARM_IRELATIVE';
+        R_ARM_THM_TLS_DESCSEQ:
+          result:='R_ARM_THM_TLS_DESCSEQ';
+        112..127:
+          result:='R_ARM_PRIVATE_'+tostr(reltyp-112);
+      else
+        result:='unknown ('+tostr(reltyp)+')';
+      end;
+    end;
+
+  { Post-processes a relocation after loading: an R_ARM_V4BX relocation gets
+    the rf_nosymbol flag added; all other relocation types are left as is. }
+  procedure elf_arm_loadreloc(objrel:TObjRelocation);
+    begin
+      if objrel.ftype<>R_ARM_V4BX then
+        exit;
+      objrel.flags:=objrel.flags or rf_nosymbol;
+    end;
+
+  { Handles ARM-specific section types while reading an ELF object.
+    For SHT_ARM_EXIDX, SHT_ARM_PREEMPTMAP and SHT_ARM_ATTRIBUTES a section is
+    created through objinput.CreateSection and true is returned; for any
+    other type the type value is printed in hex and false is returned. }
+  function elf_arm_loadsection(objinput:TElfObjInput;objdata:TObjData;const shdr:TElfsechdr;shindex:longint):boolean;
+    var
+      secname:string;
+    begin
+      case shdr.sh_type of
+        SHT_ARM_EXIDX,
+        SHT_ARM_PREEMPTMAP,
+        SHT_ARM_ATTRIBUTES:
+          begin
+            { NOTE(review): secname is never assigned in this routine before
+              being passed on - presumably CreateSection fills it in (out/var
+              parameter); confirm against its declaration }
+            objinput.CreateSection(shdr,shindex,objdata,secname);
+            result:=true;
+          end;
+      else
+        { NOTE(review): prints the unhandled section type to standard output;
+          looks like leftover debug output - confirm whether it is intended }
+        writeln(hexstr(shdr.sh_type,8));
+        result:=false;
+      end;
+    end;
+
+{****************************************************************************
+                              TELFExeOutputARM
+****************************************************************************}
+
+  { Splits value into 8-bit chunks for ARM group relocations.
+    The loop runs n+1 times (i = 0..n); every pass takes the most significant
+    8-bit chunk of the remaining value, positioned on an even bit boundary,
+    and removes it from value.
+    Result: the chunk taken in the LAST pass, as an 8-bit constant in the low
+    byte, combined with a rotation field shifted left by 8 when the chunk
+    does not already sit in the low byte.
+    final_residual: whatever is left of value after all passes.
+    When n<0 the loop body never runs: the result is 0 and final_residual
+    equals value. }
+  function group_reloc_mask(value:longword;n:longint;out final_residual:longword):longword;
+    var
+      i:longint;
+      g_n:longword;
+      shift:longint;
+    begin
+      result:=0;
+      for i:=0 to n do
+        begin
+          if (value=0) then
+            shift:=0
+          else
+            { MSB in the residual, aligned to a 2-bit boundary }
+            { (the -6 places the 8-bit window so that its top two bits cover
+              the aligned MSB position; clamped at 0 for small values) }
+            shift:=max(0,(bsrdword(value) and (not 1))-6);
+
+          { Calculate plain g_n and encode it into constant+rotation form }
+          g_n:=value and ($ff shl shift);
+          result:=(g_n shr shift);
+          { a chunk above the low byte needs a rotation: (32-shift) div 2,
+            placed above the 8-bit constant }
+          if (g_n>$FF) then
+            result:=result or ((32-shift) div 2) shl 8;
+
+          { Mask away the processed part of residual }
+          value:=value and (not g_n);
+        end;
+      final_residual:=value;
+    end;
+
+
+  { Writes the GOT slot for objsym, together with a dynamic relocation for
+    it, but only if that slot is exactly the next one to be appended to the
+    GOT section; otherwise nothing is written.
+    relocval is the value stored in the slot. reltyp is not used here.
+    A gotoffset of 0 triggers InternalError(2012060902). }
+  procedure TElfExeOutputARM.MaybeWriteGOTEntry(reltyp:byte;relocval:aint;objsym:TObjSymbol);
+    var
+      gotoff,tmp:aword;
+    begin
+      gotoff:=objsym.exesymbol.gotoffset;
+      if gotoff=0 then
+        InternalError(2012060902);
+
+      { the GOT slot itself, and a dynamic relocation for it }
+      { TODO: only data symbols must get here }
+      { gotoff is compared against "current size + one slot", i.e. it appears
+        to point just past the symbol's slot - TODO confirm against AllocGOTSlot }
+      if gotoff=gotobjsec.Data.size+sizeof(pint) then
+        begin
+          gotobjsec.write(relocval,sizeof(pint));
+
+          { address of the slot that was just written }
+          tmp:=gotobjsec.mempos+gotoff-sizeof(pint);
+          if (objsym.exesymbol.dynindex>0) then
+            begin
+              { symbol has a dynamic index: emit a GLOB_DAT reloc against it }
+              WriteDynRelocEntry(tmp,R_ARM_GLOB_DAT,objsym.exesymbol.dynindex,0)
+            end
+          else if IsSharedLibrary then
+            { no dynamic index, shared library: emit a RELATIVE reloc instead }
+            WriteDynRelocEntry(tmp,R_ARM_RELATIVE,0,relocval);
+        end;
+    end;
+
+
+  { Emits the first PLT entry: four fixed ARM instructions (16 bytes, shown
+    in the comments next to the byte strings) followed by a 4-byte relative
+    relocation against the start of the .got.plt section. }
+  procedure TElfExeOutputARM.WriteFirstPLTEntry;
+    begin
+      pltobjsec.WriteBytes(
+        #$04#$E0#$2D#$E5+       // str   lr, [sp, #-4]!
+        #$04#$E0#$9F#$E5+       // ldr   lr, [pc, #4]
+        #$0E#$E0#$8F#$E0+       // add   lr, pc, lr
+        #$08#$F0#$BE#$E5);      // ldr   pc, [lr, #8]!
+                                // .long _GLOBAL_OFFSET_TABLE-.
+      pltobjsec.writeReloc_internal(gotpltobjsec,0,4,RELOC_RELATIVE);
+    end;
+
+
+  { Emits one PLT entry for exesym:
+      - defines a local data symbol '<name>_ptr' in the .got.plt section,
+      - writes three instructions to .plt and attaches G0/G1/G2 group
+        relocations referring to that symbol,
+      - writes the matching .got.plt slot and the .rel.plt record
+        (R_ARM_JUMP_SLOT against exesym.dynindex). }
+  procedure TElfExeOutputARM.WritePLTEntry(exesym: TExeSymbol);
+    var
+      tmp: longword;
+      sym:TObjSymbol;
+    begin
+      { TODO: it may be beneficial to postpone processing until after mempos pass,
+        and calculate instructions directly, instead of messing with complex relocations. }
+      { Group relocation to "section+offset" with REL-style is impossible, because the
+        offset has to be encoded into instructions, and it is only possible for offsets
+        representable as shifter constants. Therefore we need to define a symbol
+        (and risk a name conflict, to some degree) }
+      internalobjdata.setsection(gotpltobjsec);
+      sym:=internalobjdata.SymbolDefine(exesym.name+'_ptr',AB_LOCAL,AT_DATA);
+      pltobjsec.WriteBytes(
+        #$08#$C0#$4F#$E2+      // add ip,pc,#:pc_g0_nc:sym-8
+        #$04#$C0#$4C#$E2+      // add ip,ip,#:pc_g1_nc:sym-4
+        #$00#$F0#$BC#$E5);     // ldr pc,[ip,#:pc_g2:sym]!
+
+      { one raw relocation per instruction just written (12, 8 and 4 bytes
+        back from the current end of the section) }
+      pltobjsec.addrawReloc(pltobjsec.size-12,sym,R_ARM_ALU_PC_G0_NC);
+      pltobjsec.addrawReloc(pltobjsec.size-8,sym,R_ARM_ALU_PC_G1_NC);
+      pltobjsec.addrawReloc(pltobjsec.size-4,sym,R_ARM_LDR_PC_G2);
+
+      { .got.plt slot initially points to the first PLT entry }
+      gotpltobjsec.writeReloc_internal(pltobjsec,0,sizeof(pint),RELOC_ABSOLUTE);
+      { write a .rel.plt entry (Elf32_rel record) }
+      pltrelocsec.writeReloc_internal(gotpltobjsec,gotpltobjsec.size-sizeof(pint),sizeof(pint),RELOC_ABSOLUTE);
+      { info word: symbol index in the upper 24 bits, relocation type in the low byte }
+      tmp:=(exesym.dynindex shl 8) or R_ARM_JUMP_SLOT;
+      pltrelocsec.write(tmp,sizeof(tmp));
+      { RELA-style targets carry an extra (zero) addend field }
+      if ElfTarget.relocs_use_addend then
+        pltrelocsec.writezeros(sizeof(pint));
+    end;
+
+
+  { No ARM-specific handling: simply forwards to the inherited implementation. }
+  procedure TElfExeOutputARM.WriteIndirectPLTEntry(exesym: TExeSymbol);
+    begin
+      inherited WriteIndirectPLTEntry(exesym);
+    end;
+
+
+  { First pass over relocation number idx of objsec. Depending on the
+    relocation type it
+      - marks the target symbol as referenced through the PLT (symref_plt)
+        for call/jump relocations,
+      - marks the target symbol as referenced from text (symref_from_text)
+        for R_ARM_ABS32 in executable or non-writable sections,
+      - for R_ARM_ABS32 in a shared library: reserves one dynamic relocation
+        entry, flags the relocation rf_dynamic and records text relocations,
+      - allocates a GOT slot for R_ARM_GOT_BREL and R_ARM_TLS_IE32.
+    idx is not modified here. }
+  procedure TElfExeOutputARM.GOTRelocPass1(objsec:TObjSection;var idx:longint);
+    var
+      objreloc:TObjRelocation;
+      exesym:TExeSymbol;
+      objsym:TObjSymbol;
+      reltyp:byte;
+    begin
+      objreloc:=TObjRelocation(objsec.ObjRelocations[idx]);
+      { raw relocations carry their ELF type directly, others are translated }
+      if (ObjReloc.flags and rf_raw)=0 then
+        reltyp:=ElfTarget.encodereloc(ObjReloc)
+      else
+        reltyp:=ObjReloc.ftype;
+
+      case reltyp of
+        // Any call or jump can go through PLT, no x86-like segregation here.
+        R_ARM_PC24,
+        R_ARM_CALL,
+        R_ARM_JUMP24,
+        R_ARM_PREL31,
+        R_ARM_THM_CALL,
+        R_ARM_THM_JUMP24,
+        R_ARM_THM_JUMP19,
+        R_ARM_PLT32:
+          begin
+            if (objreloc.symbol=nil) or (objreloc.symbol.exesymbol=nil) then
+              exit;
+            exesym:=objreloc.symbol.exesymbol;
+            exesym.objsymbol.refs:=exesym.objsymbol.refs or symref_plt;
+          end;
+
+        R_ARM_ABS32:
+          { the relocation may refer to a section instead of a symbol, in
+            which case symbol is nil and must not be dereferenced }
+          if Assigned(ObjReloc.symbol) and
+             Assigned(ObjReloc.symbol.exesymbol) then
+            begin
+              objsym:=ObjReloc.symbol.exesymbol.ObjSymbol;
+              if (oso_executable in objsec.SecOptions) or
+                not (oso_write in objsec.SecOptions) then
+                  objsym.refs:=objsym.refs or symref_from_text;
+            end;
+      end;
+
+      case reltyp of
+        R_ARM_ABS32:
+          begin
+            { absolute addresses need a dynamic relocation only in shared libraries }
+            if not IsSharedLibrary then
+              exit;
+            if (oso_executable in objsec.SecOptions) or
+               not (oso_write in objsec.SecOptions) then
+              hastextrelocs:=True;
+            dynrelocsec.alloc(dynrelocsec.shentsize);
+            objreloc.flags:=objreloc.flags or rf_dynamic;
+          end;
+
+        //R_ARM_GOT_ABS,
+        //R_ARM_GOT_PREL,
+        //R_ARM_GOT_BREL12,
+        R_ARM_GOT_BREL:
+          begin
+            AllocGOTSlot(objreloc.symbol);
+          end;
+
+        R_ARM_TLS_IE32:
+          AllocGOTSlot(objreloc.symbol);
+
+      end;
+    end;
+
+
+  procedure TElfExeOutputARM.DoRelocationFixup(objsec:TObjSection);
+  var
+    i,zero:longint;
+    objreloc: TObjRelocation;
+    tmp,
+    address,
+    relocval : aint;
+    relocsec : TObjSection;
+    data: TDynamicArray;
+    reltyp: byte;
+    group:longint;
+    rotation:longint;
+    residual,g_n:longword;
+    curloc: aword;
+  begin
+    data:=objsec.data;
+    for i:=0 to objsec.ObjRelocations.Count-1 do
+      begin
+        objreloc:=TObjRelocation(objsec.ObjRelocations[i]);
+        case objreloc.typ of
+          RELOC_NONE:
+            continue;
+          RELOC_ZERO:
+            begin
+              data.Seek(objreloc.dataoffset);
+              zero:=0;
+              data.Write(zero,4);
+              continue;
+            end;
+        end;
+
+        if (objreloc.flags and rf_raw)=0 then
+          reltyp:=ElfTarget.encodereloc(objreloc)
+        else
+          reltyp:=objreloc.ftype;
+
+        { TODO: TARGET1 and TARGET2 are intended to be configured via commandline }
+        if (reltyp=R_ARM_TARGET1) then
+          reltyp:=R_ARM_ABS32;             { may be ABS32 or REL32 }
+        if (reltyp=R_ARM_TARGET2) then
+          reltyp:=R_ARM_ABS32;             { may be ABS32,REL32 or GOT_PREL }
+
+        if ElfTarget.relocs_use_addend then
+          address:=objreloc.orgsize
+        else
+          begin
+            data.Seek(objreloc.dataoffset);
+            data.Read(address,4);
+          end;
+        if assigned(objreloc.symbol) then
+          begin
+            relocsec:=objreloc.symbol.objsection;
+            relocval:=objreloc.symbol.address;
+          end
+        else if assigned(objreloc.objsection) then
+          begin
+            relocsec:=objreloc.objsection;
+            relocval:=objreloc.objsection.mempos
+          end
+        else if (reltyp=R_ARM_V4BX) then
+          continue        // ignore for now
+        else
+          internalerror(2012060702);
+
+        { Only debug sections are allowed to have relocs pointing to unused sections }
+        if assigned(relocsec) and not (relocsec.used and assigned(relocsec.exesection)) and
+           not (oso_debug in objsec.secoptions) then
+          begin
+            writeln(objsec.fullname,' references ',relocsec.fullname);
+            internalerror(2012060703);
+          end;
+
+        curloc:=objsec.mempos+objreloc.dataoffset;
+        if (relocsec=nil) or (relocsec.used) then
+          case reltyp of
+
+            R_ARM_ABS32:
+              begin
+                if (objreloc.flags and rf_dynamic)<>0 then
+                  begin
+                    if (objreloc.symbol=nil) or
+                       (objreloc.symbol.exesymbol=nil) or
+                       (objreloc.symbol.exesymbol.dynindex=0) then
+                      begin
+                        address:=address+relocval;
+                        WriteDynRelocEntry(objreloc.dataoffset+objsec.mempos,R_ARM_RELATIVE,0,address);
+                      end
+                    else
+                      { Don't modify address in this case, as it serves as addend for RTLD }
+                      WriteDynRelocEntry(objreloc.dataoffset+objsec.mempos,R_ARM_ABS32,objreloc.symbol.exesymbol.dynindex,0);
+                  end
+                else
+                  address:=address+relocval;
+              end;
+
+            R_ARM_REL32:
+              begin
+                address:=address+relocval-curloc;
+              end;
+
+            R_ARM_PC24,
+            R_ARM_PLT32,
+            R_ARM_JUMP24,
+            R_ARM_CALL:
+              begin
+                { R_ARM_PC24 is deprecated in favour of R_ARM_JUMP24 and R_ARM_CALL,
+                  which allow to distinguish opcodes without examining them.
+                  Difference is:
+                  1) when target is Thumb, BL can be changed to BLX, while B has
+                  to go via thunking code.
+                  2) when target is unresolved weak symbol, CALL must be changed to NOP,
+                  while JUMP24 behavior is unspecified. }
+                tmp:=sarlongint((address and $00FFFFFF) shl 8,6);
+                tmp:=tmp+relocval-curloc;
+                // TODO: check overflow
+                address:=(address and $FF000000) or ((tmp and $3FFFFFE) shr 2);
+              end;
+
+            R_ARM_BASE_PREL:    { GOTPC }
+              address:=address+gotsymbol.address-curloc;
+
+            R_ARM_GOT_BREL:     { GOT32 }
+              begin
+                MaybeWriteGOTEntry(reltyp,relocval,objreloc.symbol);
+                address:=address+gotobjsec.mempos+objreloc.symbol.exesymbol.gotoffset-sizeof(pint)-gotsymbol.address;
+              end;
+
+            R_ARM_GOTOFF32:
+              address:=address+relocval-gotsymbol.address;
+
+            R_ARM_ALU_PC_G0_NC,
+            R_ARM_ALU_PC_G1_NC,
+            R_ARM_ALU_PC_G0,
+            R_ARM_ALU_PC_G1,
+            R_ARM_ALU_PC_G2,
+            R_ARM_ALU_SB_G0_NC,
+            R_ARM_ALU_SB_G1_NC,
+            R_ARM_ALU_SB_G0,
+            R_ARM_ALU_SB_G1,
+            R_ARM_ALU_SB_G2:
+              begin
+                group:=(relocprops[reltyp].flags and gpmask)-1;
+                if group<0 then
+                  InternalError(2012112601);
+
+                if (not ElfTarget.relocs_use_addend) then
+                  begin
+                    { initial addend must be determined by parsing the instruction }
+                    tmp:=address and $FF;
+                    rotation:=(address and $F00) shr 7;  { is in multpile of 2 bits }
+                    if rotation<>0 then
+                      tmp:=RorDword(tmp,rotation);
+                    case (address and $1E00000) of
+                      1 shl 23: ;           { ADD instruction }
+                      1 shl 22: tmp:=-tmp;  { SUB instruction }
+                    else
+                      Comment(v_error,'Group ALU relocations are permitted only for ADD or SUB instructions');
+                      continue;
+                    end;
+                  end
+                else  { TODO: must read the instruction anyway!! }
+                  tmp:=address;
+
+                if (relocprops[reltyp].flags and pc)<>0 then
+                  tmp:=tmp+relocval-curloc
+                else
+                  tmp:=tmp+relocval{-SB};  { assuming zero segment base }
+
+                g_n:=group_reloc_mask(abs(tmp),group,residual);
+                {TODO: check for overflow}
+
+                address:=address and $FF1FF000 or g_n;
+                { set opcode depending on the sign of resulting value }
+                if tmp<0 then
+                  address:=address or (1 shl 22)
+                else
+                  address:=address or (1 shl 23);
+              end;
+
+            R_ARM_LDR_PC_G0,
+            R_ARM_LDR_PC_G1,
+            R_ARM_LDR_PC_G2,
+            R_ARM_LDR_SB_G0,
+            R_ARM_LDR_SB_G1,
+            R_ARM_LDR_SB_G2:
+              begin
+                group:=(relocprops[reltyp].flags and gpmask)-1;
+                if group<0 then
+                  InternalError(2012112602);
+
+                if (not ElfTarget.relocs_use_addend) then
+                  begin
+                    tmp:=(address and $FFF);
+                    if (address and (1 shl 23))=0 then
+                      tmp:=-tmp;
+                  end
+                else   { TODO: must read the instruction anyway }
+                  tmp:=address;
+
+                if (relocprops[reltyp].flags and pc)<>0 then
+                  tmp:=tmp+relocval-curloc
+                else
+                  tmp:=tmp+relocval{-SB};  { assuming zero segment base }
+
+                group_reloc_mask(abs(tmp),group-1,residual);
+                if residual>$FFF then
+                  InternalError(2012112603);  { TODO: meaningful overflow error message }
+
+                address:=address and $FF7FF000 or residual;
+                if tmp>=0 then
+                  address:=address or (1 shl 23);
+              end;
+
+            R_ARM_LDRS_PC_G0,
+            R_ARM_LDRS_PC_G1,
+            R_ARM_LDRS_PC_G2,
+            R_ARM_LDRS_SB_G0,
+            R_ARM_LDRS_SB_G1,
+            R_ARM_LDRS_SB_G2:
+              begin
+                group:=(relocprops[reltyp].flags and gpmask)-1;
+                if group<0 then
+                  InternalError(2012112606);
+
+                if (not ElfTarget.relocs_use_addend) then
+                  begin
+                    tmp:=((address and $F00) shr 4) or (address and $F);
+                    if (address and (1 shl 23))=0 then
+                      tmp:=-tmp;
+                  end
+                else { TODO: must read the instruction anyway }
+                  tmp:=address;
+
+                if (relocprops[reltyp].flags and pc)<>0 then
+                  tmp:=tmp+relocval-curloc
+                else
+                  tmp:=tmp+relocval{-SB};  { assuming zero segment base }
+
+                group_reloc_mask(abs(tmp),group-1,residual);
+                if (residual>$FF) then
+                  InternalError(2012112607); { TODO: meaningful overflow error message }
+
+                address:=address and $FF7FF0F0 or ((residual and $F0) shl 4) or (residual and $F);
+                if tmp>=0 then
+                  address:=address or (1 shl 23);
+              end;
+
+            R_ARM_LDC_PC_G0,
+            R_ARM_LDC_PC_G1,
+            R_ARM_LDC_PC_G2,
+            R_ARM_LDC_SB_G0,
+            R_ARM_LDC_SB_G1,
+            R_ARM_LDC_SB_G2:
+              begin
+                group:=(relocprops[reltyp].flags and gpmask)-1;
+                if group<0 then
+                  InternalError(2012112604);
+
+                if (not ElfTarget.relocs_use_addend) then
+                  begin
+                    tmp:=(address and $FF) shl 2;
+                    if (address and (1 shl 23))=0 then
+                      tmp:=-tmp;
+                  end
+                else { TODO: must read the instruction anyway }
+                  tmp:=address;
+
+                if (relocprops[reltyp].flags and pc)<>0 then
+                  tmp:=tmp+relocval-curloc
+                else
+                  tmp:=tmp+relocval{-SB};  { assuming zero segment base }
+
+                group_reloc_mask(abs(tmp),group-1,residual);
+                { residual must be divisible by 4 and fit into 8 bits after having been divided }
+                if ((residual and 3)<>0) or (residual>$3FF) then
+                  InternalError(2012112605);  { TODO: meaningful overflow error message }
+
+                address:=address and $FF7FFF00 or (residual shr 2);
+                if tmp>=0 then
+                  address:=address or (1 shl 23);
+              end;
+
+            R_ARM_TLS_IE32:
+              begin
+                relocval:=relocval-tlsseg.mempos+align_aword(TCB_SIZE,tlsseg.align);
+                MaybeWriteGOTEntry(reltyp,relocval,objreloc.symbol);
+                { resolves to PC-relative offset to GOT slot }
+                relocval:=gotobjsec.mempos+objreloc.symbol.exesymbol.gotoffset-sizeof(pint);
+                address:=address+relocval-curloc;
+              end;
+
+            R_ARM_TLS_LE32:
+              if IsSharedLibrary then
+                { TODO: error message saying "recompile with -Cg" isn't correct. Or is it? }
+                ReportNonDSOReloc(reltyp,objsec,objreloc)
+              else
+                address:=relocval-tlsseg.mempos+align_aword(TCB_SIZE,tlsseg.align);
+
+          else
+            begin
+              writeln(objreloc.ftype);
+              internalerror(200604014);
+            end;
+          end
+        else           { not relocsec.Used }
+          address:=0;  { Relocation in debug section points to unused section, which is eliminated by linker }
+
+        data.Seek(objreloc.dataoffset);
+        data.Write(address,4);
+      end;
+    end;
+
+
+{*****************************************************************************
+                                    Initialize
+*****************************************************************************}
+
+  const  { ARM description record; it is assigned to ElfTarget in the initialization section below }
+    elf_target_arm: TElfTarget =
+      (
+        max_page_size:     $8000;   { $8000 = 32 KiB }
+        exe_image_base:    $8000;   { presumably the default load address of executables - confirm against TElfTarget }
+        machine_code:      EM_ARM;  { presumably ends up in the e_machine field of the ELF header - TODO confirm }
+        relocs_use_addend: false;   { the relocation code above then extracts the initial addend from the instruction bits }
+        dyn_reloc_codes: (          { R_ARM_* codes for dynamic relocations; order presumably must match the index type declared with TElfTarget - verify }
+          R_ARM_RELATIVE,
+          R_ARM_GLOB_DAT,
+          R_ARM_JUMP_SLOT,
+          R_ARM_COPY,
+          R_ARM_IRELATIVE
+        );
+        relocname:         @elf_arm_relocName;    { the four entries below are addresses of ARM-specific routines; presumably declared earlier in this unit }
+        encodereloc:       @elf_arm_encodeReloc;
+        loadreloc:         @elf_arm_loadReloc;
+        loadsection:       @elf_arm_loadSection;
+      );
+
+initialization
+  ElfTarget:=elf_target_arm;             { install the ARM record as the ElfTarget consulted by the relocation code }
+  ElfExeOutputClass:=TElfExeOutputARM;   { presumably selects the ARM-specific executable writer class - confirm }
+
+end.
+

+ 205 - 198
compiler/arm/cpuinfo.pas

@@ -37,9 +37,17 @@ Type
        cpu_armv4t,
        cpu_armv5,
        cpu_armv5t,
+       cpu_armv5te,
+       cpu_armv5tej,
        cpu_armv6,
+       cpu_armv6k,
+       cpu_armv6t2,
+       cpu_armv6z,
        cpu_armv7,
-       cpu_armv7m
+       cpu_armv7a,
+       cpu_armv7r,
+       cpu_armv7m,
+       cpu_armv7em
       );
 
 Const
@@ -57,7 +65,8 @@ Type
       fpu_fpa11,
       fpu_vfpv2,
       fpu_vfpv3,
-      fpu_vfpv3_d16
+      fpu_vfpv3_d16,
+      fpu_fpv4_s16
      );
 
    tcontrollertype =
@@ -81,9 +90,38 @@ Type
       ct_at91sam7xc256,
 		
       { STMicroelectronics }
-      ct_stm32f103rb,
-      ct_stm32f103re,
-      ct_stm32f103c4t,
+      ct_stm32f100x4, // LD&MD value line, 4=16,6=32,8=64,b=128
+      ct_stm32f100x6,
+      ct_stm32f100x8,
+      ct_stm32f100xB,
+      ct_stm32f100xC, // HD value line, C=256,D=384,E=512
+      ct_stm32f100xD,
+      ct_stm32f100xE,
+      ct_stm32f101x4, // LD Access line, 4=16,6=32
+      ct_stm32f101x6,
+      ct_stm32f101x8, // MD Access line, 8=64,B=128
+      ct_stm32f101xB,
+      ct_stm32f101xC, // HD Access line, C=256,D=384,E=512
+      ct_stm32f101xD,
+      ct_stm32f101xE,
+      ct_stm32f101xF, // XL Access line, F=768,G=1M
+      ct_stm32f101xG,
+      ct_stm32f102x4, // LD usb access line, 4=16,6=32
+      ct_stm32f102x6,
+      ct_stm32f102x8, // MD usb access line, 8=64,B=128
+      ct_stm32f102xB,
+      ct_stm32f103x4, // LD performance line, 4=16,6=32
+      ct_stm32f103x6,
+      ct_stm32f103x8, // MD performance line, 8=64,B=128
+      ct_stm32f103xB,
+      ct_stm32f103xC, // HD performance line, C=256,D=384,E=512
+      ct_stm32f103xD,
+      ct_stm32f103xE,
+      ct_stm32f103xF, // XL performance line, F=768,G=1M
+      ct_stm32f103xG,
+      ct_stm32f107x8, // MD and HD connectivity line, 8=64,B=128,C=256
+      ct_stm32f107xB,
+      ct_stm32f107xC,
 
       { TI - Fury Class - 64 K Flash, 16 K SRAM Devices }
       ct_lm3s1110,
@@ -198,9 +236,17 @@ Const
      'ARMV4T',
      'ARMV5',
      'ARMV5T',
+     'ARMV5TE',
+     'ARMV5TEJ',
      'ARMV6',
+     'ARMV6K',
+     'ARMV6T2',
+     'ARMV6Z',
      'ARMV7',
-     'ARMV7M'
+     'ARMV7A',
+     'ARMV7R',
+     'ARMV7M',
+     'ARMV7EM'
    );
 
    fputypestr : array[tfputype] of string[9] = ('',
@@ -211,7 +257,8 @@ Const
      'FPA11',
      'VFPV2',
      'VFPV3',
-     'VFPV3_D16'
+     'VFPV3_D16',
+     'FPV4_S16'
    );
 
 
@@ -223,7 +270,6 @@ Const
    ((
    	controllertypestr:'';
         controllerunitstr:'';
-        interruptvectors:0;
         flashbase:0;
         flashsize:0;
         srambase:0;
@@ -233,8 +279,7 @@ Const
         (
     	controllertypestr:'LPC2114';
         controllerunitstr:'LPC21x4';
-        interruptvectors:8;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$40000000;
         sramsize:$00004000
@@ -243,8 +288,7 @@ Const
         (
     	controllertypestr:'LPC2124';
         controllerunitstr:'LPC21x4';
-        interruptvectors:8;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$40000000;
         sramsize:$00004000
@@ -253,8 +297,7 @@ Const
         (
     	controllertypestr:'LPC2194';
         controllerunitstr:'LPC21x4';
-        interruptvectors:8;
-    	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$40000000;
         sramsize:$00004000
@@ -263,8 +306,7 @@ Const
         (
     	controllertypestr:'LPC1754';
         controllerunitstr:'LPC1754';
-        interruptvectors:12;
-    	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$10000000;
         sramsize:$00004000
@@ -273,8 +315,7 @@ Const
         (
     	controllertypestr:'LPC1756';
         controllerunitstr:'LPC1756';
-        interruptvectors:12;
-    	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$10000000;
         sramsize:$00004000
@@ -283,8 +324,7 @@ Const
         (
     	controllertypestr:'LPC1758';
         controllerunitstr:'LPC1758';
-        interruptvectors:12;
-    	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00080000;
         srambase:$10000000;
         sramsize:$00008000
@@ -293,8 +333,7 @@ Const
         (
     	controllertypestr:'LPC1764';
         controllerunitstr:'LPC1764';
-        interruptvectors:12;
-    	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$10000000;
         sramsize:$00004000
@@ -303,8 +342,7 @@ Const
         (
     	controllertypestr:'LPC1766';
         controllerunitstr:'LPC1766';
-        interruptvectors:12;
-    	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$10000000;
         sramsize:$00008000
@@ -313,8 +351,7 @@ Const
         (
     	controllertypestr:'LPC1768';
         controllerunitstr:'LPC1768';
-        interruptvectors:12;
-    	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00080000;
         srambase:$10000000;
         sramsize:$00008000
@@ -323,7 +360,6 @@ Const
         (
     	controllertypestr:'AT91SAM7S256';
         controllerunitstr:'AT91SAM7x256';
-        interruptvectors:8;
         flashbase:$00000000;
         flashsize:$00040000;
         srambase:$00200000;
@@ -333,7 +369,6 @@ Const
         (
     	controllertypestr:'AT91SAM7SE256';
         controllerunitstr:'AT91SAM7x256';
-        interruptvectors:8;
         flashbase:$00000000;
         flashsize:$00040000;
         srambase:$00200000;
@@ -343,7 +378,6 @@ Const
         (
     	controllertypestr:'AT91SAM7X256';
         controllerunitstr:'AT91SAM7x256';
-        interruptvectors:8;
         flashbase:$00000000;
         flashsize:$00040000;
         srambase:$00200000;
@@ -353,51 +387,52 @@ Const
         (
     	controllertypestr:'AT91SAM7XC256';
         controllerunitstr:'AT91SAM7x256';
-        interruptvectors:8;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$00200000;
         sramsize:$00010000
         ),
 
-      	// ct_stm32f103rb,
-        (
-    	controllertypestr:'STM32F103RB';
-        controllerunitstr:'STM32F103';
-        interruptvectors:12;
-        flashbase:$08000000;
-        flashsize:$00020000;
-        srambase:$20000000;
-        sramsize:$00005000
-        ),
-        // ct_stm32f103re,
-        (
-    	controllertypestr:'STM32F103RE';
-        controllerunitstr:'STM32F103';
-        interruptvectors:12;
-        flashbase:$08000000;
-        flashsize:$00080000;
-        srambase:$20000000;
-        sramsize:$00010000
-        ),
-        // ct_stm32f103re,
-        (
-    	controllertypestr:'STM32F103C4T';
-        controllerunitstr:'STM32F103';
-        interruptvectors:12;
-        flashbase:$08000000;
-        flashsize:$00004000;
-        srambase:$20000000;
-        sramsize:$00001800
-        ),
+      { STM32F1 series }
+      	(controllertypestr:'STM32F100X4';     controllerunitstr:'STM32F10X_LD';     flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
+        (controllertypestr:'STM32F100X6';     controllerunitstr:'STM32F10X_LD';     flashbase:$08000000; flashsize:$00008000; srambase:$20000000; sramsize:$00001000),
+        (controllertypestr:'STM32F100X8';     controllerunitstr:'STM32F10X_MD';     flashbase:$08000000; flashsize:$00010000; srambase:$20000000; sramsize:$00002000),
+        (controllertypestr:'STM32F100XB';     controllerunitstr:'STM32F10X_MD';     flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00002000),
+        (controllertypestr:'STM32F100XC';     controllerunitstr:'STM32F10X_HD';     flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$00006000),
+        (controllertypestr:'STM32F100XD';     controllerunitstr:'STM32F10X_HD';     flashbase:$08000000; flashsize:$00060000; srambase:$20000000; sramsize:$00008000),
+        (controllertypestr:'STM32F100XE';     controllerunitstr:'STM32F10X_HD';     flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00008000),
+        (controllertypestr:'STM32F101X4';     controllerunitstr:'STM32F10X_LD';     flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
+        (controllertypestr:'STM32F101X6';     controllerunitstr:'STM32F10X_LD';     flashbase:$08000000; flashsize:$00008000; srambase:$20000000; sramsize:$00001800),
+        (controllertypestr:'STM32F101X8';     controllerunitstr:'STM32F10X_MD';     flashbase:$08000000; flashsize:$00010000; srambase:$20000000; sramsize:$00002800),
+        (controllertypestr:'STM32F101XB';     controllerunitstr:'STM32F10X_MD';     flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00004000),
+        (controllertypestr:'STM32F101XC';     controllerunitstr:'STM32F10X_HD';     flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$00008000),
+        (controllertypestr:'STM32F101XD';     controllerunitstr:'STM32F10X_HD';     flashbase:$08000000; flashsize:$00060000; srambase:$20000000; sramsize:$0000C000),
+        (controllertypestr:'STM32F101XE';     controllerunitstr:'STM32F10X_HD';     flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$0000C000),
+        (controllertypestr:'STM32F101XF';     controllerunitstr:'STM32F10X_XL';     flashbase:$08000000; flashsize:$000C0000; srambase:$20000000; sramsize:$00014000),
+        (controllertypestr:'STM32F101XG';     controllerunitstr:'STM32F10X_XL';     flashbase:$08000000; flashsize:$00100000; srambase:$20000000; sramsize:$00014000),
+        (controllertypestr:'STM32F102X4';     controllerunitstr:'STM32F10X_LD';     flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
+        (controllertypestr:'STM32F102X6';     controllerunitstr:'STM32F10X_LD';     flashbase:$08000000; flashsize:$00008000; srambase:$20000000; sramsize:$00001800),
+        (controllertypestr:'STM32F102X8';     controllerunitstr:'STM32F10X_MD';     flashbase:$08000000; flashsize:$00010000; srambase:$20000000; sramsize:$00002800),
+        (controllertypestr:'STM32F102XB';     controllerunitstr:'STM32F10X_MD';     flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00004000),
+        (controllertypestr:'STM32F103X4';     controllerunitstr:'STM32F10X_LD';     flashbase:$08000000; flashsize:$00004000; srambase:$20000000; sramsize:$00001000),
+        (controllertypestr:'STM32F103X6';     controllerunitstr:'STM32F10X_LD';     flashbase:$08000000; flashsize:$00008000; srambase:$20000000; sramsize:$00002800),
+        (controllertypestr:'STM32F103X8';     controllerunitstr:'STM32F10X_MD';     flashbase:$08000000; flashsize:$00010000; srambase:$20000000; sramsize:$00005000),
+        (controllertypestr:'STM32F103XB';     controllerunitstr:'STM32F10X_MD';     flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00005000),
+        (controllertypestr:'STM32F103XC';     controllerunitstr:'STM32F10X_HD';     flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$0000C000),
+        (controllertypestr:'STM32F103XD';     controllerunitstr:'STM32F10X_HD';     flashbase:$08000000; flashsize:$00060000; srambase:$20000000; sramsize:$00010000),
+        (controllertypestr:'STM32F103XE';     controllerunitstr:'STM32F10X_HD';     flashbase:$08000000; flashsize:$00080000; srambase:$20000000; sramsize:$00010000),
+        (controllertypestr:'STM32F103XF';     controllerunitstr:'STM32F10X_XL';     flashbase:$08000000; flashsize:$000C0000; srambase:$20000000; sramsize:$00018000),
+        (controllertypestr:'STM32F103XG';     controllerunitstr:'STM32F10X_XL';     flashbase:$08000000; flashsize:$00100000; srambase:$20000000; sramsize:$00018000),
+        (controllertypestr:'STM32F107X8';     controllerunitstr:'STM32F10X_CONN';   flashbase:$08000000; flashsize:$00010000; srambase:$20000000; sramsize:$00010000),
+        (controllertypestr:'STM32F107XB';     controllerunitstr:'STM32F10X_CONN';   flashbase:$08000000; flashsize:$00020000; srambase:$20000000; sramsize:$00010000),
+        (controllertypestr:'STM32F107XC';     controllerunitstr:'STM32F10X_CONN';   flashbase:$08000000; flashsize:$00040000; srambase:$20000000; sramsize:$00010000),
 
       { TI - 64 K Flash, 16 K SRAM Devices }
       	// ct_lm3s1110,
         (
     	controllertypestr:'LM3S1110';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -406,8 +441,7 @@ Const
         (
     	controllertypestr:'LM3S1133';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -416,8 +450,7 @@ Const
         (
     	controllertypestr:'LM3S1138';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -426,8 +459,7 @@ Const
         (
     	controllertypestr:'LM3S1150';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -436,8 +468,7 @@ Const
         (
     	controllertypestr:'LM3S1162';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -446,8 +477,7 @@ Const
         (
     	controllertypestr:'LM3S1165';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -456,8 +486,7 @@ Const
         (
     	controllertypestr:'LM3S1166';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -466,8 +495,7 @@ Const
         (
     	controllertypestr:'LM3S2110';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -476,8 +504,7 @@ Const
         (
     	controllertypestr:'LM3S2139';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -486,8 +513,7 @@ Const
         (
     	controllertypestr:'LM3S6100';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -496,8 +522,7 @@ Const
         (
     	controllertypestr:'LM3S6110';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00010000;
         srambase:$20000000;
         sramsize:$00004000
@@ -508,8 +533,7 @@ Const
         (
     	controllertypestr:'LM3S1601';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -518,8 +542,7 @@ Const
         (
     	controllertypestr:'LM3S1608';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -528,8 +551,7 @@ Const
         (
     	controllertypestr:'LM3S1620';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -538,8 +560,7 @@ Const
         (
     	controllertypestr:'LM3S1635';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -548,8 +569,7 @@ Const
         (
     	controllertypestr:'LM3S1636';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -558,8 +578,7 @@ Const
         (
     	controllertypestr:'LM3S1637';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -568,8 +587,7 @@ Const
         (
     	controllertypestr:'LM3S1651';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -578,8 +596,7 @@ Const
         (
     	controllertypestr:'LM3S2601';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -588,8 +605,7 @@ Const
         (
     	controllertypestr:'LM3S2608';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -598,8 +614,7 @@ Const
         (
     	controllertypestr:'LM3S2620';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -608,8 +623,7 @@ Const
         (
     	controllertypestr:'LM3S2637';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -618,8 +632,7 @@ Const
         (
     	controllertypestr:'LM3S2651';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -628,8 +641,7 @@ Const
         (
     	controllertypestr:'LM3S6610';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -638,8 +650,7 @@ Const
         (
     	controllertypestr:'LM3S6611';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -648,8 +659,7 @@ Const
         (
     	controllertypestr:'LM3S6618';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -658,8 +668,7 @@ Const
         (
     	controllertypestr:'LM3S6633';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -668,8 +677,7 @@ Const
         (
     	controllertypestr:'LM3S6637';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -678,8 +686,7 @@ Const
         (
     	controllertypestr:'LM3S8630';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00020000;
         srambase:$20000000;
         sramsize:$00008000
@@ -690,8 +697,7 @@ Const
         (
     	controllertypestr:'LM3S1911';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -700,8 +706,7 @@ Const
         (
     	controllertypestr:'LM3S1918';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -710,8 +715,7 @@ Const
         (
     	controllertypestr:'LM3S1937';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -720,8 +724,7 @@ Const
         (
     	controllertypestr:'LM3S1958';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -730,8 +733,7 @@ Const
         (
     	controllertypestr:'LM3S1960';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -740,8 +742,7 @@ Const
         (
     	controllertypestr:'LM3S1968';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -750,8 +751,7 @@ Const
         (
     	controllertypestr:'LM3S1969';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -760,8 +760,7 @@ Const
         (
     	controllertypestr:'LM3S2911';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -770,8 +769,7 @@ Const
         (
     	controllertypestr:'LM3S2918';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -780,8 +778,7 @@ Const
         (
     	controllertypestr:'LM3S2919';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -790,8 +787,7 @@ Const
         (
     	controllertypestr:'LM3S2939';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -800,8 +796,7 @@ Const
         (
     	controllertypestr:'LM3S2948';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -810,8 +805,7 @@ Const
         (
     	controllertypestr:'LM3S2950';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -820,8 +814,7 @@ Const
         (
     	controllertypestr:'LM3S2965';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -830,8 +823,7 @@ Const
         (
     	controllertypestr:'LM3S6911';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -840,8 +832,7 @@ Const
         (
     	controllertypestr:'LM3S6918';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -850,8 +841,7 @@ Const
         (
     	controllertypestr:'LM3S6938';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -860,8 +850,7 @@ Const
         (
     	controllertypestr:'LM3S6950';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -870,8 +859,7 @@ Const
         (
     	controllertypestr:'LM3S6952';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -880,8 +868,7 @@ Const
         (
     	controllertypestr:'LM3S6965';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -890,8 +877,7 @@ Const
         (
     	controllertypestr:'LM3S8930';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -900,8 +886,7 @@ Const
         (
     	controllertypestr:'LM3S8933';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -910,8 +895,7 @@ Const
         (
     	controllertypestr:'LM3S8938';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -920,8 +904,7 @@ Const
         (
     	controllertypestr:'LM3S8962';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -930,8 +913,7 @@ Const
         (
     	controllertypestr:'LM3S8970';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -940,8 +922,7 @@ Const
         (
     	controllertypestr:'LM3S8971';
         controllerunitstr:'LM3FURY';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -952,8 +933,7 @@ Const
         (
     	controllertypestr:'LM3S5951';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -962,8 +942,7 @@ Const
         (
     	controllertypestr:'LM3S5956';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -972,8 +951,7 @@ Const
         (
     	controllertypestr:'LM3S1B21';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -982,8 +960,7 @@ Const
         (
     	controllertypestr:'LM3S2B93';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -992,8 +969,7 @@ Const
         (
     	controllertypestr:'LM3S5B91';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -1002,8 +978,7 @@ Const
         (
     	controllertypestr:'LM3S9B81';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -1012,8 +987,7 @@ Const
         (
     	controllertypestr:'LM3S9B90';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -1022,8 +996,7 @@ Const
         (
     	controllertypestr:'LM3S9B92';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -1032,8 +1005,7 @@ Const
         (
     	controllertypestr:'LM3S9B95';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -1042,8 +1014,7 @@ Const
         (
     	controllertypestr:'LM3S9B96';
         controllerunitstr:'LM3TEMPEST';
-        interruptvectors:72;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00040000;
         srambase:$20000000;
         sramsize:$00010000
@@ -1053,8 +1024,7 @@ Const
         (
     	controllertypestr:'SC32442B';
         controllerunitstr:'sc32442b';
-        interruptvectors:7;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00000000;
         srambase:$00000000;
         sramsize:$08000000
@@ -1064,15 +1034,14 @@ Const
         (
     	controllertypestr:'THUMB2_BARE';
         controllerunitstr:'THUMB2_BARE';
-        interruptvectors:128;
-	flashbase:$00000000;
+        flashbase:$00000000;
         flashsize:$00100000;
         srambase:$20000000;
         sramsize:$00100000
         )
     );
 
-   vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16];
+   vfp_scalar = [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16];
 
    { Supported optimizations, only used for information }
    supported_optimizerswitches = genericlevel1optimizerswitches+
@@ -1085,9 +1054,47 @@ Const
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse {,cs_opt_scheduler}];
-   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
+     [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
+   level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
+
+ type
+   tcpuflags =             { instruction-set capability flags; combined per CPU type as "set of tcpuflags" in cpu_capabilities }
+      (CPUARM_HAS_BX,         { CPU supports the BX instruction                           }
+       CPUARM_HAS_BLX,        { CPU supports the BLX rX instruction                       }
+       CPUARM_HAS_BLX_LABEL,  { CPU supports the BLX <label> instruction                  }
+       CPUARM_HAS_CLZ,        { CPU supports the CLZ instruction                          }
+       CPUARM_HAS_EDSP,       { CPU supports the PLD,STRD,LDRD,MCRR and MRRC instructions }
+       CPUARM_HAS_REV,        { CPU supports the REV instruction                          }
+       CPUARM_HAS_RBIT,       { CPU supports the RBIT instruction                         }
+       CPUARM_HAS_DMB,        { CPU has memory barrier instructions (DMB, DSB, ISB)       }
+       CPUARM_HAS_LDREX,      { presumably: CPU supports the LDREX/STREX instructions - TODO confirm }
+       CPUARM_HAS_IDIV        { presumably: CPU supports the SDIV/UDIV instructions - TODO confirm   }
+      );
+
+ const
+   cpu_capabilities : array[tcputype] of set of tcpuflags =
+     ( { cpu_none     } [],
+       { cpu_armv3    } [],
+       { cpu_armv4    } [],
+       { cpu_armv4t   } [CPUARM_HAS_BX],
+       { cpu_armv5    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ],
+       { cpu_armv5t   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ],
+       { cpu_armv5te  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP],
+       { cpu_armv5tej } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP],
+       { cpu_armv6    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
+       { cpu_armv6k   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
+       { cpu_armv6t2  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX],
+       { cpu_armv6z   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
+       { the identifier armv7 should not be used; it is considered equal to armv7a }
+       { cpu_armv7    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_DMB],
+       { cpu_armv7a   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_DMB],
+       { cpu_armv7r   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_DMB],
+       { cpu_armv7m   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV,CPUARM_HAS_DMB],
+       { cpu_armv7em  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_IDIV,CPUARM_HAS_DMB]
+     );
 
 Implementation
 
 end.
+

+ 19 - 10
compiler/arm/cpupara.pas

@@ -28,9 +28,8 @@ unit cpupara;
 
     uses
        globtype,globals,
-       aasmtai,aasmdata,
        cpuinfo,cpubase,cgbase,cgutils,
-       symconst,symbase,symtype,symdef,parabase,paramgr;
+       symconst,symtype,symdef,parabase,paramgr;
 
     type
        tarmparamanager = class(tparamanager)
@@ -38,8 +37,8 @@ unit cpupara;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
-          function ret_in_param(def : tdef;calloption : tproccalloption) : boolean;override;
-          procedure getintparaloc(calloption : tproccalloption; nr : longint; def : tdef; var cgpara : tcgpara);override;
+          function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
+          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
@@ -53,7 +52,6 @@ unit cpupara;
 
     uses
        verbose,systems,cutils,
-       rgobj,
        defutil,symsym,symtable;
 
 
@@ -78,12 +76,14 @@ unit cpupara;
       end;
 
 
-    procedure tarmparamanager.getintparaloc(calloption : tproccalloption; nr : longint; def : tdef; var cgpara : tcgpara);
+    procedure tarmparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
+        def : tdef;
       begin
         if nr<1 then
           internalerror(2002070801);
+        def:=tparavarsym(pd.paras[nr-1]).vardef;
         cgpara.reset;
         cgpara.size:=def_cgsize(def);
         cgpara.intsize:=tcgsize2size[cgpara.size];
@@ -124,7 +124,7 @@ unit cpupara;
                 getparaloc:=LOC_MMREGISTER
               else if (calloption in [pocall_cdecl,pocall_cppdecl,pocall_softfloat]) or
                  (cs_fp_emulation in current_settings.moduleswitches) or
-                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
+                 (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
                 { the ARM eabi also allows passing VFP values via VFP registers,
                   but Mac OS X doesn't seem to do that and linux only does it if
                   built with the "-mfloat-abi=hard" option }
@@ -201,12 +201,21 @@ unit cpupara;
       end;
 
 
-    function tarmparamanager.ret_in_param(def : tdef;calloption : tproccalloption) : boolean;
+    function tarmparamanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
       var
         i: longint;
         sym: tsym;
         fpufield: boolean;
       begin
+        { this must be system independent: safecall and record constructor results
+          are always returned in a param }
+        if (tf_safecall_exceptions in target_info.flags) and
+           (pd.proccalloption=pocall_safecall) or
+           ((pd.proctypeoption=potype_constructor)and is_record(def)) then
+          begin
+            result:=true;
+            exit;
+          end;
         case def.typ of
           recorddef:
             begin
@@ -277,7 +286,7 @@ unit cpupara;
             else
               result:=false
           else
-            result:=inherited ret_in_param(def,calloption);
+            result:=inherited ret_in_param(def,pd);
         end;
       end;
 
@@ -608,7 +617,7 @@ unit cpupara;
               end
             else if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16]) then
+               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
               begin
                 case retcgsize of
                   OS_64,

+ 17 - 3
compiler/arm/cpupi.pas

@@ -47,9 +47,8 @@ unit cpupi;
     uses
        globals,systems,
        cpubase,
-       aasmtai,aasmdata,
        tgobj,
-       symconst,symsym,paramgr,
+       symconst,paramgr,
        cgbase,cgutils,
        cgobj;
 
@@ -63,13 +62,20 @@ unit cpupi;
           is especially a problem when taking the address of a local. For now,
           this extra memory should hurt less than generating all local contants with offsets
           >256 as non shifter constants }
+        if (po_nostackframe in procdef.procoptions) then
+          begin
+             { maxpushedparasize should be zero;
+               if not, we will get an error later. }
+             tg.setfirsttemp(maxpushedparasize);
+             exit;
+          end;
         if tg.direction = -1 then
           begin
             if (target_info.system<>system_arm_darwin) then
               { Non-Darwin, worst case: r4-r10,r11,r13,r14,r15 is saved -> -28-16, but we
                 always adjust the frame pointer to point to the first stored
                 register (= last register in list above) -> + 4 }
-              tg.setfirsttemp(-28-16+4)
+              tg.setfirsttemp(-28-16)
             else
               { on Darwin first r4-r7,r14 are saved, then r7 is adjusted to
                 point to the saved r7, and next r8,r10,r11 gets saved -> -24
@@ -118,6 +124,14 @@ unit cpupi;
                 if r in regs then
                   inc(floatsavesize,8);
             end;
+          fpu_fpv4_s16:
+            begin
+              floatsavesize:=0;
+              regs:=cg.rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall);
+              for r:=RS_D0 to RS_D15 do
+                if r in regs then
+                  inc(floatsavesize,8);
+            end;
         end;
         floatsavesize:=align(floatsavesize,max(current_settings.alignment.localalignmin,4));
         result:=Align(tg.direction*tg.lasttemp,max(current_settings.alignment.localalignmin,4))+maxpushedparasize+aint(floatsavesize);

+ 2 - 0
compiler/arm/cputarg.pas

@@ -72,6 +72,8 @@ implementation
     {$endif}
 
       ,ogcoff
+      ,ogelf
+      ,cpuelf
 
 {**************************************
         Assembler Readers

+ 2 - 2
compiler/arm/itcpugas.pas

@@ -43,10 +43,10 @@ interface
 implementation
 
     uses
-      cutils,verbose;
+      rgbase;
 
     const
-      gas_regname_table : array[tregisterindex] of string[7] = (
+      gas_regname_table : TRegNameTable = (
         {$i rarmstd.inc}
       );
 

+ 174 - 26
compiler/arm/narmadd.pas

@@ -35,6 +35,7 @@ interface
        public
           function pass_1 : tnode;override;
        protected
+          function first_addfloat: tnode; override;
           procedure second_addfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpordinal;override;
@@ -45,16 +46,13 @@ interface
   implementation
 
     uses
-      globtype,systems,
-      cutils,verbose,globals,
-      constexp,
-      symconst,symdef,paramgr,
-      aasmbase,aasmtai,aasmdata,aasmcpu,defutil,htypechk,
-      cgbase,cgutils,cgcpu,
-      cpuinfo,pass_1,pass_2,regvars,procinfo,
-      cpupara,
-      ncon,nset,nadd,
-      ncgutil,tgobj,rgobj,rgcpu,cgobj,cg64f32,
+      globtype,verbose,globals,
+      constexp,symdef,symtable,symtype,
+      aasmbase,aasmdata,aasmcpu,defutil,htypechk,
+      cgbase,cgutils,
+      cpuinfo,pass_1,procinfo,
+      ncon,nadd,ncnv,ncal,nmat,
+      ncgutil,cgobj,
       hlcgobj
       ;
 
@@ -212,6 +210,36 @@ interface
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
                  location.register,left.location.register,right.location.register));
             end;
+          fpu_fpv4_s16:
+            begin
+              { force mmreg as location, left right doesn't matter
+                as both will be in a fpureg }
+              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+
+              location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+              if left.location.loc<>LOC_CMMREGISTER then
+                location.register:=left.location.register
+              else if right.location.loc<>LOC_CMMREGISTER then
+                location.register:=right.location.register
+              else
+                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+              case nodetype of
+                addn :
+                  op:=A_VADD;
+                muln :
+                  op:=A_VMUL;
+                subn :
+                  op:=A_VSUB;
+                slashn :
+                  op:=A_VDIV;
+                else
+                  internalerror(2009111401);
+              end;
+
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(op, location.register,left.location.register,right.location.register), PF_F32));
+            end;
           fpu_soft:
             { this case should be handled already by pass1 }
             internalerror(200308252);
@@ -242,6 +270,7 @@ interface
               location_force_fpureg(current_asmdata.CurrAsmList,left.location,true);
               location_force_fpureg(current_asmdata.CurrAsmList,right.location,true);
 
+              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               if nodetype in [equaln,unequaln] then
                 current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_CMF,
                    left.location.register,right.location.register),
@@ -269,8 +298,24 @@ interface
                 op:=A_FCMPED;
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
                 left.location.register,right.location.register));
+              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               current_asmdata.CurrAsmList.concat(taicpu.op_none(A_FMSTAT));
             end;
+          fpu_fpv4_s16:
+            begin
+              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+
+              if nodetype in [equaln,unequaln] then
+                op:=A_VCMP
+              else
+                op:=A_VCMPE;
+
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
+                left.location.register,right.location.register));
+              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+              current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_VMRS, NR_APSR_nzcv, NR_FPSCR));
+            end;
           fpu_soft:
             { this case should be handled already by pass1 }
             internalerror(2009112404);
@@ -284,36 +329,54 @@ interface
     procedure tarmaddnode.second_cmpsmallset;
       var
         tmpreg : tregister;
+        b: byte;
       begin
         pass_left_right;
 
         location_reset(location,LOC_FLAGS,OS_NO);
 
+        if (not(nf_swapped in flags) and
+            (nodetype = lten)) or
+           ((nf_swapped in flags) and
+            (nodetype = gten)) then
+          swapleftright;
+
+        (* Try to keep right as a constant *)
+        if (right.location.loc <> LOC_CONSTANT) or
+          not(is_shifter_const(right.location.value, b)) then
+          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
-        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
 
         case nodetype of
-          equaln:
-            begin
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
-              location.resflags:=F_EQ;
-            end;
+          equaln,
           unequaln:
             begin
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
-              location.resflags:=F_NE;
+              cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+              if right.location.loc = LOC_CONSTANT then
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,right.location.value))
+              else
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+              if nodetype = equaln then
+                location.resflags:=F_EQ
+              else
+                location.resflags:=F_NE;
             end;
           lten,
           gten:
             begin
-              if (not(nf_swapped in flags) and
-                  (nodetype = lten)) or
-                 ((nf_swapped in flags) and
-                  (nodetype = gten)) then
-                swapleftright;
               tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,tmpreg,left.location.register,right.location.register));
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,tmpreg,right.location.register));
+              if right.location.loc = LOC_CONSTANT then
+                begin
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_AND,tmpreg,left.location.register,right.location.value));
+                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,right.location.value));
+                end
+              else
+                begin
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,tmpreg,left.location.register,right.location.register));
+                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,tmpreg,right.location.register));
+                end;
               location.resflags:=F_EQ;
             end;
           else
@@ -342,6 +405,7 @@ interface
             if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) then
               hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
             dummyreg:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ORR,dummyreg,right.location.register64.reglo,right.location.register64.reghi),PF_S));
           end
         else if (nodetype in [equaln,unequaln]) and
@@ -352,6 +416,7 @@ interface
             if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER]) then
               hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
             dummyreg:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg_reg(A_ORR,dummyreg,left.location.register64.reglo,left.location.register64.reghi),PF_S));
           end
         else
@@ -364,6 +429,7 @@ interface
               begin
                 location_reset(location,LOC_FLAGS,OS_NO);
                 location.resflags:=getresflags(unsigned);
+                cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register64.reghi,right.location.register64.reghi));
                 if current_settings.cputype in cpu_thumb2 then
                   begin
@@ -379,6 +445,7 @@ interface
             { operation requiring proper N, Z and V flags ? }
               begin
                 location_reset(location,LOC_JUMP,OS_NO);
+                cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register64.reghi,right.location.register64.reghi));
                 { the jump the sequence is a little bit hairy }
                 case nodetype of
@@ -388,6 +455,7 @@ interface
                         { cheat a little bit for the negative test }
                         toggleflag(nf_swapped);
                         cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(false),current_procinfo.CurrFalseLabel);
+                        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                         toggleflag(nf_swapped);
                      end;
                    lten,gten:
@@ -404,14 +472,17 @@ interface
                         else
                           nodetype:=ltn;
                         cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(unsigned),current_procinfo.CurrFalseLabel);
+                        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                         nodetype:=oldnodetype;
                      end;
                 end;
+                cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register64.reglo,right.location.register64.reglo));
                 { the comparisaion of the low dword have to be
                    always unsigned!                            }
                 cg.a_jmp_flags(current_asmdata.CurrAsmList,getresflags(true),current_procinfo.CurrTrueLabel);
                 cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
+                cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               end;
           end;
       end;
@@ -436,6 +507,83 @@ interface
           end;
       end;
 
+    function tarmaddnode.first_addfloat: tnode; { for fpu_fpv4_s16: rewrites s64real operations into float64_* helper calls; otherwise defers to the inherited method }
+      var
+        procname: string[31];
+        { set when the helper's result must be wrapped in a "not" node (gtn, gten, unequaln) }
+        notnode : boolean;
+        fdef : tdef;
+      begin
+        result := nil;
+        notnode := false;
+
+        if current_settings.fputype = fpu_fpv4_s16 then
+          begin
+            case tfloatdef(left.resultdef).floattype of { float types other than s32real/s64real match no branch: result stays nil }
+              s32real: { single precision: no replacement node is built }
+                begin
+                  result:=nil;
+                  notnode:=false;
+                end;
+              s64real: { double precision: replaced by a call to a float64_* helper }
+                begin
+                  fdef:=search_system_type('FLOAT64').typedef; { both operands are converted to this type below }
+                  procname:='float64';
+
+                  case nodetype of { select the helper name suffix for the operator }
+                    addn:
+                      procname:=procname+'_add';
+                    muln:
+                      procname:=procname+'_mul';
+                    subn:
+                      procname:=procname+'_sub';
+                    slashn:
+                      procname:=procname+'_div';
+                    ltn:
+                      procname:=procname+'_lt';
+                    lten:
+                      procname:=procname+'_le';
+                    gtn: { "greater than" is built as not(float64_le) }
+                      begin
+                        procname:=procname+'_le';
+                        notnode:=true;
+                      end;
+                    gten: { "greater or equal" is built as not(float64_lt) }
+                      begin
+                        procname:=procname+'_lt';
+                        notnode:=true;
+                      end;
+                    equaln:
+                      procname:=procname+'_eq';
+                    unequaln: { "unequal" is built as not(float64_eq) }
+                      begin
+                        procname:=procname+'_eq';
+                        notnode:=true;
+                      end;
+                    else
+                      CGMessage3(type_e_operator_not_supported_for_types,node2opstr(nodetype),left.resultdef.typename,right.resultdef.typename);
+                  end;
+
+                  if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then { comparisons get pasbool8type as result type }
+                    resultdef:=pasbool8type;
+                  result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
+                      ctypeconvnode.create_internal(right,fdef),
+                      ccallparanode.create(
+                        ctypeconvnode.create_internal(left,fdef),nil))),resultdef);
+
+                  left:=nil;  { operands were handed to the new call tree; presumably cleared so they are not freed twice - TODO confirm }
+                  right:=nil;
+
+                  { negate the helper result where the operator was mapped to its complement }
+                  if notnode then
+                    result:=cnotnode.create(result);
+                end;
+            end;
+          end
+        else
+          result:=inherited first_addfloat;
+      end;
+
 
     procedure tarmaddnode.second_cmpordinal;
       var
@@ -448,7 +596,7 @@ interface
 
         unsigned:=not(is_signed(left.resultdef)) or
                   not(is_signed(right.resultdef));
-
+        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
         if right.location.loc = LOC_CONSTANT then
           begin
              if is_shifter_const(right.location.value,b) then

+ 3 - 5
compiler/arm/narmcal.pas

@@ -26,7 +26,7 @@ unit narmcal;
 interface
 
     uses
-      symdef,node,ncal,ncgcal;
+      symdef,ncal,ncgcal;
 
     type
        tarmcallnode = class(tcgcallnode)
@@ -38,10 +38,8 @@ implementation
   uses
     verbose,globtype,globals,aasmdata,
     symconst,
-    cgbase,
-    cpubase,cpuinfo,
+    cgbase,cpuinfo,
     ncgutil,
-    paramgr,
     systems;
 
   procedure tarmcallnode.set_result_location(realresdef: tstoreddef);
@@ -49,7 +47,7 @@ implementation
       if (realresdef.typ=floatdef) and 
          (target_info.abi <> abi_eabihf) and
          ((cs_fp_emulation in current_settings.moduleswitches) or
-          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16])) then
+          (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16])) then
         begin
           { keep the fpu values in integer registers for now, the code
             generator will move them to memory or an mmregister when necessary

+ 71 - 9
compiler/arm/narmcnv.pas

@@ -26,12 +26,13 @@ unit narmcnv;
 interface
 
     uses
-      node,ncnv,ncgcnv,defcmp;
+      node,ncnv,ncgcnv;
 
     type
        tarmtypeconvnode = class(tcgtypeconvnode)
          protected
            function first_int_to_real: tnode;override;
+           function first_real_to_real: tnode; override;
          { procedure second_int_to_int;override; }
          { procedure second_string_to_string;override; }
          { procedure second_cstring_to_pchar;override; }
@@ -57,15 +58,12 @@ interface
 implementation
 
    uses
-      verbose,globtype,globals,systems,
-      symconst,symdef,aasmbase,aasmtai,aasmdata,
+      verbose,globtype,globals,symdef,aasmbase,aasmtai,aasmdata,symtable,
       defutil,
       cgbase,cgutils,
-      pass_1,pass_2,procinfo,
-      ncon,ncal,
+      pass_1,pass_2,procinfo,ncal,
       ncgutil,
-      cpubase,cpuinfo,aasmcpu,
-      rgobj,tgobj,cgobj,hlcgobj,cgcpu;
+      cpubase,cpuinfo,aasmcpu,cgobj,hlcgobj,cgcpu;
 
 
 {*****************************************************************************
@@ -76,7 +74,8 @@ implementation
       var
         fname: string[19];
       begin
-        if cs_fp_emulation in current_settings.moduleswitches then
+        if (cs_fp_emulation in current_settings.moduleswitches) or
+          (current_settings.fputype=fpu_fpv4_s16) then
           result:=inherited first_int_to_real
         else
           begin
@@ -117,7 +116,8 @@ implementation
                 expectloc:=LOC_FPUREGISTER;
               fpu_vfpv2,
               fpu_vfpv3,
-              fpu_vfpv3_d16:
+              fpu_vfpv3_d16,
+              fpu_fpv4_s16:
                 expectloc:=LOC_MMREGISTER;
               else
                 internalerror(2009112702);
@@ -125,6 +125,48 @@ implementation
           end;
       end;
 
+    { First-pass handling of float-to-float type conversions on ARM.
+
+      For fpu_fpv4_s16 the conversion node is replaced: single<->double
+      conversions become calls to the helpers float32_to_float64 and
+      float64_to_float32 (operand reinterpreted as the system types
+      FLOAT32REC resp. FLOAT64), and conversions between equal precisions
+      collapse to the operand itself. The replacement is first-passed and
+      returned. All other FPU types use the inherited implementation. }
+    function tarmtypeconvnode.first_real_to_real: tnode;
+      begin
+        if (current_settings.fputype=fpu_fpv4_s16) then
+          begin
+            { outer case: source precision, inner case: target precision }
+            case tfloatdef(left.resultdef).floattype of
+              s32real:
+                case tfloatdef(resultdef).floattype of
+                  s64real:
+                    result:=ctypeconvnode.create_explicit(ccallnode.createintern('float32_to_float64',ccallparanode.create(
+                      ctypeconvnode.create_internal(left,search_system_type('FLOAT32REC').typedef),nil)),resultdef);
+                  s32real:
+                    begin
+                      { same precision: the operand itself is the result }
+                      result:=left;
+                      left:=nil;
+                    end;
+                  else
+                    internalerror(200610151);
+                end;
+              s64real:
+                case tfloatdef(resultdef).floattype of
+                  s32real:
+                    result:=ctypeconvnode.create_explicit(ccallnode.createintern('float64_to_float32',ccallparanode.create(
+                      ctypeconvnode.create_internal(left,search_system_type('FLOAT64').typedef),nil)),resultdef);
+                  s64real:
+                    begin
+                      { same precision: the operand itself is the result }
+                      result:=left;
+                      left:=nil;
+                    end;
+                  else
+                    internalerror(200610152);
+                end;
+              else
+                internalerror(200610153);
+            end;
+            { left has been handed over to the new tree (or is the result
+              itself), so drop this node's reference to it }
+            left:=nil;
+            firstpass(result);
+            exit;
+          end
+        else
+          Result := inherited first_real_to_real;
+      end;
+
 
     procedure tarmtypeconvnode.second_int_to_real;
       const
@@ -168,8 +210,10 @@ implementation
                         current_asmdata.getjumplabel(l2);
                         reference_reset_symbol(href,l1,0,const_align(8));
 
+                        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                         current_asmdata.CurrAsmList.concat(Taicpu.op_reg_const(A_CMP,left.location.register,0));
                         cg.a_jmp_flags(current_asmdata.CurrAsmList,F_GE,l2);
+                        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
 
                         hregister:=cg.getfpuregister(current_asmdata.CurrAsmList,OS_F64);
                         new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(8));
@@ -187,6 +231,7 @@ implementation
                           begin
                             hregister:=location.register;
                             location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+                            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                             current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_MVF,location.register,hregister),PF_S));
                           end;
                       end;
@@ -211,6 +256,22 @@ implementation
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(
                 signedprec2vfpop[signed,location.size],location.register,left.location.register));
             end;
+          fpu_fpv4_s16:
+            begin
+              location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+              signed:=left.location.size=OS_S32;
+              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+              if (left.location.size<>OS_F32) then
+                internalerror(2009112703);
+              if left.location.size<>location.size then
+                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size)
+              else
+                location.register:=left.location.register;
+              if signed then
+                current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32S32))
+              else
+                current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32U32));
+            end;
         end;
       end;
 
@@ -316,6 +377,7 @@ implementation
          location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
          hreg1:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
          cg.g_flags2reg(current_asmdata.CurrAsmList,location.size,resflags,hreg1);
+         cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
          if (is_cbool(resultdef)) then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NEG,location.size,hreg1,hreg1);
 

+ 2 - 3
compiler/arm/narmcon.pas

@@ -26,7 +26,7 @@ unit narmcon;
 interface
 
     uses
-      node,ncgcon,cpubase;
+      ncgcon,cpubase;
 
     type
       tarmrealconstnode = class(tcgrealconstnode)
@@ -39,8 +39,7 @@ interface
       verbose,
       globtype,globals,
       cpuinfo,
-      aasmbase,aasmtai,aasmdata,
-      symconst,symdef,
+      aasmbase,aasmtai,aasmdata,symdef,
       defutil,
       cgbase,cgutils,
       procinfo,

+ 36 - 14
compiler/arm/narminl.pas

@@ -58,16 +58,10 @@ interface
 implementation
 
     uses
-      globtype,systems,
-      cutils,verbose,globals,fmodule,
-      cpuinfo, defutil,
-      symconst,symdef,
-      aasmbase,aasmtai,aasmdata,aasmcpu,
-      cgbase,cgutils,
-      pass_1,pass_2,
-      cpubase,paramgr,
-      nbas,ncon,ncal,ncnv,nld,
-      tgobj,ncgutil,cgobj,cg64f32,rgobj,rgcpu,cgcpu, hlcgobj;
+      globtype,verbose,globals,
+      cpuinfo, defutil,symdef,aasmdata,aasmcpu,
+      cgbase,cgutils,pass_2,
+      cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
 
 {*****************************************************************************
                               tarminlinenode
@@ -91,7 +85,8 @@ implementation
             end;
           fpu_vfpv2,
           fpu_vfpv3,
-          fpu_vfpv3_d16:
+          fpu_vfpv3_d16,
+          fpu_fpv4_s16:
             begin
               location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
               location_copy(location,left.location);
@@ -123,6 +118,13 @@ implementation
               fpu_vfpv3,
               fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
+              fpu_fpv4_s16:
+                begin
+                  if tfloatdef(left.resultdef).floattype=s32real then
+                    expectloc:=LOC_MMREGISTER
+                  else
+                    exit(inherited first_abs_real);
+                end;
               else
                 internalerror(2009112401);
             end;
@@ -146,6 +148,13 @@ implementation
               fpu_vfpv3,
               fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
+              fpu_fpv4_s16:
+                begin
+                  if tfloatdef(left.resultdef).floattype=s32real then
+                    expectloc:=LOC_MMREGISTER
+                  else
+                    exit(inherited first_sqr_real);
+                end;
               else
                 internalerror(2009112402);
             end;
@@ -169,6 +178,13 @@ implementation
               fpu_vfpv3,
               fpu_vfpv3_d16:
                 expectloc:=LOC_MMREGISTER;
+              fpu_fpv4_s16:
+                begin
+                  if tfloatdef(left.resultdef).floattype=s32real then
+                    expectloc:=LOC_MMREGISTER
+                  else
+                    exit(inherited first_sqrt_real);
+                end;
               else
                 internalerror(2009112403);
             end;
@@ -227,6 +243,8 @@ implementation
                 op:=A_FABSD;
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
             end;
+          fpu_fpv4_s16:
+            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg(A_VABS,location.register,left.location.register), PF_F32));
         else
           internalerror(2009111402);
         end;
@@ -254,6 +272,8 @@ implementation
                 op:=A_FMULD;
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,left.location.register,left.location.register));
             end;
+          fpu_fpv4_s16:
+            current_asmdata.CurrAsmList.Concat(setoppostfix(taicpu.op_reg_reg_reg(A_VMUL,location.register,left.location.register,left.location.register), PF_F32));
         else
           internalerror(2009111403);
         end;
@@ -281,6 +301,8 @@ implementation
                 op:=A_FSQRTD;
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
             end;
+          fpu_fpv4_s16:
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VSQRT,location.register,left.location.register));
         else
           internalerror(2009111402);
         end;
@@ -320,7 +342,7 @@ implementation
         ref : treference;
         r : tregister;
       begin
-        if current_settings.cputype>=cpu_armv5 then
+        if CPUARM_HAS_EDSP in cpu_capabilities[current_settings.cputype] then
           begin
             secondpass(left);
             case left.location.loc of
@@ -341,18 +363,18 @@ implementation
 
     procedure tarminlinenode.second_abs_long;
       var
-        hregister : tregister;
         opsize : tcgsize;
         hp : taicpu;
       begin
         secondpass(left);
         opsize:=def_cgsize(left.resultdef);
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
-        hregister:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
         location:=left.location;
         location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
+        cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
         current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_MOV,location.register,left.location.register), PF_S));
         current_asmdata.CurrAsmList.concat(setcondition(taicpu.op_reg_reg_const(A_RSB,location.register,location.register, 0), C_MI));
+        cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
       end;
 
 begin

+ 72 - 4
compiler/arm/narmmat.pas

@@ -39,6 +39,7 @@ interface
       end;
 
       tarmunaryminusnode = class(tcgunaryminusnode)
+        function pass_1: tnode; override;
         procedure second_float;override;
       end;
 
@@ -50,15 +51,16 @@ interface
 implementation
 
     uses
-      globtype,systems,
+      globtype,
       cutils,verbose,globals,constexp,
       aasmbase,aasmcpu,aasmtai,aasmdata,
       defutil,
+      symtype,symconst,symtable,
       cgbase,cgobj,hlcgobj,cgutils,
       pass_2,procinfo,
-      ncon,
+      ncon,ncnv,ncal,
       cpubase,cpuinfo,
-      ncgutil,cgcpu,
+      ncgutil,
       nadd,pass_1,symdef;
 
 {*****************************************************************************
@@ -121,6 +123,9 @@ implementation
            else if (tordconstnode(right).value = int64(-1)) then
              begin
                // note: only in the signed case possible..., may overflow
+               if cs_check_overflow in current_settings.localswitches then
+                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
                current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_MVN,
                  resultreg,numerator),toppostfix(ord(cs_check_overflow in current_settings.localswitches)*ord(PF_S))));
              end
@@ -285,6 +290,10 @@ implementation
             current_procinfo.CurrTrueLabel:=current_procinfo.CurrFalseLabel;
             current_procinfo.CurrFalseLabel:=hl;
             secondpass(left);
+
+            if left.location.loc<>LOC_JUMP then
+              internalerror(2012081305);
+
             maketojumpbool(current_asmdata.CurrAsmList,left,lr_load_regvars);
             hl:=current_procinfo.CurrTrueLabel;
             current_procinfo.CurrTrueLabel:=current_procinfo.CurrFalseLabel;
@@ -304,10 +313,11 @@ implementation
               LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF :
                 begin
                   hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
                   location_reset(location,LOC_FLAGS,OS_NO);
                   location.resflags:=F_EQ;
-               end;
+                end;
               else
                 internalerror(2003042401);
             end;
@@ -318,6 +328,46 @@ implementation
                                TARMUNARYMINUSNODE
 *****************************************************************************}
 
+    { First pass for unary minus on ARM.
+
+      Only a double precision operand on an fpu_fpv4_s16 target is treated
+      specially: the node is replaced by a call to the helper float64_sub,
+      which receives a zero constant and the operand (presumably computing
+      0-operand - confirm against the helper's parameter order), with the
+      arguments and the result reinterpreted through the system type FLOAT64.
+      Every other case (other FPU types, single precision, non-float
+      operands) is left to the inherited implementation.
+
+      Returns the replacement node, or nil when this node is kept. }
+    function tarmunaryminusnode.pass_1: tnode;
+      var
+        procname: string[31];
+        fdef : tdef;
+      begin
+        { The typ test has to precede the tfloatdef cast: for a non-float
+          def the cast would read an unrelated field and pick the path at
+          random. }
+        if (current_settings.fputype<>fpu_fpv4_s16) or
+          (resultdef.typ<>floatdef) or
+          (tfloatdef(resultdef).floattype=s32real) then
+          exit(inherited pass_1);
+
+        result:=nil;
+        firstpass(left);
+        if codegenerror then
+          exit;
+
+        if (left.resultdef.typ=floatdef) then
+          begin
+            case tfloatdef(resultdef).floattype of
+              s64real:
+                begin
+                  procname:='float64_sub';
+                  fdef:=search_system_type('FLOAT64').typedef;
+                end;
+              else
+                internalerror(2005082801);
+            end;
+            result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
+              ctypeconvnode.create_internal(left,fDef),
+              ccallparanode.create(ctypeconvnode.create_internal(crealconstnode.create(0,resultdef),fdef),nil))),resultdef);
+
+            { left now belongs to the call tree built above }
+            left:=nil;
+          end
+        else if (left.resultdef.typ=orddef) then
+          expectloc:=LOC_REGISTER;
+      end;
+
     procedure tarmunaryminusnode.second_float;
       var
         op: tasmop;
@@ -349,6 +399,15 @@ implementation
               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,
                 location.register,left.location.register));
             end;
+          fpu_fpv4_s16:
+            begin
+              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              location:=left.location;
+              if (left.location.loc=LOC_CMMREGISTER) then
+                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+              current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VNEG,
+                location.register,left.location.register), PF_F32));
+            end
           else
             internalerror(2009112602);
         end;
@@ -403,6 +462,9 @@ implementation
         begin
           shifterop_reset(so);
           shiftval2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+
+          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
           {Do we shift more than 32 bits?}
           emit_instr(setoppostfix(taicpu.op_reg_reg_const(A_RSB, shiftval2, shiftval, 32), PF_S));
 
@@ -419,6 +481,8 @@ implementation
           so.rs:=shiftval2;
           emit_instr(setcondition(taicpu.op_reg_reg_reg_shifterop(A_ORR, reg2, reg2, reg1, so), C_PL));
 
+          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+
           {Final adjustments}
           if shiftright then so.shiftmode:=SM_LSR else so.shiftmode:=SM_LSL;
           so.rs:=shiftval;
@@ -450,16 +514,20 @@ implementation
               if nodetype=shln then
                 begin
                   {Shift left by one by 2 simple 32bit additions}
+                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   emit_instr(setoppostfix(taicpu.op_reg_reg_reg(A_ADD, hreg64lo, hreg64lo, hreg64lo), PF_S));
                   emit_instr(taicpu.op_reg_reg_reg(A_ADC, hreg64hi, hreg64hi, hreg64hi));
+                  cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 end
               else
                 begin
                   {Shift right by first shifting hi by one and then using RRX (rotate right extended), which rotates through the carry}
                   shifterop_reset(so); so.shiftmode:=SM_LSR; so.shiftimm:=1;
+                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   emit_instr(setoppostfix(taicpu.op_reg_reg_shifterop(A_MOV, hreg64hi, hreg64hi, so), PF_S));
                   so.shiftmode:=SM_RRX; so.shiftimm:=0; {RRX does NOT have a shift amount}
                   emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, hreg64lo, hreg64lo, so));
+                  cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 end
             {A 32bit shift just replaces a register and clears the other}
             else if v = 32 then

+ 2 - 8
compiler/arm/narmmem.pas

@@ -27,8 +27,7 @@ interface
 
     uses
       globtype,
-      cgbase,cpuinfo,cpubase,
-      node,nmem,ncgmem;
+      cgbase,cpubase,nmem,ncgmem;
 
     type
       tarmvecnode = class(tcgvecnode)
@@ -38,12 +37,7 @@ interface
 implementation
 
     uses
-      systems,
-      cutils,verbose,
-      symdef,paramgr,
-      aasmtai,aasmdata,aasmcpu,
-      nld,ncon,nadd,
-      cgutils,cgobj;
+      cutils,verbose,aasmdata,aasmcpu,cgobj;
 
 {*****************************************************************************
                              TARMVECNODE

+ 70 - 8
compiler/arm/narmset.pas

@@ -27,9 +27,19 @@ interface
 
     uses
       globtype,
+      symtype,
+      cgbase,
       node,nset,pass_1,ncgset;
 
     type
+
+       { tarminnode }
+
+       tarminnode = class(tcginnode)
+         function pass_1: tnode; override;
+         procedure in_smallset(uopsize: tcgsize; opdef: tdef; setbase: aint); override;
+       end;
+
       tarmcasenode = class(tcgcasenode)
          procedure optimizevalues(var max_linear_list:aint;var max_dist:aword);override;
          function  has_jumptable : boolean;override;
@@ -41,15 +51,66 @@ interface
 implementation
 
     uses
-      systems,
-      verbose,globals,constexp,
-      symconst,symdef,defutil,
+      globals,constexp,defutil,
       aasmbase,aasmtai,aasmdata,aasmcpu,
-      cgbase,pass_2,
-      ncon,
-      cpubase,cpuinfo,procinfo,
+      cpubase,cpuinfo,
       cgutils,cgobj,ncgutil,
-      cgcpu;
+      cgcpu,hlcgobj;
+
+{*****************************************************************************
+                            TARMINNODE
+*****************************************************************************}
+
+    { First pass of the ARM "in" node. Runs the inherited pass first; when
+      that keeps the node, checkgenjumps reports false and flags the small
+      set variant, the expected location is switched to LOC_FLAGS. }
+    function tarminnode.pass_1: tnode;
+      var
+        parts: Tsetparts;
+        partcount: byte;
+        smallset: boolean;
+      begin
+        result:=inherited pass_1;
+        { the inherited pass replaced the node: nothing left to decide }
+        if assigned(result) then
+          exit;
+
+        { jump based code generation keeps the inherited expectation }
+        if checkgenjumps(parts,partcount,smallset) then
+          exit;
+
+        if smallset then
+          expectloc:=LOC_FLAGS;
+      end;
+
+    { Generates the "element in small set" test with a single TST
+      instruction. left supplies the bit number, right the set value; the
+      result is reported through the flags (LOC_FLAGS with condition F_NE). }
+    procedure tarminnode.in_smallset(uopsize: tcgsize; opdef: tdef; setbase: aint);
+      var
+        so : tshifterop;
+        hregister : tregister;
+      begin
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=F_NE;
+        if left.location.loc=LOC_CONSTANT then
+          begin
+            { constant element: test the set against the precomputed mask
+              1 shl (element-setbase) }
+            hlcg.location_force_reg(current_asmdata.CurrAsmList, right.location,
+              right.resultdef, right.resultdef, true);
+
+            { the flags are marked as allocated right before the TST; no
+              matching a_reg_dealloc is issued in this routine (presumably
+              left to the consumer of the flags - TODO confirm) }
+            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_TST,right.location.register,1 shl (left.location.value-setbase)));
+          end
+        else
+          begin
+            { variable element: bring element and set into registers and let
+              register_maybe_adjust_setbase process the element register
+              with setbase }
+            hlcg.location_force_reg(current_asmdata.CurrAsmList, left.location,
+             left.resultdef, opdef, true);
+            register_maybe_adjust_setbase(current_asmdata.CurrAsmList, left.location,
+             setbase);
+            hlcg.location_force_reg(current_asmdata.CurrAsmList, right.location,
+             right.resultdef, right.resultdef, true);
+
+            { hregister holds the constant 1 that is shifted into the mask }
+            hregister:=cg.getintregister(current_asmdata.CurrAsmList, uopsize);
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_MOV,hregister,1));
+
+            { shifter operand: LSL by the element register, so the TST below
+              compares the set with hregister shifted left by the element }
+            shifterop_reset(so);
+            so.rs:=left.location.register;
+            so.shiftmode:=SM_LSL;
+            cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_shifterop(A_TST,right.location.register,hregister,so));
+          end;
+      end;
 
 
 {*****************************************************************************
@@ -264,5 +325,6 @@ implementation
         end;
 
 begin
-   ccasenode:=tarmcasenode;
+  cinnode:=tarminnode;
+  ccasenode:=tarmcasenode;
 end.

+ 116 - 12
compiler/arm/raarmgas.pas

@@ -35,13 +35,16 @@ Unit raarmgas;
         actwideformat : boolean;
         function is_asmopcode(const s: string):boolean;override;
         function is_register(const s:string):boolean;override;
+        function is_targetdirective(const s: string): boolean; override;
         procedure handleopcode;override;
         procedure BuildReference(oper : tarmoperand);
         procedure BuildOperand(oper : tarmoperand);
+        procedure BuildSpecialreg(oper : tarmoperand);
         function TryBuildShifterOp(oper : tarmoperand) : boolean;
         procedure BuildOpCode(instr : tarminstruction);
         procedure ReadSym(oper : tarmoperand);
         procedure ConvertCalljmp(instr : tarminstruction);
+        procedure HandleTargetDirective; override;
       end;
 
 
@@ -51,19 +54,13 @@ Unit raarmgas;
       { helpers }
       cutils,
       { global }
-      globtype,globals,verbose,
-      systems,
-      { aasm }
-      cpuinfo,aasmbase,aasmtai,aasmdata,aasmcpu,
+      globtype,verbose,
+      systems,aasmbase,aasmtai,aasmdata,aasmcpu,
       { symtable }
-      symconst,symbase,symtype,symsym,symtable,
-      { parser }
-      scanner,
+      symconst,symsym,
       procinfo,
-      itcpugas,
       rabase,rautils,
-      cgbase,cgutils,cgobj
-      ;
+      cgbase,cgutils;
 
 
     function tarmattreader.is_register(const s:string):boolean;
@@ -118,6 +115,16 @@ Unit raarmgas;
           end;
       end;
 
+    { Reports whether s names a directive this reader handles itself
+      ('.thumb_func' or '.thumb_set'); every other name is passed on to the
+      inherited check. }
+    function tarmattreader.is_targetdirective(const s: string): boolean;
+      begin
+        if (s='.thumb_func') or (s='.thumb_set') then
+          exit(true);
+        result:=inherited is_targetdirective(s);
+      end;
+
 
     procedure tarmattreader.ReadSym(oper : tarmoperand);
       var
@@ -925,6 +932,13 @@ Unit raarmgas;
               oper.opr.regtype:=regtype;
               oper.opr.subreg:=subreg;
               oper.opr.regset:=registerset;
+              if actasmtoken=AS_XOR then
+                begin
+                  consume(AS_XOR);
+                  oper.opr.usermode:=true;
+                end
+              else
+                oper.opr.usermode:=false;
               if (registerset=[]) then
                 Message(asmr_e_empty_regset);
             end;
@@ -939,6 +953,68 @@ Unit raarmgas;
         end; { end case }
       end;
 
+    { Parses a special register operand into oper.
+
+      Accepted forms:
+        - a plain register token, stored as OPR_REGISTER;
+        - an identifier "<reg>_<fields>" where <reg> is cpsr or spsr and
+          <fields> is a non-empty combination of the letters c, x, f and s,
+          stored as OPR_SPECIALREG with the matching srC/srX/srF/srS flags.
+      Anything else is reported as an assembler reader error. }
+    procedure tarmattreader.BuildSpecialreg(oper: tarmoperand);
+      var
+        hs, reg : String;
+        ch : char;
+        i, t : longint;
+        hreg : tregister;
+        flags : tspecialregflags;
+      begin
+        { guarantees that a defined value ends up in the operand even after
+          an invalid register name has been reported }
+        hreg:=NR_NO;
+        case actasmtoken of
+          AS_REGISTER:
+            begin
+              oper.opr.typ:=OPR_REGISTER;
+              oper.opr.reg:=actasmregister;
+              Consume(AS_REGISTER);
+            end;
+          AS_ID:
+            begin
+              t := pos('_', actasmpattern);
+              if t > 0 then
+                begin
+                  { split "<reg>_<fields>" at the first underscore }
+                  hs:=lower(actasmpattern);
+                  reg:=copy(hs, 1, t-1);
+                  delete(hs, 1, t);
+
+                  if length(hs) < 1 then
+                    Message(asmr_e_invalid_operand_type);
+
+                  if reg = 'cpsr' then
+                    hreg:=NR_CPSR
+                  else if reg='spsr' then
+                    hreg:=NR_SPSR
+                  else
+                    Message(asmr_e_invalid_register);
+
+                  { translate every field letter into its flag }
+                  flags:=[];
+                  for i := 1 to length(hs) do
+                    begin
+                      ch:=hs[i];
+                      if ch='c' then
+                        include(flags, srC)
+                      else if ch='x' then
+                        include(flags, srX)
+                      else if ch='f' then
+                        include(flags, srF)
+                      else if ch='s' then
+                        include(flags, srS)
+                      else
+                        message(asmr_e_invalid_operand_type);
+                    end;
+
+                  oper.opr.typ:=OPR_SPECIALREG;
+                  oper.opr.specialreg:=hreg;
+                  oper.opr.specialregflags:=flags;
+
+                  consume(AS_ID);
+                end
+              else
+                Message(asmr_e_invalid_operand_type); // Otherwise it would have been seen as an AS_REGISTER
+            end;
+          else
+            begin
+              { No other token can start this operand. Report it and skip
+                it: the operand loop of the caller would otherwise be handed
+                the very same token again. This assumes the caller deals
+                with commas and statement terminators before calling this
+                routine - TODO confirm. }
+              Message(asmr_e_invalid_operand_type);
+              Consume(actasmtoken);
+            end;
+        end;
+      end;
+
 
 {*****************************************************************************
                                 tarmattreader
@@ -979,7 +1055,7 @@ Unit raarmgas;
             AS_COMMA: { Operand delimiter }
               Begin
                 if ((instr.opcode in [A_MOV, A_MVN, A_CMP, A_CMN, A_TST, A_TEQ]) and (operandnum=2)) or
-                  ((operandnum=3) and not(instr.opcode in [A_UMLAL,A_UMULL,A_SMLAL,A_SMULL,A_MLA])) then
+                  ((operandnum=3) and not(instr.opcode in [A_UMLAL,A_UMULL,A_SMLAL,A_SMULL,A_MLA,A_MRC,A_MCR,A_MCRR,A_MRRC])) then
                   begin
                     Consume(AS_COMMA);
                     if not(TryBuildShifterOp(instr.Operands[operandnum+1] as tarmoperand)) then
@@ -1001,7 +1077,10 @@ Unit raarmgas;
                 break;
               end;
           else
-            BuildOperand(instr.Operands[operandnum] as tarmoperand);
+            if (instr.opcode = A_MSR) and (operandnum = 1) then
+              BuildSpecialreg(instr.Operands[operandnum] as tarmoperand)
+            else
+              BuildOperand(instr.Operands[operandnum] as tarmoperand);
           end; { end case }
         until false;
         instr.Ops:=operandnum;
@@ -1132,6 +1211,31 @@ Unit raarmgas;
           end;
       end;
 
+    { Handles the ARM specific assembler directives of this reader:
+        .thumb_set <symbol>, <value>  appends a tai_thumb_set entry
+        .thumb_func                   appends a tai_thumb_func entry
+      Any other directive is forwarded to the inherited handler. }
+    procedure tarmattreader.HandleTargetDirective;
+      var
+        symname,
+        symval  : String;
+        val     : aint;
+        symtyp  : TAsmsymtype;
+      begin
+        if actasmpattern='.thumb_set' then
+          begin
+            consume(AS_TARGET_DIRECTIVE);
+            { both operands are read as symbol expressions; only the symbol
+              names are used below, val and symtyp are not looked at }
+            BuildConstSymbolExpression(true,false,false, val,symname,symtyp);
+            Consume(AS_COMMA);
+            BuildConstSymbolExpression(true,false,false, val,symval,symtyp);
+
+            curList.concat(tai_thumb_set.create(symname,symval));
+          end
+        else if actasmpattern='.thumb_func' then
+          begin
+            { directive without operands }
+            consume(AS_TARGET_DIRECTIVE);
+            curList.concat(tai_thumb_func.create);
+          end
+        else
+          inherited HandleTargetDirective;
+      end;
+
 
     procedure tarmattreader.handleopcode;
       var

+ 34 - 1
compiler/arm/rarmcon.inc

@@ -88,5 +88,38 @@ NR_D28 = tregister($0407001C);
 NR_D29 = tregister($0407001D);
 NR_D30 = tregister($0407001E);
 NR_D31 = tregister($0407001F);
-NR_CPSR_C = tregister($05000000);
+NR_CPSR = tregister($05000000);
 NR_FPSCR = tregister($05000001);
+NR_SPSR = tregister($05000002);
+NR_APSR_nzcv = tregister($05000003);
+NR_CR0 = tregister($05000004);
+NR_CR1 = tregister($05000005);
+NR_CR2 = tregister($05000006);
+NR_CR3 = tregister($05000007);
+NR_CR4 = tregister($05000008);
+NR_CR5 = tregister($05000009);
+NR_CR6 = tregister($0500000A);
+NR_CR7 = tregister($0500000B);
+NR_CR8 = tregister($0500000C);
+NR_CR9 = tregister($0500000D);
+NR_CR10 = tregister($0500000E);
+NR_CR11 = tregister($0500000F);
+NR_CR12 = tregister($05000010);
+NR_CR13 = tregister($05000011);
+NR_CR14 = tregister($05000012);
+NR_CR15 = tregister($05000013);
+NR_p15 = tregister($05000014);
+NR_APSR = tregister($05000015);
+NR_IPSR = tregister($05000016);
+NR_EPSR = tregister($05000017);
+NR_IEPSR = tregister($05000018);
+NR_IAPSR = tregister($05000019);
+NR_EAPSR = tregister($0500001A);
+NR_PSR = tregister($0500001B);
+NR_MSP = tregister($0500001C);
+NR_PSP = tregister($0500001D);
+NR_PRIMASK = tregister($0500001E);
+NR_BASEPRI = tregister($0500001F);
+NR_BASEPRI_MAX = tregister($05000020);
+NR_FAULTMASK = tregister($05000021);
+NR_CONTROL = tregister($05000022);

+ 33 - 0
compiler/arm/rarmdwa.inc

@@ -89,4 +89,37 @@
 0,
 0,
 0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
 0

+ 1 - 1
compiler/arm/rarmnor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from armreg.dat }
-91
+124

+ 34 - 1
compiler/arm/rarmnum.inc

@@ -89,4 +89,37 @@ tregister($0407001D),
 tregister($0407001E),
 tregister($0407001F),
 tregister($05000000),
-tregister($05000001)
+tregister($05000001),
+tregister($05000002),
+tregister($05000003),
+tregister($05000004),
+tregister($05000005),
+tregister($05000006),
+tregister($05000007),
+tregister($05000008),
+tregister($05000009),
+tregister($0500000A),
+tregister($0500000B),
+tregister($0500000C),
+tregister($0500000D),
+tregister($0500000E),
+tregister($0500000F),
+tregister($05000010),
+tregister($05000011),
+tregister($05000012),
+tregister($05000013),
+tregister($05000014),
+tregister($05000015),
+tregister($05000016),
+tregister($05000017),
+tregister($05000018),
+tregister($05000019),
+tregister($0500001A),
+tregister($0500001B),
+tregister($0500001C),
+tregister($0500001D),
+tregister($0500001E),
+tregister($0500001F),
+tregister($05000020),
+tregister($05000021),
+tregister($05000022)

+ 34 - 1
compiler/arm/rarmrni.inc

@@ -89,4 +89,37 @@
 87,
 88,
 89,
-90
+90,
+91,
+92,
+93,
+94,
+95,
+96,
+97,
+98,
+99,
+100,
+101,
+102,
+103,
+104,
+105,
+106,
+107,
+108,
+109,
+110,
+111,
+112,
+113,
+114,
+115,
+116,
+117,
+118,
+119,
+120,
+121,
+122,
+123

+ 34 - 1
compiler/arm/rarmsri.inc

@@ -1,6 +1,27 @@
 { don't edit, this file is generated from armreg.dat }
 0,
+110,
+92,
+120,
+121,
+123,
 89,
+93,
+94,
+103,
+104,
+105,
+106,
+107,
+108,
+95,
+96,
+97,
+98,
+99,
+100,
+101,
+102,
 27,
 30,
 57,
@@ -33,6 +54,8 @@
 48,
 51,
 54,
+115,
+112,
 17,
 18,
 19,
@@ -41,7 +64,16 @@
 22,
 23,
 24,
+122,
 90,
+114,
+113,
+111,
+117,
+109,
+119,
+118,
+116,
 1,
 2,
 11,
@@ -89,4 +121,5 @@
 34,
 35,
 37,
-38
+38,
+91

+ 33 - 0
compiler/arm/rarmsta.inc

@@ -89,4 +89,37 @@
 0,
 0,
 0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
+0,
 0

+ 35 - 2
compiler/arm/rarmstd.inc

@@ -88,5 +88,38 @@
 'd29',
 'd30',
 'd31',
-'cpsr_c',
-'fpscr'
+'cpsr',
+'fpscr',
+'spsr',
+'apsr_nzcv',
+'cr0',
+'cr1',
+'cr2',
+'cr3',
+'cr4',
+'cr5',
+'cr6',
+'cr7',
+'cr8',
+'cr9',
+'cr10',
+'cr11',
+'cr12',
+'cr13',
+'cr14',
+'cr15',
+'p15',
+'apsr',
+'ipsr',
+'epsr',
+'iepsr',
+'iapsr',
+'eapsr',
+'psr',
+'msp',
+'psp',
+'primask',
+'basepri',
+'basepri_max',
+'faultmask',
+'control'

+ 34 - 1
compiler/arm/rarmsup.inc

@@ -88,5 +88,38 @@ RS_D28 = $1C;
 RS_D29 = $1D;
 RS_D30 = $1E;
 RS_D31 = $1F;
-RS_CPSR_C = $00;
+RS_CPSR = $00;
 RS_FPSCR = $01;
+RS_SPSR = $02;
+RS_APSR_nzcv = $03;
+RS_CR0 = $04;
+RS_CR1 = $05;
+RS_CR2 = $06;
+RS_CR3 = $07;
+RS_CR4 = $08;
+RS_CR5 = $09;
+RS_CR6 = $0A;
+RS_CR7 = $0B;
+RS_CR8 = $0C;
+RS_CR9 = $0D;
+RS_CR10 = $0E;
+RS_CR11 = $0F;
+RS_CR12 = $10;
+RS_CR13 = $11;
+RS_CR14 = $12;
+RS_CR15 = $13;
+RS_p15 = $14;
+RS_APSR = $15;
+RS_IPSR = $16;
+RS_EPSR = $17;
+RS_IEPSR = $18;
+RS_IAPSR = $19;
+RS_EAPSR = $1A;
+RS_PSR = $1B;
+RS_MSP = $1C;
+RS_PSP = $1D;
+RS_PRIMASK = $1E;
+RS_BASEPRI = $1F;
+RS_BASEPRI_MAX = $20;
+RS_FAULTMASK = $21;
+RS_CONTROL = $22;

+ 101 - 1
compiler/arm/rgcpu.pas

@@ -31,6 +31,9 @@ unit rgcpu;
        aasmbase,aasmtai,aasmdata,aasmcpu,
        cgbase,cgutils,
        cpubase,
+       {$ifdef DEBUG_SPILLING}
+       cutils,
+       {$endif}
        rgobj;
 
      type
@@ -45,6 +48,9 @@ unit rgcpu;
        end;
 
        trgcputhumb2 = class(trgobj)
+       private
+         procedure SplitITBlock(list:TAsmList;pos:tai);
+       public
          procedure do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
          procedure do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
        end;
@@ -60,17 +66,24 @@ unit rgcpu;
   implementation
 
     uses
-      verbose, cutils,globtype,globals,cpuinfo,
+      verbose,globtype,globals,cpuinfo,
       cgobj,
       procinfo;
 
     procedure trgintcputhumb2.add_cpu_interferences(p: tai);
       var
         r : tregister;
+        hr : longint;
       begin
         if p.typ=ait_instruction then
           begin
             case taicpu(p).opcode of
+              A_CBNZ,
+              A_CBZ:
+                begin
+                  for hr := RS_R8 to RS_R15 do
+                    add_edge(getsupreg(taicpu(p).oper[0]^.reg), hr);
+                end;
               A_ADD:
                 begin
                   if taicpu(p).ops = 3 then
@@ -245,6 +258,69 @@ unit rgcpu;
           result:=getsubreg(r);
       end;
 
+    { Table lookup used when an IT block has to be split in two.
+
+      originalOp  : opcode of the IT instruction that currently opens the block;
+                    the tables are indexed A_ITE..A_ITTTT only.
+      remLevels   : number of conditional slots of the block that follow the
+                    split point; the tables are indexed 1..3 only.
+      newOp       : (out) opcode the original IT instruction is changed to.
+      NeedsCondSwap : (out) true when the remainder IT instruction has to use
+                    the inverse of the original condition.
+      result      : opcode of the IT instruction opening the remainder block.
+
+      A_NONE entries mark combinations for which no split is defined; the
+      caller is expected to reject them. No range checking is done here. }
+    function GetITRemainderOp(originalOp:TAsmOp;remLevels:longint;var newOp: TAsmOp;var NeedsCondSwap:boolean) : TAsmOp;
+      const
+        { opcode of the IT instruction covering the trailing remLevels slots }
+        remOps : array[1..3] of array[A_ITE..A_ITTTT] of TAsmOp = (
+          (A_IT,A_IT,       A_IT,A_IT,A_IT,A_IT,            A_IT,A_IT,A_IT,A_IT,A_IT,A_IT,A_IT,A_IT),
+          (A_NONE,A_NONE,   A_ITT,A_ITE,A_ITE,A_ITT,        A_ITT,A_ITT,A_ITE,A_ITE,A_ITE,A_ITE,A_ITT,A_ITT),
+          (A_NONE,A_NONE,   A_NONE,A_NONE,A_NONE,A_NONE,    A_ITTT,A_ITEE,A_ITET,A_ITTE,A_ITTE,A_ITET,A_ITEE,A_ITTT));
+        { shortened opcode for the original IT instruction (leading slots) }
+        newOps : array[1..3] of array[A_ITE..A_ITTTT] of TAsmOp = (
+          (A_IT,A_IT,       A_ITE,A_ITT,A_ITE,A_ITT,        A_ITEE,A_ITTE,A_ITET,A_ITTT,A_ITEE,A_ITTE,A_ITET,A_ITTT),
+          (A_NONE,A_NONE,   A_IT,A_IT,A_IT,A_IT,            A_ITE,A_ITT,A_ITE,A_ITT,A_ITE,A_ITT,A_ITE,A_ITT),
+          (A_NONE,A_NONE,   A_NONE,A_NONE,A_NONE,A_NONE,    A_IT,A_IT,A_IT,A_IT,A_IT,A_IT,A_IT,A_IT));
+        { whether the remainder block starts on the inverted condition }
+        needsSwap: array[1..3] of array[A_ITE..A_ITTTT] of Boolean = (
+          (true ,false,     true ,true ,false,false,        true ,true ,true ,true ,false,false,false,false),
+          (false,false,     true ,false,true ,false,        true ,true ,false,false,true ,true ,false,false),
+          (false,false,     false,false,false,false,        true ,false,true ,false,true ,false,true ,false));
+      begin
+        result:=remOps[remLevels][originalOp];
+        newOp:=newOps[remLevels][originalOp];
+        NeedsCondSwap:=needsSwap[remLevels][originalOp];
+      end;
+
+    { Splits the IT block that contains pos so that pos becomes the last
+      instruction covered by the original (shortened) IT instruction, and a
+      new IT instruction covering the remaining conditional instructions is
+      inserted directly after pos.
+
+      list : assembler list that holds pos; receives the new IT instruction.
+      pos  : conditional instruction inside an IT block.
+
+      Raises internalerror 2012100801/2012100802 when no IT instruction is
+      found before pos, and 2012100803 when the split tables have no entry
+      for the required split. }
+    procedure trgcputhumb2.SplitITBlock(list: TAsmList; pos: tai);
+      var
+        hp : tai;
+        level,itLevel : LongInt;
+        remOp,newOp : TAsmOp;
+        needsSwap : boolean;
+      begin
+        hp:=pos;
+        level := 0;
+        { walk backwards to the opening IT instruction, counting the
+          instructions (including pos) that lie between it and the split point }
+        while assigned(hp) do
+          begin
+            { only real instructions carry an opcode; other tai kinds (labels,
+              comments, ...) must not be cast to taicpu and are skipped }
+            if hp.typ=ait_instruction then
+              begin
+                if IsIT(taicpu(hp).opcode) then
+                  break;
+                inc(level);
+              end;
+
+            hp:=tai(hp.Previous);
+          end;
+
+        if not assigned(hp) then
+          internalerror(2012100801); // We are supposed to have found the ITxxx instruction here
+
+        if (hp.typ<>ait_instruction) or
+          (not IsIT(taicpu(hp).opcode)) then
+          internalerror(2012100802); // Sanity check
+
+        itLevel := GetITLevels(taicpu(hp).opcode);
+        if level=itLevel then
+          exit; // pos was the last instruction in the IT block anyway
+
+        { itLevel-level conditional slots remain after pos }
+        remOp:=GetITRemainderOp(taicpu(hp).opcode,itLevel-level,newOp,needsSwap);
+
+        if (remOp=A_NONE) or
+          (newOp=A_NONE) then
+          Internalerror(2012100803);
+
+        { shorten the original IT instruction so it ends at pos }
+        taicpu(hp).opcode:=newOp;
+
+        { open the remainder block after pos, on the inverted condition if
+          the first remaining slot was an "else" slot }
+        if needsSwap then
+          list.InsertAfter(taicpu.op_cond(remOp,inverse_cond(taicpu(hp).oper[0]^.cc)), pos)
+        else
+          list.InsertAfter(taicpu.op_cond(remOp,taicpu(hp).oper[0]^.cc), pos);
+      end;
 
     procedure trgcputhumb2.do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
       var
@@ -267,6 +343,18 @@ unit rgcpu;
           (taicpu(pos).oper[1]^.reg=NR_PC) then
           pos:=tai(pos.previous);
 
+        if (pos.typ=ait_instruction) and
+          (taicpu(pos).condition<>C_None) and
+          (taicpu(pos).opcode<>A_B) then
+          SplitITBlock(list, pos)
+        else if (pos.typ=ait_instruction) and
+          IsIT(taicpu(pos).opcode) then
+          begin
+            if not assigned(pos.Previous) then
+              list.InsertBefore(tai_comment.Create('Dummy'), pos);
+            pos:=tai(pos.Previous);
+          end;
+
         if (spilltemp.offset>4095) or (spilltemp.offset<-255) then
           begin
             helplist:=TAsmList.create;
@@ -313,6 +401,18 @@ unit rgcpu;
         l : tasmlabel;
         hreg : tregister;
       begin
+        if (pos.typ=ait_instruction) and
+          (taicpu(pos).condition<>C_None) and
+          (taicpu(pos).opcode<>A_B) then
+          SplitITBlock(list, pos)
+        else if (pos.typ=ait_instruction) and
+          IsIT(taicpu(pos).opcode) then
+          begin
+            if not assigned(pos.Previous) then
+              list.InsertBefore(tai_comment.Create('Dummy'), pos);
+            pos:=tai(pos.Previous);
+          end;
+
         if (spilltemp.offset>4095) or (spilltemp.offset<-255) then
           begin
             helplist:=TAsmList.create;

+ 6 - 2
compiler/asmutils.pas

@@ -60,7 +60,10 @@ uses
         current_asmdata.getdatalabel(result.lab);
         result.ofs:=0;
         if NewSection then
-          new_section(list,sec_rodata,result.lab.name,const_align(sizeof(pint)));
+          begin
+            maybe_new_object_file(list);
+            new_section(list,sec_rodata_norel,result.lab.name,const_align(sizeof(pint)));
+          end;
         { put label before header on Darwin, because there the linker considers
           a global symbol to be the start of a new subsection }
         if target_info.system in systems_darwin then
@@ -101,7 +104,8 @@ uses
       begin
         current_asmdata.getdatalabel(result.lab);
         result.ofs:=0;
-        new_section(list,sec_rodata,result.lab.name,const_align(sizeof(pint)));
+        maybe_new_object_file(list);
+        new_section(list,sec_rodata_norel,result.lab.name,const_align(sizeof(pint)));
         strlength := getlengthwidestring(pcompilerwidestring(data));
         if Winlike then
           begin

+ 33 - 11
compiler/assemble.pas

@@ -66,7 +66,7 @@ interface
       }
       TExternalAssembler=class(TAssembler)
       private
-        procedure CreateSmartLinkPath(const s:string);
+        procedure CreateSmartLinkPath(const s:TPathStr);
       protected
       {outfile}
         AsmSize,
@@ -291,7 +291,7 @@ Implementation
       end;
 
 
-    procedure TExternalAssembler.CreateSmartLinkPath(const s:string);
+    procedure TExternalAssembler.CreateSmartLinkPath(const s:TPathStr);
 
         procedure DeleteFilesWithExt(const AExt:string);
         var
@@ -307,7 +307,7 @@ Implementation
         end;
 
       var
-        hs  : string;
+        hs  : TPathStr;
       begin
         if PathExists(s,false) then
          begin
@@ -580,10 +580,29 @@ Implementation
       begin
         result:=target_asm.asmcmd;
 {$ifdef m68k}
-        if current_settings.cputype = cpu_MC68020 then
-          result:='-m68020 '+result
+        { TODO: use a better approach for this }
+        if (target_info.system=system_m68k_amiga) then
+          begin
+            { m68k-amiga has old binutils, which doesn't support -march=* }
+            case current_settings.cputype of
+              cpu_MC68000:
+                result:='-m68000 '+result;
+              cpu_MC68020:
+                result:='-m68020 '+result;
+              { additionally, AmigaOS doesn't work on Coldfire }
+            end;
+          end
         else
-          result:='-m68000 '+result;
+          begin
+            case current_settings.cputype of
+              cpu_MC68000:
+                result:='-march=68000 '+result;
+              cpu_MC68020:
+                result:='-march=68020 '+result;
+              cpu_Coldfire:
+                result:='-march=cfv4e '+result;
+            end;
+          end;
 {$endif}
 {$ifdef arm}
         if (target_info.system=system_arm_darwin) then
@@ -605,13 +624,13 @@ Implementation
            Replace(result,'$OBJ',maybequoted(ObjFileName));
          end;
          if (cs_create_pic in current_settings.moduleswitches) then
-		   Replace(result,'$PIC','-KPIC')
+           Replace(result,'$PIC','-KPIC')
          else
-		   Replace(result,'$PIC','');
+           Replace(result,'$PIC','');
          if (cs_asm_source in current_settings.globalswitches) then
-		   Replace(result,'$NOWARN','')
-		 else
-		   Replace(result,'$NOWARN','-W');
+           Replace(result,'$NOWARN','')
+         else
+           Replace(result,'$NOWARN','-W');
       end;
 
 
@@ -1427,6 +1446,9 @@ Implementation
                    aitconst_64bit,
                    aitconst_32bit,
                    aitconst_16bit,
+                   aitconst_64bit_unaligned,
+                   aitconst_32bit_unaligned,
+                   aitconst_16bit_unaligned,
                    aitconst_8bit :
                      begin
                        if assigned(tai_const(hp).sym) and

+ 1 - 1
compiler/avr/agavrgas.pas

@@ -205,7 +205,7 @@ unit agavrgas;
             asmbin : 'as';
             asmcmd : '-o $OBJ $ASM';
             supported_targets : [system_avr_embedded];
-            flags : [af_allowdirect,af_needar,af_smartlink_sections];
+            flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
             comment : '# ';
             dollarsign: 's';

+ 3 - 0
compiler/avr/avrreg.dat

@@ -39,3 +39,6 @@ R29,$01,$1d,r29,29,29
 R30,$01,$1e,r30,30,30
 R31,$01,$1f,r31,31,31
 
+SREG,$05,$00,sreg,0,0
+
+

+ 166 - 99
compiler/avr/cgcpu.pas

@@ -56,7 +56,7 @@ unit cgcpu;
         procedure a_call_ref(list : TAsmList;ref: treference);override;
 
         procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
-        procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
+        procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst : TRegister); override;
 
         { move instructions }
         procedure a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);override;
@@ -107,6 +107,9 @@ unit cgcpu;
         function GetStore(const ref: treference): tasmop;
 
         procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
+      protected
+        procedure a_op_reg_reg_internal(list: TAsmList; Op: TOpCG; size: TCGSize; src, srchi, dst, dsthi: TRegister);
+        procedure a_op_const_reg_internal(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg, reghi: TRegister);
       end;
 
       tcg64favr = class(tcg64f32)
@@ -125,7 +128,7 @@ unit cgcpu;
     uses
        globals,verbose,systems,cutils,
        fmodule,
-       symconst,symsym,
+       symconst,symsym,symtable,
        tgobj,rgobj,
        procinfo,cpupi,
        paramgr;
@@ -332,60 +335,22 @@ unit cgcpu;
 
 
      procedure tcgavr.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
-       var
-         mask : qword;
-         shift : byte;
-         i : byte;
-         tmpreg : tregister;
        begin
-         mask:=$ff;
-         shift:=0;
-         case op of
-           OP_OR:
-             begin
-               for i:=1 to tcgsize2size[size] do
-                 begin
-                   list.concat(taicpu.op_reg_const(A_ORI,reg,(a and mask) shr shift));
-                   reg:=GetNextReg(reg);
-                   mask:=mask shl 8;
-                   inc(shift,8);
-                 end;
-             end;
-           OP_AND:
-             begin
-               for i:=1 to tcgsize2size[size] do
-                 begin
-                   list.concat(taicpu.op_reg_const(A_ANDI,reg,(a and mask) shr shift));
-                   reg:=GetNextReg(reg);
-                   mask:=mask shl 8;
-                   inc(shift,8);
-                 end;
-             end;
-           OP_SUB:
-             begin
-               list.concat(taicpu.op_reg_const(A_SUBI,reg,a));
-               if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
-                 begin
-                   for i:=2 to tcgsize2size[size] do
-                     begin
-                       reg:=GetNextReg(reg);
-                       mask:=mask shl 8;
-                       inc(shift,8);
-                       list.concat(taicpu.op_reg_const(A_SBCI,reg,(a and mask) shr shift));
-                     end;
-                 end;
-             end;
-         else
-           begin
-             tmpreg:=getintregister(list,size);
-             a_load_const_reg(list,size,a,tmpreg);
-             a_op_reg_reg(list,op,size,tmpreg,reg);
-           end;
-         end;
+         if not(size in [OS_S8,OS_8,OS_S16,OS_16,OS_S32,OS_32]) then
+           internalerror(2012102403);
+         a_op_const_reg_internal(list,Op,size,a,reg,NR_NO);
+       end;
+
+
+     { Register/register operation for operands of up to 32 bit.
+       Any other size raises internalerror 2012102401; the work is done by
+       a_op_reg_reg_internal, which is passed NR_NO for both high registers. }
+     procedure tcgavr.a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst : TRegister);
+       begin
+         if not(size in [OS_S8,OS_8,OS_S16,OS_16,OS_S32,OS_32]) then
+           internalerror(2012102401);
+         a_op_reg_reg_internal(list,Op,size,src,NR_NO,dst,NR_NO);
        end;
 
 
-     procedure tcgavr.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister);
+     procedure tcgavr.a_op_reg_reg_internal(list : TAsmList; Op: TOpCG; size: TCGSize; src, srchi, dst, dsthi: TRegister);
        var
          countreg,
          tmpreg: tregister;
@@ -393,6 +358,31 @@ unit cgcpu;
          instr : taicpu;
          paraloc1,paraloc2,paraloc3 : TCGPara;
          l1,l2 : tasmlabel;
+         pd : tprocdef;
+
+       { Advances src and dst to the registers of the next byte. When the
+         enclosing loop counter i is 5, processing continues with the high
+         registers srchi/dsthi; otherwise the next consecutive register is used.
+         NOTE(review): the i=5 test fits loops that call this before handling
+         byte i; loops that call it after handling byte i look like they would
+         switch to the high registers one byte late - confirm for 64 bit. }
+       procedure NextSrcDst;
+         begin
+           if i=5 then
+             begin
+               dst:=dsthi;
+               src:=srchi;
+             end
+           else
+             begin
+               dst:=GetNextReg(dst);
+               src:=GetNextReg(src);
+             end;
+         end;
+
+       { Iterates tmpreg through all registers of dst: when the enclosing loop
+         counter i is 5 it continues with dsthi, otherwise with the register
+         following the current tmpreg. }
+       procedure NextTmp;
+         begin
+           if i=5 then
+             tmpreg:=dsthi
+           else
+             tmpreg:=GetNextReg(tmpreg);
+         end;
+
       begin
          case op of
            OP_ADD:
@@ -402,12 +392,10 @@ unit cgcpu;
                  begin
                    for i:=2 to tcgsize2size[size] do
                      begin
-                       dst:=GetNextReg(dst);
-                       src:=GetNextReg(src);
+                       NextSrcDst;
                        list.concat(taicpu.op_reg_reg(A_ADC,dst,src));
-                   end;
-                 end
-               else
+                     end;
+                 end;
              end;
 
            OP_SUB:
@@ -417,8 +405,7 @@ unit cgcpu;
                  begin
                    for i:=2 to tcgsize2size[size] do
                      begin
-                       dst:=GetNextReg(dst);
-                       src:=GetNextReg(src);
+                       NextSrcDst;
                        list.concat(taicpu.op_reg_reg(A_SBC,dst,src));
                      end;
                  end;
@@ -435,18 +422,16 @@ unit cgcpu;
                    for i:=2 to tcgsize2size[size] do
                      begin
                        list.concat(taicpu.op_reg(A_COM,tmpreg));
-                       tmpreg:=GetNextReg(tmpreg);
+                       NextTmp;
                      end;
                    list.concat(taicpu.op_reg(A_NEG,dst));
                    tmpreg:=GetNextReg(dst);
                    for i:=2 to tcgsize2size[size] do
                      begin
                        list.concat(taicpu.op_reg_const(A_SBCI,dst,-1));
-                       tmpreg:=GetNextReg(tmpreg);
+                       NextTmp;
                    end;
-                 end
-               else
-                 list.concat(taicpu.op_reg(A_NEG,dst));
+                 end;
              end;
 
            OP_NOT:
@@ -456,8 +441,7 @@ unit cgcpu;
                    if src<>dst then
                      a_load_reg_reg(list,OS_8,OS_8,src,dst);
                    list.concat(taicpu.op_reg(A_COM,dst));
-                   src:=GetNextReg(src);
-                   dst:=GetNextReg(dst);
+                   NextSrcDst;
                  end;
              end;
 
@@ -467,12 +451,13 @@ unit cgcpu;
                  list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src))
                else if size=OS_16 then
                  begin
+                   pd:=search_system_proc('fpc_mul_word');
                    paraloc1.init;
                    paraloc2.init;
                    paraloc3.init;
-                   paramanager.getintparaloc(pocall_default,1,u16inttype,paraloc1);
-                   paramanager.getintparaloc(pocall_default,2,u16inttype,paraloc2);
-                   paramanager.getintparaloc(pocall_default,3,pasbool8type,paraloc3);
+                   paramanager.getintparaloc(pd,1,paraloc1);
+                   paramanager.getintparaloc(pd,2,paraloc2);
+                   paramanager.getintparaloc(pd,3,paraloc3);
                    a_load_const_cgpara(list,OS_8,0,paraloc3);
                    a_load_reg_cgpara(list,OS_16,src,paraloc2);
                    a_load_reg_cgpara(list,OS_16,dst,paraloc1);
@@ -508,11 +493,11 @@ unit cgcpu;
                cg.a_label(list,l1);
                case op of
                  OP_SHR:
-                   list.concat(taicpu.op_reg(A_LSR,GetOffsetReg(dst,tcgsize2size[size]-1)));
+                   list.concat(taicpu.op_reg(A_LSR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-1)));
                  OP_SHL:
                    list.concat(taicpu.op_reg(A_LSL,dst));
                  OP_SAR:
-                   list.concat(taicpu.op_reg(A_ASR,GetOffsetReg(dst,tcgsize2size[size]-1)));
+                   list.concat(taicpu.op_reg(A_ASR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-1)));
                  OP_ROR:
                    begin
                      { load carry? }
@@ -522,7 +507,7 @@ unit cgcpu;
                          list.concat(taicpu.op_reg_const(A_SBRC,src,0));
                          list.concat(taicpu.op_none(A_SEC));
                        end;
-                     list.concat(taicpu.op_reg(A_ROR,GetOffsetReg(dst,tcgsize2size[size]-1)));
+                     list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-1)));
                    end;
                  OP_ROL:
                    begin
@@ -530,7 +515,7 @@ unit cgcpu;
                      if not(size in [OS_8,OS_S8]) then
                        begin
                          list.concat(taicpu.op_none(A_CLC));
-                         list.concat(taicpu.op_reg_const(A_SBRC,GetOffsetReg(dst,tcgsize2size[size]-1),7));
+                         list.concat(taicpu.op_reg_const(A_SBRC,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-1),7));
                          list.concat(taicpu.op_none(A_SEC));
                        end;
                      list.concat(taicpu.op_reg(A_ROL,dst))
@@ -545,12 +530,12 @@ unit cgcpu;
                        case op of
                          OP_ROR,
                          OP_SHR:
-                           list.concat(taicpu.op_reg(A_ROR,GetOffsetReg(dst,tcgsize2size[size]-i)));
+                           list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-i)));
                          OP_ROL,
                          OP_SHL:
-                           list.concat(taicpu.op_reg(A_ROL,GetOffsetReg(dst,i-1)));
+                           list.concat(taicpu.op_reg(A_ROL,GetOffsetReg64(dst,dsthi,i-1)));
                          OP_SAR:
-                           list.concat(taicpu.op_reg(A_ROR,GetOffsetReg(dst,tcgsize2size[size]-i)));
+                           list.concat(taicpu.op_reg(A_ROR,GetOffsetReg64(dst,dsthi,tcgsize2size[size]-i)));
                          else
                            internalerror(2011030902);
                        end;
@@ -569,8 +554,7 @@ unit cgcpu;
                 for i:=1 to tcgsize2size[size] do
                   begin
                     list.concat(taicpu.op_reg_reg(topcg2asmop[op],dst,src));
-                    dst:=GetNextReg(dst);
-                    src:=GetNextReg(src);
+                    NextSrcDst;
                   end;
              end;
            else
@@ -578,6 +562,81 @@ unit cgcpu;
          end;
        end;
 
+     { Emits code for "reg := reg op a" with a constant operand, one byte
+       register at a time.
+
+       list      : assembler list the instructions are appended to.
+       Op        : operation; OR, AND and SUB are done with immediate
+                   instructions, every other operation loads the constant
+                   into a temporary register and uses the register variant.
+       size      : operand size.
+       a         : constant operand.
+       reg,reghi : first register of the low part and of the high part;
+                   reghi is only used from the fifth byte on. }
+     procedure tcgavr.a_op_const_reg_internal(list: TAsmList; Op: TOpCG;
+      size: TCGSize; a: tcgint; reg, reghi: TRegister);
+
+       var
+         mask : qword;
+         shift : byte;
+         i : byte;
+         tmpreg : tregister;
+         tmpreg64 : tregister64;
+
+      { Moves reg on to the register holding byte i (counted from 1); has to
+        be called before byte i is processed, so that the fifth byte switches
+        over to reghi. }
+      procedure NextReg;
+        begin
+          if i=5 then
+            reg:=reghi
+          else
+            reg:=GetNextReg(reg);
+        end;
+
+       begin
+         mask:=$ff;
+         shift:=0;
+         case op of
+           OP_OR:
+             begin
+               for i:=1 to tcgsize2size[size] do
+                 begin
+                   { advance before handling byte i, so byte 5 lands on reghi }
+                   if i>1 then
+                     begin
+                       NextReg;
+                       mask:=mask shl 8;
+                       inc(shift,8);
+                     end;
+                   list.concat(taicpu.op_reg_const(A_ORI,reg,(a and mask) shr shift));
+                 end;
+             end;
+           OP_AND:
+             begin
+               for i:=1 to tcgsize2size[size] do
+                 begin
+                   { advance before handling byte i, so byte 5 lands on reghi }
+                   if i>1 then
+                     begin
+                       NextReg;
+                       mask:=mask shl 8;
+                       inc(shift,8);
+                     end;
+                   list.concat(taicpu.op_reg_const(A_ANDI,reg,(a and mask) shr shift));
+                 end;
+             end;
+           OP_SUB:
+             begin
+               { only the low byte of the constant belongs to the first
+                 instruction; the remaining bytes follow with carry }
+               list.concat(taicpu.op_reg_const(A_SUBI,reg,(a and mask) shr shift));
+               if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
+                 begin
+                   for i:=2 to tcgsize2size[size] do
+                     begin
+                       NextReg;
+                       mask:=mask shl 8;
+                       inc(shift,8);
+                       list.concat(taicpu.op_reg_const(A_SBCI,reg,(a and mask) shr shift));
+                     end;
+                 end;
+             end;
+         else
+           begin
+             { no immediate form: materialise the constant in a register }
+             if size in [OS_64,OS_S64] then
+               begin
+                 tmpreg64.reglo:=getintregister(list,OS_32);
+                 tmpreg64.reghi:=getintregister(list,OS_32);
+                 cg64.a_load64_const_reg(list,a,tmpreg64);
+                 cg64.a_op64_reg_reg(list,op,size,tmpreg64,joinreg64(reg,reghi));
+               end
+             else
+               begin
+                 tmpreg:=getintregister(list,size);
+                 a_load_const_reg(list,size,a,tmpreg);
+                 a_op_reg_reg(list,op,size,tmpreg,reg);
+               end;
+           end;
+       end;
+     end;
+
 
      procedure tcgavr.a_load_const_reg(list : TAsmList; size: tcgsize; a : tcgint;reg : tregister);
        var
@@ -1020,18 +1079,20 @@ unit cgcpu;
                  end;
                OS_S8:
                  begin
-                   { dest is always at least 16 bit at this point }
                    emit_mov(list,reg2,reg1);
 
-                   reg2:=GetNextReg(reg2);
-                   list.concat(taicpu.op_reg(A_CLR,reg2));
-                   list.concat(taicpu.op_reg_const(A_SBRC,reg1,7));
-                   list.concat(taicpu.op_reg(A_COM,reg2));
-                   tmpreg:=reg2;
-                   for i:=3 to tcgsize2size[tosize] do
+                   if tcgsize2size[tosize]>1 then
                      begin
                        reg2:=GetNextReg(reg2);
-                       emit_mov(list,reg2,tmpreg);
+                       list.concat(taicpu.op_reg(A_CLR,reg2));
+                       list.concat(taicpu.op_reg_const(A_SBRC,reg1,7));
+                       list.concat(taicpu.op_reg(A_COM,reg2));
+                       tmpreg:=reg2;
+                       for i:=3 to tcgsize2size[tosize] do
+                         begin
+                           reg2:=GetNextReg(reg2);
+                           emit_mov(list,reg2,tmpreg);
+                         end;
                      end;
                  end;
                OS_16:
@@ -1050,22 +1111,24 @@ unit cgcpu;
                  end;
                OS_S16:
                  begin
-                   { dest is always at least 32 bit at this point }
                    emit_mov(list,reg2,reg1);
 
                    reg1:=GetNextReg(reg1);
                    reg2:=GetNextReg(reg2);
                    emit_mov(list,reg2,reg1);
 
-                   reg2:=GetNextReg(reg2);
-                   list.concat(taicpu.op_reg(A_CLR,reg2));
-                   list.concat(taicpu.op_reg_const(A_SBRC,reg1,7));
-                   list.concat(taicpu.op_reg(A_COM,reg2));
-                   tmpreg:=reg2;
-                   for i:=4 to tcgsize2size[tosize] do
+                   if tcgsize2size[tosize]>2 then
                      begin
                        reg2:=GetNextReg(reg2);
-                       emit_mov(list,reg2,tmpreg);
+                       list.concat(taicpu.op_reg(A_CLR,reg2));
+                       list.concat(taicpu.op_reg_const(A_SBRC,reg1,7));
+                       list.concat(taicpu.op_reg(A_COM,reg2));
+                       tmpreg:=reg2;
+                       for i:=4 to tcgsize2size[tosize] do
+                         begin
+                           reg2:=GetNextReg(reg2);
+                           emit_mov(list,reg2,tmpreg);
+                         end;
                      end;
                  end;
                else
@@ -1447,13 +1510,15 @@ unit cgcpu;
     procedure tcgavr.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
       var
         paraloc1,paraloc2,paraloc3 : TCGPara;
+        pd : tprocdef;
       begin
+        pd:=search_system_proc('MOVE');
         paraloc1.init;
         paraloc2.init;
         paraloc3.init;
-        paramanager.getintparaloc(pocall_default,1,voidpointertype,paraloc1);
-        paramanager.getintparaloc(pocall_default,2,voidpointertype,paraloc2);
-        paramanager.getintparaloc(pocall_default,3,ptrsinttype,paraloc3);
+        paramanager.getintparaloc(pd,1,paraloc1);
+        paramanager.getintparaloc(pd,2,paraloc2);
+        paramanager.getintparaloc(pd,3,paraloc3);
         a_load_const_cgpara(list,OS_SINT,len,paraloc3);
         a_loadaddr_ref_cgpara(list,dest,paraloc2);
         a_loadaddr_ref_cgpara(list,source,paraloc1);
@@ -1733,13 +1798,15 @@ unit cgcpu;
 
     procedure tcg64favr.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
       begin
-        { TODO : a_op64_reg_reg }
+         if not(size in [OS_S64,OS_64]) then
+           internalerror(2012102402);
+         tcgavr(cg).a_op_reg_reg_internal(list,Op,size,regsrc.reglo,regsrc.reghi,regdst.reglo,regdst.reghi);
       end;
 
 
     procedure tcg64favr.a_op64_const_reg(list : TAsmList;op:TOpCG;size : tcgsize;value : int64;reg : tregister64);
       begin
-        { TODO : a_op64_const_reg }
+        tcgavr(cg).a_op_const_reg_internal(list,Op,size,value,reg.reglo,reg.reghi);
       end;
 
 

+ 16 - 1
compiler/avr/cpubase.pas

@@ -262,6 +262,9 @@ unit cpubase;
       { Offset where the parent framepointer is pushed }
       PARENT_FRAMEPOINTER_OFFSET = 0;
 
+      NR_DEFAULTFLAGS = NR_SREG;
+      RS_DEFAULTFLAGS = RS_SREG;
+
 {*****************************************************************************
                        GCC /ABI linking information
 *****************************************************************************}
@@ -314,7 +317,10 @@ unit cpubase;
     { returns the last virtual register }
     function GetLastReg(const r : TRegister) : TRegister;
 
+    { returns the register at offset ofs within a contiguous set of registers starting with r }
     function GetOffsetReg(const r : TRegister;ofs : shortint) : TRegister;
+    { returns the register at offset ofs within a contiguous set of registers that starts with r and is continued with rhi }
+    function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
 
   implementation
 
@@ -323,7 +329,7 @@ unit cpubase;
 
 
     const
-      std_regname_table : array[tregisterindex] of string[7] = (
+      std_regname_table : TRegNameTable = (
         {$i ravrstd.inc}
       );
 
@@ -460,4 +466,13 @@ unit cpubase;
       end;
 
 
+    function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
+      begin
+        { offsets up to 3 are taken relative to r, larger ones continue in rhi }
+        if ofs<=3 then
+          result:=TRegister(longint(r)+ofs)
+        else
+          result:=TRegister(longint(rhi)+(ofs-4));
+      end;
+
+
 end.

+ 16 - 31
compiler/avr/cpuinfo.pas

@@ -44,18 +44,6 @@ Type
        cpu_avr6
       );
 
-   tcpuflags =
-      (AVR_HAS_JMP_CALL,
-       AVR_HAS_MOVW,
-       AVR_HAS_LPMX,
-       AVR_HAS_MUL,
-       AVR_HAS_RAMPZ,
-       AVR_HAS_ELPM,
-       AVR_HAS_ELPMX,
-       AVR_2_BYTE_PC,
-       AVR_3_BYTE_PC
-      );
-
    tfputype =
      (fpu_none,
       fpu_soft,
@@ -116,7 +104,6 @@ Const
    ((
    	controllertypestr:'';
         controllerunitstr:'';
-        interruptvectors:0;
         flashbase:0;
         flashsize:0;
         srambase:0;
@@ -127,7 +114,6 @@ Const
         (
    	controllertypestr:'ATMEGA16';
         controllerunitstr:'ATMEGA16';
-        interruptvectors:0;
         flashbase:0;
         flashsize:$4000;
         srambase:0;
@@ -138,7 +124,6 @@ Const
         (
    	controllertypestr:'ATMEGA32';
         controllerunitstr:'ATMEGA32';
-        interruptvectors:0;
         flashbase:0;
         flashsize:$8000;
         srambase:0;
@@ -149,7 +134,6 @@ Const
    	(
         controllertypestr:'ATMEGA48';
         controllerunitstr:'ATMEGA48';
-        interruptvectors:0;
         flashbase:0;
         flashsize:$1000;
         srambase:0;
@@ -160,7 +144,6 @@ Const
    	(
         controllertypestr:'ATMEGA64';
         controllerunitstr:'ATMEGA64';
-        interruptvectors:0;
         flashbase:0;
         flashsize:$10000;
         srambase:0;
@@ -171,7 +154,6 @@ Const
    	(
         controllertypestr:'ATMEGA128';
         controllerunitstr:'ATMEGA128';
-        interruptvectors:0;
         flashbase:0;
         flashsize:$20000;
         srambase:0;
@@ -188,25 +170,28 @@ Const
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
-				  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
-   cpuflagsstr : array[tcpuflags] of string[20] =
-      ('AVR_HAS_JMP_CALL',
-       'AVR_HAS_MOVW',
-       'AVR_HAS_LPMX',
-       'AVR_HAS_MUL',
-       'AVR_HAS_RAMPZ',
-       'AVR_HAS_ELPM',
-       'AVR_HAS_ELPMX',
-       'AVR_2_BYTE_PC',
-       'AVR_3_BYTE_PC'
-      );
-
+                                  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
+   level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
+
+ type
+   tcpuflags =
+      (CPUAVR_HAS_JMP_CALL,
+       CPUAVR_HAS_MOVW,
+       CPUAVR_HAS_LPMX,
+       CPUAVR_HAS_MUL,
+       CPUAVR_HAS_RAMPZ,
+       CPUAVR_HAS_ELPM,
+       CPUAVR_HAS_ELPMX,
+       CPUAVR_2_BYTE_PC,
+       CPUAVR_3_BYTE_PC
+      );
 
+ const
    cpu_capabilities : array[tcputype] of set of tcpuflags =
      ( { cpu_none } [],
        { cpu_avr1 } [],

+ 17 - 6
compiler/avr/cpupara.pas

@@ -37,8 +37,8 @@ unit cpupara;
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
-          function ret_in_param(def : tdef;calloption : tproccalloption) : boolean;override;
-          procedure getintparaloc(calloption : tproccalloption; nr : longint; def : tdef; var cgpara : tcgpara);override;
+          function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
+          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
@@ -68,12 +68,14 @@ unit cpupara;
       end;
 
 
-    procedure tavrparamanager.getintparaloc(calloption : tproccalloption; nr : longint; def : tdef; var cgpara : tcgpara);
+    procedure tavrparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
+        def : tdef;
       begin
         if nr<1 then
           internalerror(2002070801);
+        def:=tparavarsym(pd.paras[nr-1]).vardef;
         cgpara.reset;
         cgpara.size:=def_cgsize(def);
         cgpara.intsize:=tcgsize2size[cgpara.size];
@@ -178,22 +180,31 @@ unit cpupara;
       end;
 
 
-    function tavrparamanager.ret_in_param(def : tdef;calloption : tproccalloption) : boolean;
+    function tavrparamanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
       begin
+        { this must be system independent: safecall and record constructor results
+          are always returned in a param }
+        if (tf_safecall_exceptions in target_info.flags) and
+           (pd.proccalloption=pocall_safecall) or
+           ((pd.proctypeoption=potype_constructor)and is_record(def)) then
+          begin
+            result:=true;
+            exit;
+          end;
         case def.typ of
           recorddef:
             { this is how gcc 4.0.4 on linux seems to do it, it doesn't look like being
               ARM ABI standard compliant
             }
             result:=not((trecorddef(def).symtable.SymList.count=1) and
-              not(ret_in_param(tabstractvarsym(trecorddef(def).symtable.SymList[0]).vardef,calloption)));
+              not(ret_in_param(tabstractvarsym(trecorddef(def).symtable.SymList[0]).vardef,pd)));
           {
           objectdef
           arraydef:
             result:=not(def.size in [1,2,4]);
           }
           else
-            result:=inherited ret_in_param(def,calloption);
+            result:=inherited ret_in_param(def,pd);
         end;
       end;
 

+ 7 - 4
compiler/avr/navradd.pas

@@ -29,7 +29,7 @@ interface
        node,ncgadd,cpubase;
 
     type
-       tavraddnode = class(tcgaddnode)
+       TAVRAddNode = class(tcgaddnode)
        private
          function  GetResFlags(unsigned:Boolean):TResFlags;
        protected
@@ -54,7 +54,7 @@ interface
       ncgutil,tgobj,rgobj,rgcpu,cgobj,cg64f32;
 
 {*****************************************************************************
-                               TSparcAddNode
+                               TAVRAddNode
 *****************************************************************************}
 
     function tavraddnode.GetResFlags(unsigned:Boolean):TResFlags;
@@ -193,12 +193,15 @@ interface
 
         for i:=2 to tcgsize2size[left.location.size] do
           begin
-            tmpreg1:=GetNextReg(tmpreg1);
-            tmpreg2:=GetNextReg(tmpreg2);
             if i=5 then
               begin
                 tmpreg1:=left.location.registerhi;
                 tmpreg2:=right.location.registerhi;
+              end
+            else
+              begin
+                tmpreg1:=GetNextReg(tmpreg1);
+                tmpreg2:=GetNextReg(tmpreg2);
               end;
             current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CPC,tmpreg1,tmpreg2));
           end;

+ 4 - 0
compiler/avr/navrmat.pas

@@ -223,6 +223,10 @@ implementation
             current_procinfo.CurrTrueLabel:=current_procinfo.CurrFalseLabel;
             current_procinfo.CurrFalseLabel:=hl;
             secondpass(left);
+
+            if left.location.loc<>LOC_JUMP then
+              internalerror(2012081304);
+
             maketojumpbool(current_asmdata.CurrAsmList,left,lr_load_regvars);
             hl:=current_procinfo.CurrTrueLabel;
             current_procinfo.CurrTrueLabel:=current_procinfo.CurrFalseLabel;

+ 1 - 0
compiler/avr/ravrcon.inc

@@ -32,3 +32,4 @@ NR_R28 = tregister($0100001c);
 NR_R29 = tregister($0100001d);
 NR_R30 = tregister($0100001e);
 NR_R31 = tregister($0100001f);
+NR_SREG = tregister($05000000);

+ 2 - 1
compiler/avr/ravrdwa.inc

@@ -31,4 +31,5 @@
 28,
 29,
 30,
-31
+31,
+0

+ 1 - 1
compiler/avr/ravrnor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from avrreg.dat }
-33
+34

+ 2 - 1
compiler/avr/ravrnum.inc

@@ -31,4 +31,5 @@ tregister($0100001b),
 tregister($0100001c),
 tregister($0100001d),
 tregister($0100001e),
-tregister($0100001f)
+tregister($0100001f),
+tregister($05000000)

+ 2 - 1
compiler/avr/ravrrni.inc

@@ -31,4 +31,5 @@
 29,
 30,
 31,
-32
+32,
+33

+ 2 - 1
compiler/avr/ravrsri.inc

@@ -31,4 +31,5 @@
 7,
 8,
 9,
-10
+10,
+33

+ 2 - 1
compiler/avr/ravrsta.inc

@@ -31,4 +31,5 @@
 28,
 29,
 30,
-31
+31,
+0

+ 2 - 1
compiler/avr/ravrstd.inc

@@ -31,4 +31,5 @@
 'r28',
 'r29',
 'r30',
-'r31'
+'r31',
+'sreg'

+ 1 - 0
compiler/avr/ravrsup.inc

@@ -32,3 +32,4 @@ RS_R28 = $1c;
 RS_R29 = $1d;
 RS_R30 = $1e;
 RS_R31 = $1f;
+RS_SREG = $00;

+ 1 - 1
compiler/browcol.pas

@@ -48,7 +48,7 @@ const
     sfObject        = $00000002;
     sfClass         = $00000004;
     sfPointer       = $00000008;
-    sfHasMemInfo    = $80000000;
+    sfHasMemInfo    = $40000000;
 
 type
     TStoreCollection = object(TStringCollection)

+ 0 - 2
compiler/cclasses.pas

@@ -1506,8 +1506,6 @@ begin
 end;
 
 function TFPHashList.InternalFind(AHash:LongWord;const AName:TSymStr;out PrevIndex:Integer):Integer;
-var
-  HashIndex : Integer;
 begin
   prefetch(AName[1]);
   Result:=FHashTable^[AHash and FCapacityMask];

+ 2 - 0
compiler/cfileutl.pas

@@ -708,6 +708,8 @@ end;
         P: PChar;
       begin
         Result := s;
+        { make result unique since we're going to change it via a pchar }
+        uniquestring(result);
         L := Length(Result);
         if L=0 then
           exit;

+ 42 - 17
compiler/cg64f32.pas

@@ -61,7 +61,6 @@ unit cg64f32;
         procedure a_load64_reg_loc(list : TAsmList;reg : tregister64;const l : tlocation);override;
 
 
-
         procedure a_load64high_reg_ref(list : TAsmList;reg : tregister;const ref : treference);override;
         procedure a_load64low_reg_ref(list : TAsmList;reg : tregister;const ref : treference);override;
         procedure a_load64high_ref_reg(list : TAsmList;const ref : treference;reg : tregister);override;
@@ -366,8 +365,6 @@ unit cg64f32;
       end;
 
 
-
-
     procedure tcg64f32.a_load64_subsetref_subsetref(list: TAsmlist; const fromsref, tosref: tsubsetreference);
 
       var
@@ -648,10 +645,20 @@ unit cg64f32;
         tmploclo.init;
         tmplochi.init;
         splitparaloc64(paraloc,tmploclo,tmplochi);
-        { Keep this order of first hi before lo to have
-          the correct push order for i386 }
-        cg.a_load_reg_cgpara(list,OS_32,reg.reghi,tmplochi);
-        cg.a_load_reg_cgpara(list,OS_32,reg.reglo,tmploclo);
+        if target_info.endian=endian_big then
+          begin
+            { Keep this order of first lo before hi to have
+              the correct push order for m68k }
+            cg.a_load_reg_cgpara(list,OS_32,reg.reglo,tmploclo);
+            cg.a_load_reg_cgpara(list,OS_32,reg.reghi,tmplochi);
+          end
+        else
+          begin
+            { Keep this order of first hi before lo to have
+              the correct push order for i386 }
+            cg.a_load_reg_cgpara(list,OS_32,reg.reghi,tmplochi);
+            cg.a_load_reg_cgpara(list,OS_32,reg.reglo,tmploclo);
+          end;
         tmploclo.done;
         tmplochi.done;
       end;
@@ -664,10 +671,20 @@ unit cg64f32;
         tmploclo.init;
         tmplochi.init;
         splitparaloc64(paraloc,tmploclo,tmplochi);
-        { Keep this order of first hi before lo to have
-          the correct push order for i386 }
-        cg.a_load_const_cgpara(list,OS_32,aint(hi(value)),tmplochi);
-        cg.a_load_const_cgpara(list,OS_32,aint(lo(value)),tmploclo);
+        if target_info.endian=endian_big then
+          begin
+            { Keep this order of first lo before hi to have
+              the correct push order for m68k }
+            cg.a_load_const_cgpara(list,OS_32,aint(lo(value)),tmploclo);
+            cg.a_load_const_cgpara(list,OS_32,aint(hi(value)),tmplochi);
+          end
+        else
+          begin
+            { Keep this order of first hi before lo to have
+              the correct push order for i386 }
+            cg.a_load_const_cgpara(list,OS_32,aint(hi(value)),tmplochi);
+            cg.a_load_const_cgpara(list,OS_32,aint(lo(value)),tmploclo);
+          end;
         tmploclo.done;
         tmplochi.done;
       end;
@@ -684,13 +701,21 @@ unit cg64f32;
         tmprefhi:=r;
         tmpreflo:=r;
         if target_info.endian=endian_big then
-          inc(tmpreflo.offset,4)
+          begin
+            { Keep this order of first lo before hi to have
+              the correct push order for m68k }
+            inc(tmpreflo.offset,4);
+            cg.a_load_ref_cgpara(list,OS_32,tmpreflo,tmploclo);
+            cg.a_load_ref_cgpara(list,OS_32,tmprefhi,tmplochi);
+          end
         else
-          inc(tmprefhi.offset,4);
-        { Keep this order of first hi before lo to have
-          the correct push order for i386 }
-        cg.a_load_ref_cgpara(list,OS_32,tmprefhi,tmplochi);
-        cg.a_load_ref_cgpara(list,OS_32,tmpreflo,tmploclo);
+          begin
+            { Keep this order of first hi before lo to have
+              the correct push order for i386 }
+            inc(tmprefhi.offset,4);
+            cg.a_load_ref_cgpara(list,OS_32,tmprefhi,tmplochi);
+            cg.a_load_ref_cgpara(list,OS_32,tmpreflo,tmploclo);
+          end;
         tmploclo.done;
         tmplochi.done;
       end;

+ 29 - 6
compiler/cgbase.pas

@@ -145,8 +145,8 @@ interface
                   OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,
                  { multi-media sizes: split in byte, word, dword, ... }
                  { entities, then the signed counterparts             }
-                  OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,
-                  OS_MS8,OS_MS16,OS_MS32,OS_MS64,OS_MS128);
+                  OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,  
+                  OS_MS8,OS_MS16,OS_MS32,OS_MS64,OS_MS128,OS_MS256 );  
 
       { Register types }
       TRegisterType = (
@@ -174,7 +174,10 @@ interface
         R_SUBFQ,   { = 8; Float that allocates 4 FPU registers }
         R_SUBMMS,  { = 9; single scalar in multi media register }
         R_SUBMMD,  { = 10; double scalar in multi media register }
-        R_SUBMMWHOLE  { = 11; complete MM register, size depends on CPU }
+        R_SUBMMWHOLE,  { = 11; complete MM register, size depends on CPU }
+        { For Intel X86 AVX-Register }
+        R_SUBMMX,     { = 12; 128 BITS }
+        R_SUBMMY      { = 13; 256 BITS }
       );
       TSubRegisterSet = set of TSubRegister;
 
@@ -209,6 +212,9 @@ interface
       { A type to store register locations for 64 Bit values. }
 {$ifdef cpu64bitalu}
       tregister64 = tregister;
+      tregister128 = record
+         reglo,reghi : tregister;
+      end;
 {$else cpu64bitalu}
       tregister64 = record
          reglo,reghi : tregister;
@@ -269,7 +275,7 @@ interface
          { floating point values }
          4,8,10,8,16,
          { multimedia values }
-         1,2,4,8,16,1,2,4,8,16);
+         1,2,4,8,16,32,1,2,4,8,16,32); 
 
        tfloat2tcgsize: array[tfloattype] of tcgsize =
          (OS_F32,OS_F64,OS_F80,OS_F80,OS_C64,OS_C64,OS_F128);
@@ -280,13 +286,25 @@ interface
        tvarregable2tcgloc : array[tvarregable] of tcgloc = (LOC_VOID,
           LOC_CREGISTER,LOC_CFPUREGISTER,LOC_CMMREGISTER,LOC_CREGISTER);
 
+{$ifdef cpu64bitalu}
+       { operand size describing an unsigned value in a pair of int registers }
+       OS_PAIR = OS_128;
+       { operand size describing a signed value in a pair of int registers }
+       OS_SPAIR = OS_S128;
+{$else cpu64bitalu}
+       { operand size describing an unsigned value in a pair of int registers }
+       OS_PAIR = OS_64;
+       { operand size describing a signed value in a pair of int registers }
+       OS_SPAIR = OS_S64;
+{$endif cpu64bitalu}
+
       { Table to convert tcgsize variables to the corresponding
          unsigned types }
        tcgsize2unsigned : array[tcgsize] of tcgsize = (OS_NO,
           OS_8,OS_16,OS_32,OS_64,OS_128,OS_8,OS_16,OS_32,OS_64,OS_128,
           OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,
-          OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M8,OS_M16,OS_M32,
-          OS_M64,OS_M128);
+          OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
+          OS_M64,OS_M128,OS_M256);
 
        tcgloc2str : array[TCGLoc] of string[12] = (
             'LOC_INVALID',
@@ -594,6 +612,11 @@ implementation
           OS_NO,OS_8,OS_16,OS_NO,OS_32,OS_NO,OS_NO,OS_NO,OS_64
         );
       begin
+{$ifdef cpu64bitalu}
+        if a=16 then
+          result:=OS_128
+        else
+{$endif cpu64bitalu}
         if a>8 then
           result:=OS_NO
         else

Unele fișiere nu au fost afișate deoarece prea multe fișiere au fost modificate în acest diff