Преглед на файлове

Synchronized with trunk, part 1 (only make cycle tested, make all is broken, avx-512 support not yet tested

git-svn-id: branches/tg74/avx512@42642 -
florian преди 6 години
родител
ревизия
746bfced25
променени са 100 файла, в които са добавени 6723 реда и са изтрити 2160 реда
  1. 352 27
      .gitattributes
  2. 83 7
      Makefile
  3. 8 2
      Makefile.fpc
  4. 439 132
      compiler/Makefile
  5. 248 56
      compiler/Makefile.fpc
  6. 4 3
      compiler/aarch64/aasmcpu.pas
  7. 3 1
      compiler/aarch64/agcpugas.pas
  8. 122 12
      compiler/aarch64/aoptcpu.pas
  9. 2 4
      compiler/aarch64/aoptcpub.pas
  10. 86 36
      compiler/aarch64/cgcpu.pas
  11. 10 0
      compiler/aarch64/cpubase.pas
  12. 113 94
      compiler/aarch64/cpupara.pas
  13. 3 0
      compiler/aarch64/cputarg.pas
  14. 4 1
      compiler/aarch64/hlcgcpu.pas
  15. 0 1
      compiler/aarch64/ncpucnv.pas
  16. 0 1
      compiler/aarch64/ncpuinl.pas
  17. 5 5
      compiler/aarch64/ncpuset.pas
  18. 3 0
      compiler/aarch64/racpu.pas
  19. 8 4
      compiler/aarch64/racpugas.pas
  20. 4 0
      compiler/aarch64/rgcpu.pas
  21. 0 108
      compiler/aasmbase.pas
  22. 172 15
      compiler/aasmcnst.pas
  23. 73 3
      compiler/aasmdata.pas
  24. 4 0
      compiler/aasmsym.pas
  25. 73 25
      compiler/aasmtai.pas
  26. 138 65
      compiler/aggas.pas
  27. 7 0
      compiler/aopt.pas
  28. 55 16
      compiler/aoptobj.pas
  29. 12 1
      compiler/aoptutils.pas
  30. 204 68
      compiler/arm/aasmcpu.pas
  31. 16 1
      compiler/arm/agarmgas.pas
  32. 70 8
      compiler/arm/aoptcpu.pas
  33. 19 6
      compiler/arm/aoptcpub.pas
  34. 6 1
      compiler/arm/armins.dat
  35. 1 1
      compiler/arm/armnop.inc
  36. 35 0
      compiler/arm/armtab.inc
  37. 125 62
      compiler/arm/cgcpu.pas
  38. 43 3
      compiler/arm/cpubase.pas
  39. 2 0
      compiler/arm/cpuelf.pas
  40. 1 0
      compiler/arm/cpunode.pas
  41. 174 58
      compiler/arm/cpupara.pas
  42. 13 1
      compiler/arm/cpupi.pas
  43. 6 2
      compiler/arm/narmadd.pas
  44. 3 0
      compiler/arm/narmcnv.pas
  45. 93 64
      compiler/arm/narmcon.pas
  46. 95 0
      compiler/arm/narmld.pas
  47. 7 5
      compiler/arm/narmmat.pas
  48. 24 16
      compiler/arm/narmset.pas
  49. 7 0
      compiler/arm/raarmgas.pas
  50. 8 0
      compiler/arm/rgcpu.pas
  51. 2 2
      compiler/arm/symcpu.pas
  52. 121 0
      compiler/armgen/armpara.pas
  53. 23 0
      compiler/assemble.pas
  54. 132 54
      compiler/avr/aoptcpu.pas
  55. 0 4
      compiler/avr/aoptcpub.pas
  56. 3 1
      compiler/avr/ccpuinnr.inc
  57. 137 68
      compiler/avr/cgcpu.pas
  58. 6 1
      compiler/avr/cpubase.pas
  59. 1 1
      compiler/avr/cpuinfo.pas
  60. 16 5
      compiler/avr/cpupara.pas
  61. 34 2
      compiler/avr/navrinl.pas
  62. 101 97
      compiler/avr/raavr.pas
  63. 20 7
      compiler/avr/rgcpu.pas
  64. 1 1
      compiler/blockutl.pas
  65. 9 4
      compiler/browcol.pas
  66. 87 93
      compiler/cclasses.pas
  67. 0 2
      compiler/cfidwarf.pas
  68. 16 3
      compiler/cfileutl.pas
  69. 22 2
      compiler/cgbase.pas
  70. 74 14
      compiler/cgobj.pas
  71. 9 6
      compiler/cgutils.pas
  72. 5 0
      compiler/compinnr.pas
  73. 2 2
      compiler/cresstr.pas
  74. 1 0
      compiler/cstreams.pas
  75. 151 8
      compiler/cutils.pas
  76. 10 2
      compiler/dbgbase.pas
  77. 4 0
      compiler/dbgcodeview.pas
  78. 328 53
      compiler/dbgdwarf.pas
  79. 16 3
      compiler/dbgstabs.pas
  80. 5 1
      compiler/dbgstabx.pas
  81. 48 13
      compiler/defcmp.pas
  82. 47 11
      compiler/defutil.pas
  83. 261 0
      compiler/elfbase.pas
  84. 535 13
      compiler/entfile.pas
  85. 16 5
      compiler/fmodule.pas
  86. 38 6
      compiler/fpcdefs.inc
  87. 2 5
      compiler/fpcp.pas
  88. 187 119
      compiler/fppu.pas
  89. 2 0
      compiler/gendef.pas
  90. 63 5
      compiler/globals.pas
  91. 4 1
      compiler/globstat.pas
  92. 63 5
      compiler/globtype.pas
  93. 630 9
      compiler/hlcg2ll.pas
  94. 207 74
      compiler/hlcgobj.pas
  95. 195 79
      compiler/htypechk.pas
  96. 56 435
      compiler/i386/aoptcpu.pas
  97. 0 4
      compiler/i386/aoptcpub.pas
  98. 12 5
      compiler/i386/cgcpu.pas
  99. 2 0
      compiler/i386/cpuelf.pas
  100. 36 20
      compiler/i386/cpupara.pas

Файловите разлики са ограничени, защото са твърде много
+ 352 - 27
.gitattributes


+ 83 - 7
Makefile

@@ -1,8 +1,8 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2018-04-12 rev 38745]
+# Don't edit, this file is generated by FPCMake Version 2.0.0
 #
 default: help
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-netbsd m68k-amiga m68k-atari m68k-palmos m68k-macos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-aros x86_64-dragonfly arm-linux arm-netbsd arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android arm-aros powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-embedded i8086-msdos i8086-win16 aarch64-linux aarch64-darwin wasm-wasm sparc64-linux
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-netbsd m68k-amiga m68k-atari m68k-palmos m68k-macos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-haiku x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-android x86_64-aros x86_64-dragonfly arm-linux arm-netbsd arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android arm-aros powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-embedded i8086-msdos i8086-win16 aarch64-linux aarch64-darwin aarch64-android wasm-wasm sparc64-linux riscv32-linux riscv32-embedded riscv64-linux riscv64-embedded
 BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos win16 atari
@@ -288,9 +288,7 @@ else
 ifeq ($(CPU_TARGET),i386)
 BINUTILSPREFIX=i686-linux-android-
 else
-ifeq ($(CPU_TARGET),mipsel)
-BINUTILSPREFIX=mipsel-linux-android-
-endif
+BINUTILSPREFIX=$(CPU_TARGET)-linux-android-
 endif
 endif
 endif
@@ -332,7 +330,7 @@ FPCFPMAKE=$(FPC)
 endif
 endif
 override PACKAGE_NAME=fpc
-override PACKAGE_VERSION=3.1.1
+override PACKAGE_VERSION=3.3.1
 REQUIREDVERSION=3.0.4
 REQUIREDVERSION2=3.0.2
 ifndef inOS2
@@ -391,6 +389,12 @@ endif
 ifeq ($(CPU_TARGET),aarch64)
 PPSUF=a64
 endif
+ifeq ($(CPU_TARGET),riscv32)
+PPSUF=rv32
+endif
+ifeq ($(CPU_TARGET),riscv64)
+PPSUF=rv64
+endif
 ifdef CROSSCOMPILE
 ifneq ($(CPU_TARGET),jvm)
 PPPRE=ppcross
@@ -471,7 +475,7 @@ BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)'
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 BuildOnlyBaseCPUs=jvm
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba nds msdos win16 $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos win16 macos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1
@@ -613,6 +617,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-freebsd)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),x86_64-haiku)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),x86_64-netbsd)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -634,6 +641,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-iphonesim)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),x86_64-android)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),x86_64-aros)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
@@ -727,12 +737,27 @@ endif
 ifeq ($(FULL_TARGET),aarch64-darwin)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),aarch64-android)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 ifeq ($(FULL_TARGET),wasm-wasm)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
 ifeq ($(FULL_TARGET),sparc64-linux)
 override TARGET_DIRS+=compiler rtl utils packages installer
 endif
+ifeq ($(FULL_TARGET),riscv32-linux)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),riscv32-embedded)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),riscv64-linux)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
+ifeq ($(FULL_TARGET),riscv64-embedded)
+override TARGET_DIRS+=compiler rtl utils packages installer
+endif
 override INSTALL_FPCPACKAGE=y
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
@@ -1108,6 +1133,7 @@ endif
 ifeq ($(OS_TARGET),aix)
 BATCHEXT=.sh
 EXEEXT=
+SHAREDLIBEXT=.a
 SHORTSUFFIX=aix
 endif
 ifeq ($(OS_TARGET),java)
@@ -1787,6 +1813,7 @@ ifdef CLEAN_FILES
 	-$(DEL) $(CLEAN_FILES)
 endif
 	-$(DELTREE) units
+	-$(DELTREE) bin
 	-$(DEL) *$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT)
 ifneq ($(PPUEXT),.ppu)
 	-$(DEL) *.o *.ppu *.a
@@ -2227,6 +2254,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),x86_64-haiku)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),x86_64-netbsd)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2276,6 +2310,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),x86_64-android)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),x86_64-aros)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2493,6 +2534,13 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),aarch64-android)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),wasm-wasm)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2507,6 +2555,34 @@ TARGET_DIRS_UTILS=1
 TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),riscv32-linux)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),riscv32-embedded)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),riscv64-linux)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
+ifeq ($(FULL_TARGET),riscv64-embedded)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifdef TARGET_DIRS_COMPILER
 compiler_all:
 	$(MAKE) -C compiler all

+ 8 - 2
Makefile.fpc

@@ -4,7 +4,7 @@
 
 [package]
 name=fpc
-version=3.1.1
+version=3.3.1
 
 [target]
 dirs=compiler rtl utils packages installer
@@ -85,6 +85,12 @@ endif
 ifeq ($(CPU_TARGET),aarch64)
 PPSUF=a64
 endif
+ifeq ($(CPU_TARGET),riscv32)
+PPSUF=rv32
+endif
+ifeq ($(CPU_TARGET),riscv64)
+PPSUF=rv64
+endif
 
 # cross compilers uses full cpu_target, not just ppc-suffix
 # (except if the target cannot run a native compiler)
@@ -199,7 +205,7 @@ INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 BuildOnlyBaseCPUs=jvm
 
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba nds msdos win16 $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos win16 macos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1

Файловите разлики са ограничени, защото са твърде много
+ 439 - 132
compiler/Makefile


+ 248 - 56
compiler/Makefile.fpc

@@ -4,14 +4,14 @@
 
 [package]
 name=compiler
-version=3.1.1
+version=3.3.1
 
 [target]
 programs=pp
 dirs=utils
 
 [compiler]
-targetdir=.
+targetdir=$(CPU_UNITDIR)/bin/$(FULL_TARGET)
 unittargetdir=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 unitdir=$(COMPILERSOURCEDIR)
 includedir=$(CPC_TARGET)
@@ -32,7 +32,7 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64 sparc64
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64 sparc64 riscv32 riscv64
 
 # All supported targets used for clean
 ALLTARGETS=$(CYCLETARGETS)
@@ -83,6 +83,12 @@ endif
 ifdef AARCH64
 PPC_TARGET=aarch64
 endif
+ifdef RISCV32
+PPC_TARGET=riscv32
+endif
+ifdef RISCV64
+PPC_TARGET=riscv64
+endif
 
 # Default is to generate a compiler for the same
 # platform as CPU_TARGET (a native compiler)
@@ -213,6 +219,12 @@ endif
 ifeq ($(CPC_TARGET),aarch64)
 CPUSUF=a64
 endif
+ifeq ($(CPC_TARGET),riscv32)
+CPUSUF=rv32
+endif
+ifeq ($(CPC_TARGET),riscv64)
+CPUSUF=rv64
+endif
 
 # Do not define the default -d$(CPU_TARGET) because that
 # will conflict with our -d$(CPC_TARGET)
@@ -297,7 +309,12 @@ endif
 
 # ARM specific
 ifeq ($(PPC_TARGET),arm)
-override LOCALOPT+=
+override LOCALOPT+=-Fuarmgen
+endif
+
+# ARMEB specific
+ifeq ($(PPC_TARGET),armeb)
+override LOCALOPT+=-Fuarmgen
 endif
 
 # mipsel specific
@@ -310,11 +327,26 @@ ifeq ($(PPC_TARGET),jvm)
 override LOCALOPT+=-Fujvm
 endif
 
+# AArch64 specific
+ifeq ($(PPC_TARGET),aarch64)
+override LOCALOPT+=-Fuarmgen
+endif
+
 # i8086 specific
 ifeq ($(PPC_TARGET),i8086)
 override LOCALOPT+=-Fux86
 endif
 
+# RiscV32 specific
+ifeq ($(PPC_TARGET),riscv32)
+override LOCALOPT+=-Furiscv
+endif
+
+# RiscV64 specific
+ifeq ($(PPC_TARGET),riscv64)
+override LOCALOPT+=-Furiscv
+endif
+
 OPTWPOCOLLECT=-OWdevirtcalls,optvmts -FW$(BASEDIR)/pp1.wpo
 OPTWPOPERFORM=-Owdevirtcalls,optvmts -Fw$(BASEDIR)/pp1.wpo
 # symbol liveness WPO requires nm, smart linking and no stripping (the latter
@@ -348,8 +380,11 @@ endif
 ifeq ($(OS_TARGET),win16)
 NoNativeBinaries=1
 endif
+ifeq ($(OS_TARGET),macos)
+NoNativeBinaries=1
+endif
 
-# Allow install for jvm 
+# Allow install for jvm
 ifeq ($(NoNativeBinaries),1)
 override EXEEXT=$(SRCEXEEXT)
 # In those cases, installation in a cross-installation
@@ -426,23 +461,139 @@ INSTALLEXEFILE=$(EXENAME)
 endif
 
 #####################################################################
-# CPU targets
+# Rules to run the compiler trough GDB using utils/gppc386.pp code
+# inside specific levels of cycle.
+# Simply compile utils and utils/gppc386
+# And move generated utils/gppc386 to ./g$(TEMPNAME)
+#####################################################################
+
+# Use debugger for all compilations
+ifdef DEBUG_CYCLE
+DEBUG_EXENAME=1
+DEBUG_PPEXENAME=1
+DEBUG_TEMPNAME=1
+DEBUG_PPCROSSNAME=1
+DEBUG_TEMPNAME1=1
+DEBUG_TEMPNAME2=1
+DEBUG_TEMPNAME3=1
+DEBUG_TEMPWPONAME1=1
+DEBUG_TEMPWPONAME2=1
+endif
+
+# Or DEBUG_XXX to only start a specific compiler
+# inside GDB
+ifdef DEBUG_EXENAME
+EXENAMEPREFIX=g
+NEED_G_COMPILERS+=g$(EXENAME)
+endif
+
+ifdef DEBUG_PPEXENAME
+PPEXENAMEPREFIX=g
+NEED_G_COMPILERS+=g$(PPEXENAME)
+endif
+
+ifdef DEBUG_TEMPNAME
+TEMPNAMEPREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME)
+endif
+
+ifdef DEBUG_PPCROSSNAME
+PPCROSSNAMEPREFIX=g
+NEED_G_COMPILERS+=g$(PPCROSSNAME)
+endif
+
+ifdef DEBUG_TEMPNAME1
+TEMPNAME1PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME1)
+endif
+
+ifdef DEBUG_TEMPNAME2
+TEMPNAME2PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME2)
+endif
+
+ifdef DEBUG_TEMPNAME3
+TEMPNAME3PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPNAME3)
+endif
+
+ifdef DEBUG_TEMPWPONAME1
+TEMPNAMEWPO1PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPWPONAME1)
+endif
+
+ifdef DEBUG_TEMPWPONAME2
+TEMPWPONAME2PREFIX=g
+NEED_G_COMPILERS+=g$(TEMPWPONAME2)
+endif
+
+ALL_G_COMPILERS="g$(EXENAME) g$(PPEXENAME) g$(TEMPNAME) g$(PPCROSSNAME) g$(TEMPNAME1) g$(TEMPNAME2) g$(TEMPNAME3) g$(TEMPWPONAME1) g$(TEMPWPONAME2)"
+
+#####################################################################
+# To start a given compiler $(PP) with gdb, copy utils/gppc386 as g$(PP).
+# Symbolic link is not working, full copy required.
+# Use a file as time stamp to avoid recompiling utils/gppc386
+# unless needed.
+#####################################################################
+g$(COMPILERTEMPNAME): fpcmade.generate_g_compilers
+	$(COPY) ./utils/gppc386 ./g$(COMPILERTEMPNAME)
+
+fpcmade.generate_g_compilers: utils/gppc386.pp
+	$(MAKE) rtlclean rtl utils
+	$(MAKE) -C utils gppc386$(EXEEXT)
+	$(GECHO) -n "utils/gppc386 generated at " > $@
+	$(GDATE) >> $@
+
+ifdef EXENAMEPREFIX
+	$(MAKE) g$(EXENAME) COMPILERTEMPNAME=$(EXENAME)
+endif
+ifdef PPEXENAMEPREFIX
+	$(MAKE) g$(PPEXENAME) COMPILERTEMPNAME=$(PPEXENAME)
+endif
+ifdef TEMPNAMEPREFIX
+	$(MAKE) g$(TEMPNAME) COMPILERTEMPNAME=$(TEMPNAME)
+endif
+ifdef PPCROSSNAMEPREFIX
+	$(MAKE) g$(PPCROSSNAME) COMPILERTEMPNAME=$(PPCROSSNAME)
+endif
+ifdef TEMPNAME1PREFIX
+	$(MAKE) g$(TEMPNAME1) COMPILERTEMPNAME=$(TEMPNAME1)
+endif
+ifdef TEMPNAME2PREFIX
+	$(MAKE) g$(TEMPNAME2) COMPILERTEMPNAME=$(TEMPNAME2)
+endif
+ifdef TEMPNAME3PREFIX
+	$(MAKE) g$(TEMPNAME3) COMPILERTEMPNAME=$(TEMPNAME3)
+endif
+ifdef TEMPWPONAME1PREFIX
+	$(MAKE) g$(TEMPWPONAME1) COMPILERTEMPNAME=$(TEMPWPONAME1)
+endif
+ifdef TEMPWPONAME2PREFIX
+	$(MAKE) g$(TEMPWPONAME2) COMPILERTEMPNAME=$(TEMPWPONAME2)
+endif
+
+
+#####################################################################
+# cpu targets
 #####################################################################
 
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64 sparc64
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 mips mipsel avr jvm i8086 aarch64 sparc64 riscv32 riscv64
+PPC_SUFFIXES=386 68k ppc sparc arm armeb x64 ppc64 mips mipsel avr jvm 8086 a64 sparc64 rv32 rv64
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 SYMLINKINSTALL_TARGETS=$(addsuffix _symlink_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)$(SYMLINKINSTALL_TARGETS)
 
 $(PPC_TARGETS):
-        $(MAKE) PPC_TARGET=$@ CPU_UNITDIR=$@ all
+        $(MAKE) PPC_TARGET=$@ CPU_UNITDIR=$@ compiler
 
 $(INSTALL_TARGETS):
-        $(MAKE) all install PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@)
+        $(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) compiler
+		$(MAKE) PPC_TARGET=$(subst _exe_install,,$@) CPU_UNITDIR=$(subst _exe_install,,$@) exeinstall
 
 $(SYMLINKINSTALL_TARGETS):
-        $(MAKE) all installsymlink PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@)
+        $(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) compiler
+		$(MAKE) PPC_TARGET=$(subst _symlink_install,,$@) CPU_UNITDIR=$(subst _symlink_install,,$@) installsymlink
 
 alltargets: $(ALLTARGETS)
 
@@ -451,8 +602,6 @@ alltargets: $(ALLTARGETS)
 # Default makefile
 #####################################################################
 
-.NOTPARALLEL:
-
 .PHONY: all compiler echotime ppuclean execlean clean distclean
 
 all: compiler $(addsuffix _all,$(TARGET_DIRS))
@@ -488,17 +637,16 @@ tempclean:
 	-$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 
 execlean :
-	-$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcsparc64$(EXEEXT)
-	-$(DEL) ppcarm$(EXEEXT) ppcavr$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT)
-	-$(DEL) ppcross386$(EXEEXT) ppcross68k$(EXEEXT) ppcrossx64$(EXEEXT) ppcrossppc$(EXEEXT) ppcrosssparc$(EXEEXT) ppcrossppc64$(EXEEXT) ppcrosssparc64$(EXEEXT)
-	-$(DEL) ppcrossarm$(EXEEXT) ppcrossavr$(EXEEXT) ppcrossmips$(EXEEXT) ppcrossmipsel$(EXEEXT) ppcrossjvm$(EXEEXT) ppcross8086$(EXEEXT) ppcrossa64$(EXEEXT)
-	-$(DEL) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+	-$(DEL) $(addsuffix $(EXEEXT), $(addprefix ppc, $(PPC_SUFFIXES)))
+	-$(DEL) $(addsuffix $(EXEEXT), $(addprefix ppcross, $(PPC_SUFFIXES)))
+	-$(DEL) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2) $(ALL_G_COMPILERS)
+	-$(DEL) fpcmade.generate_g_compilers
 
 $(addsuffix _clean,$(ALLTARGETS)):
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
+        -$(DELTREE) $(addprefix $(subst _clean,,$@),/bin)
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcarv$(EXEEXT) ppcsparc64$(EXEEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
+        -$(DEL) $(addprefix $(subst _clean,,$@)/ppc,$(addsuffix $(EXEEXT), $(PPC_SUFFIXES)))
 
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
         -$(DEL) $(EXENAME)
@@ -620,7 +768,6 @@ endif
         $(EXECPPAS)
         $(MOVE) $(COMPILER_TARGETDIR)/$(PPEXENAME) $(EXENAME)
 
-
 #####################################################################
 # Cycle targets
 #
@@ -651,22 +798,27 @@ ifeq ($(OS_SOURCE),$(OS_TARGET))
 ifndef NOWPOCYCLE
 ifdef RELEASE
 DOWPOCYCLE=1
+endif
+endif
+
+ifdef DOWPOCYCLE
 # Two WPO cycles in case of RELEASE=1
 wpocycle:
 # don't use cycle_clean, it will delete the compiler utilities again
         $(RM) $(EXENAME)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTWPOCOLLECT) $(OPTNEW))' compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTWPOCOLLECT) $(OPTNEW))' compiler
         $(RM) $(EXENAME)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtlclean rtl
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS)) 
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' compiler
         $(MOVE) $(EXENAME) $(TEMPWPONAME1)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtlclean rtl
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' $(addsuffix _clean,$(ALLTARGETS))
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1PREFIX)$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' compiler
         $(COPY) $(EXENAME) $(TEMPWPONAME2)
-endif
-endif
-
-ifndef DOWPOCYCLE
+else
 wpocycle:
 endif
 
@@ -691,8 +843,10 @@ next :
         $(COPY) $(FPC) $(EXENAME)
 else
 next :
-        $(MAKE) rtlclean rtl
-        $(MAKE) cycleclean compiler
+        $(MAKE) rtlclean
+        $(MAKE) rtl
+        $(MAKE) cycleclean
+        $(MAKE) compiler
         $(MAKE) echotime
 endif
 
@@ -702,20 +856,24 @@ $(TEMPNAME1) :
         $(MOVE) $(EXENAME) $(TEMPNAME1)
 
 $(TEMPNAME2) : $(TEMPNAME1)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME1)' 'OLDFPC=' next CYCLELEVEL=2
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME1PREFIX)$(TEMPNAME1)' 'OLDFPC=' next CYCLELEVEL=2
         -$(DEL) $(TEMPNAME2)
         $(MOVE) $(EXENAME) $(TEMPNAME2)
 
 $(TEMPNAME3) : $(TEMPNAME2)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME2)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME1)' next CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME2PREFIX)$(TEMPNAME2)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME1)' next CYCLELEVEL=3
         -$(DEL) $(TEMPNAME3)
         $(MOVE) $(EXENAME) $(TEMPNAME3)
 
 cycle:
-        $(MAKE) tempclean $(TEMPNAME3)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME2)' next CYCLELEVEL=4
+ifdef NEED_G_COMPILERS
+	$(MAKE) fpcmade.generate_g_compilers
+endif
+        $(MAKE) tempclean
+        $(MAKE) $(TEMPNAME3)
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3PREFIX)$(TEMPNAME3)' 'OLDFPC=$(BASEDIR)/$(TEMPNAME2)' next CYCLELEVEL=4
         $(DIFF) $(TEMPNAME3) $(EXENAME)
-        $(MAKE) $(addsuffix _all,$(TARGET_DIRS)) 'FPC=$(BASEDIR)/$(EXENAME)'
+        $(MAKE) $(addsuffix _all,$(TARGET_DIRS)) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
         $(MAKE) wpocycle
         $(MAKE) echotime
 
@@ -726,17 +884,26 @@ else
 #
 
 cycle:
+ifdef NEED_G_COMPILERS
+	$(MAKE) fpcmade.generate_g_compilers
+endif
 # ppc (source native)
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
-        $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtlclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtl
+        $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 cycleclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 compiler
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 compiler
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 rtl
 ifndef NoNativeBinaries
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' CYCLELEVEL=3 compiler
 endif
 endif
 
@@ -754,18 +921,27 @@ else
 
 cycle: override FPC=
 cycle:
+ifdef NEED_G_COMPILERS
+	$(MAKE) fpcmade.generate_g_compilers
+endif
 # ppc (source native)
 # Clear detected compiler binary, because it can be existing crosscompiler binary, but we need native compiler here
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
-        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtlclean
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 rtl
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 cycleclean 
+        $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=1 compiler 
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtlclean 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 rtl 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 cycleclean 
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAMEPREFIX)$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 CYCLELEVEL=2 compiler 
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' CYCLELEVEL=3 rtlclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' CYCLELEVEL=3 rtl
 ifndef NoNativeBinaries
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' CYCLELEVEL=3 cycleclean
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAMEPREFIX)$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' CYCLELEVEL=3 compiler
 endif
 endif
 
@@ -788,7 +964,8 @@ cvstest:
 #
 # 1. build a compiler using cycle
 # 2. remove all .ppufiles
-# 3. build all supported cross compilers except the
+# 3. clean and recompile rtl if DOWPOCYCLE is set
+# 4. build all supported cross compilers except the
 #    current PPC_TARGET which was already build
 # unless FPC_SUPPORT_X87_TYPES_ON_WIN64 is set,
 # win64 cannot compile i386 or i8086 compiler
@@ -799,19 +976,24 @@ ifeq ($(OS_SOURCE),win64)
   EXCLUDE_80BIT_TARGETS=1
 endif
 
-ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc sparc64),)
+ifneq ($(findstring $(CPU_SOURCE),aarch64 arm avr jvm m68k mips mipsel powerpc powerpc64 sparc sparc64 riscv32 riscv64),)
   EXCLUDE_80BIT_TARGETS=1
 endif
 
 full: fullcycle
 
 fullcycle:
+        $(MAKE) distclean
         $(MAKE) cycle
         $(MAKE) ppuclean
+ifdef DOWPOCYCLE
+        $(MAKE) rtlclean
+        $(MAKE) rtl 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
+endif
 ifndef EXCLUDE_80BIT_TARGETS
-        $(MAKE) $(filter-out $(PPC_TARGET),$(CYCLETARGETS)) 'FPC=$(BASEDIR)/$(EXENAME)'
+        $(MAKE) $(filter-out $(PPC_TARGET),$(CYCLETARGETS)) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
 else
-        $(MAKE) $(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))) 'FPC=$(BASEDIR)/$(EXENAME)'
+        $(MAKE) $(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))) 'FPC=$(BASEDIR)/$(EXENAMEPREFIX)$(EXENAME)'
 endif
 
 #####################################################################
@@ -859,12 +1041,13 @@ endif
 
 fullinstall:
 ifndef EXCLUDE_80BIT_TARGETS
-        $(MAKE) $(addsuffix _exe_install,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))
+        $(MAKE) $(addsuffix _exe_install,$(filter-out $(PPC_TARGET),$(CYCLETARGETS))) $(addsuffix _all,$(TARGET_DIRS))
 else
-        $(MAKE) $(addsuffix _exe_install,$(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS)))))
+        $(MAKE) $(addsuffix _exe_install,$(filter-out i8086,$(filter-out i386,$(filter-out $(PPC_TARGET),$(CYCLETARGETS))))) $(addsuffix _all,$(TARGET_DIRS))
 endif
-
-install: quickinstall
+        $(MAKE) $(addsuffix _install,$(TARGET_DIRS))
+        
+auxfilesinstall:
 ifndef CROSSINSTALL
 ifdef UNIXHier
         $(MKDIR) $(INSTALL_BASEDIR)
@@ -873,6 +1056,15 @@ endif
         $(MKDIR) $(MSGINSTALLDIR)
         $(INSTALL) $(MSGFILES) $(MSGINSTALLDIR)
 endif
+	
+
+install: 
+# if no FPC is passed, use that one we assume, we just build
+ifndef FPC
+	$(MAKE) quickinstall auxfilesinstall FPC=$(BASEDIR)/$(INSTALLEXEFILE)
+else
+	$(MAKE) quickinstall auxfilesinstall
+endif
 
 # This also installs a link from bin to the actual executable.
 # The .deb does that later.

+ 4 - 3
compiler/aarch64/aasmcpu.pas

@@ -592,7 +592,6 @@ implementation
 
     function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
       var
-        maxoffs: asizeint;
         accesssize: longint;
       begin
         result:=sr_internal_illegal;
@@ -922,8 +921,8 @@ implementation
 
 
     procedure BuildInsTabCache;
-      var
-        i : longint;
+//      var
+//        i : longint;
       begin
 (*        new(instabcache);
         FillChar(instabcache^,sizeof(tinstabcache),$ff);
@@ -1006,6 +1005,7 @@ implementation
 *)
 
     procedure insertpcrelativedata(list,listtoinsert : TAsmList);
+(*
       var
         curinspos,
         penalty,
@@ -1021,6 +1021,7 @@ implementation
         l : tasmlabel;
         doinsert,
         removeref : boolean;
+*)
       begin
 (*
         curdata:=TAsmList.create;

+ 3 - 1
compiler/aarch64/agcpugas.pas

@@ -180,6 +180,8 @@ unit agcpugas;
                 result:=result+']';
               AM_PREINDEXED:
                 result:=result+']!';
+              else
+                ;
             end;
           end;
       end;
@@ -274,7 +276,7 @@ unit agcpugas;
             idtxt  : 'AS';
             asmbin : 'as';
             asmcmd : '-o $OBJ $EXTRAOPT $ASM';
-            supported_targets : [system_aarch64_linux];
+            supported_targets : [system_aarch64_linux,system_aarch64_android];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
             comment : '// ';

+ 122 - 12
compiler/aarch64/aoptcpu.pas

@@ -21,26 +21,44 @@
  ****************************************************************************
 }
 
-
 Unit aoptcpu;
 
 {$i fpcdefs.inc}
 
-Interface
+{ $define DEBUG_AOPTCPU}
 
-uses cpubase, aasmtai, aopt, aoptcpub;
+Interface
 
-Type
-  TCpuAsmOptimizer = class(TAsmOptimizer)
-    { uses the same constructor as TAopObj }
-    function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
-    procedure PeepHoleOptPass2;override;
-  End;
+    uses
+      globtype, globals,
+      cutils,
+      cgbase, cpubase, aasmtai, aasmcpu, aopt, aoptcpub;
+
+    Type
+      TCpuAsmOptimizer = class(TAsmOptimizer)
+        { uses the same constructor as TAopObj }
+        function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
+        procedure PeepHoleOptPass2;override;
+        function GetNextInstructionUsingReg(Current : tai; out Next : tai; reg : TRegister) : Boolean;
+        function LookForPostindexedPattern(p : taicpu) : boolean;
+        procedure DebugMsg(const s : string; p : tai);
+      End;
 
 Implementation
 
   uses
-    aasmbase,aasmcpu,cgbase;
+    aasmbase;
+
+{$ifdef DEBUG_AOPTCPU}
+  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);
+    begin
+      asml.insertbefore(tai_comment.Create(strpnew(s)), p);
+    end;
+{$else DEBUG_AOPTCPU}
+  procedure TCpuAsmOptimizer.DebugMsg(const s: string;p : tai);inline;
+    begin
+    end;
+{$endif DEBUG_AOPTCPU}
 
   function CanBeCond(p : tai) : boolean;
     begin
@@ -48,11 +66,103 @@ Implementation
     end;
 
 
-  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
+  function MatchInstruction(const instr: tai; const op: TAsmOps; const postfix: TOpPostfixes): boolean;
+    begin
+      result :=
+        (instr.typ = ait_instruction) and
+        ((op = []) or (taicpu(instr).opcode in op)) and
+        ((postfix = []) or (taicpu(instr).oppostfix in postfix));
+    end;
+
+
+  function MatchInstruction(const instr: tai; const op: TAsmOp; const postfix: TOpPostfixes): boolean;
+    begin
+      result :=
+        (instr.typ = ait_instruction) and
+        (taicpu(instr).opcode = op) and
+        ((postfix = []) or (taicpu(instr).oppostfix in postfix));
+    end;
+
+
+  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
+    Out Next: tai; reg: TRegister): Boolean;
+    begin
+      Next:=Current;
+      repeat
+        Result:=GetNextInstruction(Next,Next);
+      until not (Result) or
+            not(cs_opt_level3 in current_settings.optimizerswitches) or
+            (Next.typ<>ait_instruction) or
+            RegInInstruction(reg,Next) or
+            is_calljmp(taicpu(Next).opcode);
+    end;
+
+  {
+    optimize
+      ldr/str regX,[reg1]
+      ...
+      add/sub reg1,reg1,regY/const
+
+      into
+
+      ldr/str regX,[reg1], regY/const
+  }
+  function TCpuAsmOptimizer.LookForPostindexedPattern(p: taicpu) : boolean;
     var
-      next1: tai;
+      hp1 : tai;
+    begin
+      Result:=false;
+      if (p.oper[1]^.typ = top_ref) and
+        (p.oper[1]^.ref^.addressmode=AM_OFFSET) and
+        (p.oper[1]^.ref^.index=NR_NO) and
+        (p.oper[1]^.ref^.offset=0) and
+        GetNextInstructionUsingReg(p, hp1, p.oper[1]^.ref^.base) and
+        { we cannot check NR_DEFAULTFLAGS for modification yet so don't allow a condition }
+        MatchInstruction(hp1, [A_ADD, A_SUB], [PF_None]) and
+        (taicpu(hp1).oper[0]^.reg=p.oper[1]^.ref^.base) and
+        (taicpu(hp1).oper[1]^.reg=p.oper[1]^.ref^.base) and
+        (
+         { valid offset? }
+         (taicpu(hp1).oper[2]^.typ=top_const) and
+         (taicpu(hp1).oper[2]^.val>=-256) and
+         (abs(taicpu(hp1).oper[2]^.val)<256)
+        ) and
+        { don't apply the optimization if the base register is loaded }
+        (getsupreg(p.oper[0]^.reg)<>getsupreg(p.oper[1]^.ref^.base)) and
+        not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
+        not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
+        begin
+          DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
+          p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
+          if taicpu(hp1).opcode=A_ADD then
+            p.oper[1]^.ref^.offset:=taicpu(hp1).oper[2]^.val
+          else
+            p.oper[1]^.ref^.offset:=-taicpu(hp1).oper[2]^.val;
+          asml.Remove(hp1);
+          hp1.Free;
+          Result:=true;
+        end;
+    end;
+
+
+  function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     begin
       result := false;
+      if p.typ=ait_instruction then
+        begin
+          case taicpu(p).opcode of
+            A_LDR:
+              begin
+                Result:=LookForPostindexedPattern(taicpu(p));
+              end;
+            A_STR:
+              begin
+                Result:=LookForPostindexedPattern(taicpu(p));
+              end;
+            else
+              ;
+          end;
+        end;
     end;
 
 

+ 2 - 4
compiler/aarch64/aoptcpub.pas

@@ -76,10 +76,6 @@ Const
 
   MaxCh = 3;
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 4;
-
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 
@@ -146,6 +142,8 @@ Implementation
                   exit
                 end;
             end;
+          else
+            ;
         end;
     end;
 

+ 86 - 36
compiler/aarch64/cgcpu.pas

@@ -382,8 +382,6 @@ implementation
                             reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.temppos,ref.alignment,ref.volatility);
                           end;
                       end
-                    else
-                      internalerror(2014110904);
                   end;
                 end;
               A_LDP,A_STP:
@@ -810,35 +808,80 @@ implementation
     procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
       var
         href: treference;
-        hreg1, hreg2, tmpreg: tregister;
+        hreg1, hreg2, tmpreg,tmpreg2: tregister;
+        i : Integer;
       begin
-        if fromsize in [OS_64,OS_S64] then
-          begin
-            { split into two 32 bit loads }
-            hreg1:=getintregister(list,OS_32);
-            hreg2:=getintregister(list,OS_32);
-            if target_info.endian=endian_big then
-              begin
-                tmpreg:=hreg1;
-                hreg1:=hreg2;
-                hreg2:=tmpreg;
-              end;
-            { can we use LDP? }
-            if (ref.alignment=4) and
-               (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
-              list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
-            else
-              begin
-                a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
-                href:=ref;
-                inc(href.offset,4);
-                a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
-              end;
-            a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
-            list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
-          end
-       else
-         inherited;
+        case fromsize of
+          OS_64,OS_S64:
+            begin
+              { split into two 32 bit loads }
+              hreg1:=getintregister(list,OS_32);
+              hreg2:=getintregister(list,OS_32);
+              if target_info.endian=endian_big then
+                begin
+                  tmpreg:=hreg1;
+                  hreg1:=hreg2;
+                  hreg2:=tmpreg;
+                end;
+              { can we use LDP? }
+              if (ref.alignment=4) and
+                 (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
+                list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
+              else
+                begin
+                  a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
+                  href:=ref;
+                  inc(href.offset,4);
+                  a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
+                end;
+              a_load_reg_reg(list,OS_32,OS_64,hreg1,register);
+              list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
+            end;
+          OS_16,OS_S16,
+          OS_32,OS_S32:
+            begin
+              if ref.alignment=2 then
+                begin
+                  href:=ref;
+                  if target_info.endian=endian_big then
+                    inc(href.offset,tcgsize2size[fromsize]-2);
+                  tmpreg:=getintregister(list,OS_32);
+                  a_load_ref_reg(list,OS_16,OS_32,href,tmpreg);
+                  tmpreg2:=getintregister(list,OS_32);
+                  for i:=1 to (tcgsize2size[fromsize]-1) div 2 do
+                    begin
+                      if target_info.endian=endian_big then
+                        dec(href.offset,2)
+                      else
+                        inc(href.offset,2);
+                      a_load_ref_reg(list,OS_16,OS_32,href,tmpreg2);
+                      list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*16,16));
+                    end;
+                  a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
+                end
+              else
+                begin
+                  href:=ref;
+                  if target_info.endian=endian_big then
+                    inc(href.offset,tcgsize2size[fromsize]-1);
+                  tmpreg:=getintregister(list,OS_32);
+                  a_load_ref_reg(list,OS_8,OS_32,href,tmpreg);
+                  tmpreg2:=getintregister(list,OS_32);
+                  for i:=1 to tcgsize2size[fromsize]-1 do
+                    begin
+                      if target_info.endian=endian_big then
+                        dec(href.offset)
+                      else
+                        inc(href.offset);
+                      a_load_ref_reg(list,OS_8,OS_32,href,tmpreg2);
+                      list.concat(taicpu.op_reg_reg_const_const(A_BFI,tmpreg,tmpreg2,i*8,8));
+                    end;
+                  a_load_reg_reg(list,fromsize,tosize,tmpreg,register);
+                end;
+            end;
+          else
+            inherited;
+        end;
       end;
 
 
@@ -1040,13 +1083,19 @@ implementation
 
 
      procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+       var
+         r : tregister;
        begin
          if not shufflescalar(shuffle) then
            internalerror(2014122802);
          if not(tcgsize2size[fromsize] in [4,8]) or
-            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+            (tcgsize2size[fromsize]>tcgsize2size[tosize]) then
            internalerror(2014122804);
-         list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
+         if tcgsize2size[fromsize]<tcgsize2size[tosize] then
+           r:=makeregsize(intreg,fromsize)
+         else
+           r:=intreg;
+         list.concat(taicpu.op_reg_reg(A_UMOV,r,mmreg));
        end;
 
 
@@ -1076,18 +1125,15 @@ implementation
 
     procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       var
-        bitsize,
-        signbit: longint;
+        bitsize: longint;
       begin
         if srcsize in [OS_64,OS_S64] then
           begin
             bitsize:=64;
-            signbit:=6;
           end
         else
           begin
             bitsize:=32;
-            signbit:=5;
           end;
         { source is 0 -> dst will have to become 255 }
         list.concat(taicpu.op_reg_const(A_CMP,src,0));
@@ -1257,6 +1303,8 @@ implementation
               a_load_const_reg(list,size,a,dst);
               exit;
             end;
+          else
+            ;
         end;
         case op of
           OP_ADD,
@@ -1405,6 +1453,8 @@ implementation
                     check for overflow) }
                   internalerror(2014122101);
                 end;
+              else
+                internalerror(2019050936);
             end;
           end;
         a_op_reg_reg_reg(list,op,size,src1,src2,dst);

+ 10 - 0
compiler/aarch64/cpubase.pas

@@ -48,6 +48,8 @@ unit cpubase;
     type
       TAsmOp= {$i a64op.inc}
 
+      TAsmOps = set of TAsmOp;
+
       { This should define the array of instructions as string }
       op2strtable=array[tasmop] of string[11];
 
@@ -325,6 +327,7 @@ unit cpubase;
     procedure shifterop_reset(var so : tshifterop); {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
     function dwarf_reg(r:tregister):shortint;
+    function dwarf_reg_no_error(r:tregister):shortint;
 
     function is_shifter_const(d: aint; size: tcgsize): boolean;
 
@@ -365,8 +368,11 @@ unit cpubase;
           R_MMREGISTER:
             begin
               case s of
+                { records }
+                OS_32,
                 OS_F32:
                   cgsize2subreg:=R_SUBMMS;
+                OS_64,
                 OS_F64:
                   cgsize2subreg:=R_SUBMMD;
                 else
@@ -490,6 +496,10 @@ unit cpubase;
           internalerror(200603251);
       end;
 
+    function dwarf_reg_no_error(r:tregister):shortint;
+      begin
+        result:=regdwarf_table[findreg_by_number(r)];
+      end;
 
     function is_shifter_const(d: aint; size: tcgsize): boolean;
       var

+ 113 - 94
compiler/aarch64/cpupara.pas

@@ -30,10 +30,10 @@ unit cpupara;
        globtype,globals,
        aasmtai,aasmdata,
        cpuinfo,cpubase,cgbase,cgutils,
-       symconst,symbase,symtype,symdef,parabase,paramgr;
+       symconst,symbase,symtype,symdef,parabase,paramgr,armpara;
 
     type
-       tcpuparamanager = class(tparamanager)
+       tcpuparamanager = class(tarmgenparamanager)
           function get_volatile_registers_int(calloption: tproccalloption): tcpuregisterset; override;
           function get_volatile_registers_fpu(calloption: tproccalloption): tcpuregisterset; override;
           function get_volatile_registers_mm(calloption: tproccalloption): tcpuregisterset; override;
@@ -42,7 +42,7 @@ unit cpupara;
           function push_addr_param(varspez: tvarspez; def: tdef; calloption: tproccalloption): boolean; override;
           function ret_in_param(def: tdef; pd: tabstractprocdef):boolean;override;
           function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p: tabstractprocdef; side: tcallercallee; varargspara: tvarargsparalist):longint;override;
           function get_funcretloc(p: tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
           function param_use_paraloc(const cgpara: tcgpara): boolean; override;
          private
@@ -52,6 +52,7 @@ unit cpupara;
 
           procedure init_para_alloc_values;
           procedure alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+          function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
 
           procedure create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
        end;
@@ -91,7 +92,7 @@ unit cpupara;
 
     function tcpuparamanager.get_saved_registers_int(calloption: tproccalloption): tcpuregisterarray;
       const
-        saved_regs : array[0..9] of tsuperregister =
+        saved_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..9] of tsuperregister{$endif} =
           (RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28);
       begin
         result:=saved_regs;
@@ -100,89 +101,14 @@ unit cpupara;
 
     function tcpuparamanager.get_saved_registers_mm(calloption: tproccalloption): tcpuregisterarray;
       const
-        saved_mm_regs : array[0..7] of tsuperregister = (RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15);
+        saved_mm_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..7] of tsuperregister{$endif} =
+          (RS_D8,RS_D9,RS_D10,RS_D11,RS_D12,RS_D13,RS_D14,RS_D15);
       begin
         result:=saved_mm_regs;
       end;
 
 
-    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
-      var
-        i: longint;
-        sym: tsym;
-        tmpelecount: longint;
-      begin
-        result:=false;
-        case p.typ of
-          arraydef:
-            begin
-              if is_special_array(p) then
-                exit;
-              { an array of empty records has no influence }
-              if tarraydef(p).elementdef.size=0 then
-                begin
-                  result:=true;
-                  exit
-                end;
-              tmpelecount:=0;
-              if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
-                exit;
-              { tmpelecount now contains the number of hfa elements in a
-                single array element (e.g. 2 if it's an array of a record
-                containing two singles) -> multiply by number of elements
-                in the array }
-              inc(elecount,tarraydef(p).elecount*tmpelecount);
-              if elecount>4 then
-                exit;
-              result:=true;
-            end;
-          floatdef:
-            begin
-              if not assigned(basedef) then
-                basedef:=p
-              else if basedef<>p then
-                exit;
-              inc(elecount);
-              result:=true;
-            end;
-          recorddef:
-            begin
-              for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
-                begin
-                  sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
-                  if sym.typ<>fieldvarsym then
-                    continue;
-                  if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
-                    exit
-                end;
-              result:=true;
-            end;
-          else
-            exit
-        end;
-      end;
-
-
-    { Returns whether a def is a "homogeneous float array" at the machine level.
-      This means that in the memory layout, the def only consists of maximally
-      4 floating point values that appear consecutively in memory }
-    function is_hfa(p: tdef; out basedef: tdef) : boolean;
-      var
-        elecount: longint;
-      begin
-        result:=false;
-        basedef:=nil;
-        elecount:=0;
-        result:=is_hfa_internal(p,basedef,elecount);
-        result:=
-          result and
-          (elecount>0) and
-          (elecount<=4) and
-          (p.size=basedef.size*elecount)
-      end;
-
-
-    function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
+    function tcpuparamanager.getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
       var
         hfabasedef: tdef;
       begin
@@ -270,7 +196,8 @@ unit cpupara;
               then indexed beyond its bounds) }
           arraydef:
             result:=
-              (calloption in cdecl_pocalls) or
+              ((calloption in cdecl_pocalls) and
+               not is_dynamic_array(def)) or
               is_open_array(def) or
               is_array_of_const(def) or
               is_array_constructor(def) or
@@ -282,6 +209,8 @@ unit cpupara;
             result:=def.size>16;
           stringdef :
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
+          else
+            ;
         end;
       end;
 
@@ -363,6 +292,24 @@ unit cpupara;
          if not assigned(result.location) or
             not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
            internalerror(2014113001);
+         {
+           According to ARM64 ABI: "If the size of the argument is less than 8 bytes then
+           the size of the argument is set to 8 bytes. The effect is as if the argument
+           was copied to the least significant bits of a 64-bit register and the remaining
+           bits filled with unspecified values."
+
+           Therefore at caller side force the ordinal result to be always 64-bit, so it
+           will be stripped to the required size and uneeded bits are discarded.
+
+           This is not required for iOS, where the result is zero/sign extended.
+         }
+         if (target_info.abi<>abi_aarch64_darwin) and
+            (side=callerside) and (result.location^.loc = LOC_REGISTER) and
+            (result.def.size<8) and is_ordinal(result.def) then
+           begin
+             result.location^.size:=OS_64;
+             result.location^.def:=u64inttype;
+           end;
       end;
 
 
@@ -400,11 +347,16 @@ unit cpupara;
         if (p.proccalloption in cstylearrayofconst) and
            is_array_of_const(paradef) then
           begin
+            result.size:=OS_NO;
+            result.def:=paradef;
+            result.alignment:=std_param_align;
+            result.intsize:=0;
             paraloc:=result.add_location;
             { hack: the paraloc must be valid, but is not actually used }
             paraloc^.loc:=LOC_REGISTER;
             paraloc^.register:=NR_X0;
             paraloc^.size:=OS_ADDR;
+            paraloc^.def:=paradef;
             exit;
           end;
 
@@ -491,6 +443,8 @@ unit cpupara;
                    loc:=LOC_REFERENCE;
                  end;
              end;
+           else
+             ;
          end;
 
          { allocate registers/stack locations }
@@ -532,8 +486,48 @@ unit cpupara;
              end
            else
              begin
+{$ifndef llvm}
                paraloc^.size:=locsize;
                paraloc^.def:=locdef;
+{$else llvm}
+               case locsize of
+                 OS_8,OS_16,OS_32:
+                   begin
+                     paraloc^.size:=OS_64;
+                     paraloc^.def:=u64inttype;
+                   end;
+                 OS_S8,OS_S16,OS_S32:
+                   begin
+                     paraloc^.size:=OS_S64;
+                     paraloc^.def:=s64inttype;
+                   end;
+                 OS_F32:
+                   begin
+                     paraloc^.size:=OS_F32;
+                     paraloc^.def:=s32floattype;
+                   end;
+                 OS_F64:
+                   begin
+                     paraloc^.size:=OS_F64;
+                     paraloc^.def:=s64floattype;
+                   end;
+                 else
+                   begin
+                     if is_record(locdef) or
+                        ((locdef.typ=arraydef) and
+                         not is_special_array(locdef)) then
+                       begin
+                         paraloc^.size:=OS_64;
+                         paraloc^.def:=u64inttype;
+                       end
+                     else
+                       begin
+                         paraloc^.size:=locsize;
+                         paraloc^.def:=locdef;
+                       end;
+                   end;
+               end;
+{$endif llvm}
              end;
 
            { paraloc loc }
@@ -551,12 +545,29 @@ unit cpupara;
                     responsibility to sign or zero-extend arguments having fewer
                     than 32 bits, and that unused bits in a register are
                     unspecified. In iOS, however, the caller must perform such
-                    extensions, up to 32 bits." }
-                 if (target_info.abi=abi_aarch64_darwin) and
-                    (side=callerside) and
-                    is_ordinal(paradef) and
-                    (paradef.size<4) then
-                   paraloc^.size:=OS_32;
+                    extensions, up to 32 bits."
+                    Zero extend an argument at caller side for iOS and
+                    ignore the argument's unspecified high bits at callee side for
+                    all other platforms. }
+                 if (paradef.size<4) and is_ordinal(paradef) then
+                   begin
+                     if target_info.abi=abi_aarch64_darwin then
+                       begin
+                         if side=callerside then
+                           begin
+                             paraloc^.size:=OS_32;
+                             paraloc^.def:=u32inttype;
+                           end;
+                       end
+                     else
+                       begin
+                         if side=calleeside then
+                           begin
+                             paraloc^.size:=OS_32;
+                             paraloc^.def:=u32inttype;
+                           end;
+                       end;
+                   end;
 
                  { in case it's a composite, "The argument is passed as though
                    it had been loaded into the registers from a double-word-
@@ -567,7 +578,7 @@ unit cpupara;
                  if (target_info.endian=endian_big) and
                     not(paraloc^.size in [OS_64,OS_S64]) and
                     (paradef.typ in [setdef,recorddef,arraydef,objectdef]) then
-                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size]);
+                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size])*8;
                end;
              LOC_MMREGISTER:
                begin
@@ -581,7 +592,7 @@ unit cpupara;
                   paraloc^.loc:=LOC_REFERENCE;
 
                   { the current stack offset may not be properly aligned in
-                    case we're on Darwin have allocated a non-variadic argument
+                    case we're on Darwin and have allocated a non-variadic argument
                     < 8 bytes previously }
                   if target_info.abi=abi_aarch64_darwin then
                     curstackoffset:=align(curstackoffset,paraloc^.def.alignment);
@@ -633,12 +644,12 @@ unit cpupara;
      end;
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p: tabstractprocdef; side: tcallercallee; varargspara: tvarargsparalist):longint;
       begin
         init_para_alloc_values;
 
         { non-variadic parameters }
-        create_paraloc_info_intern(p,callerside,p.paras,false);
+        create_paraloc_info_intern(p,side,p.paras,false);
         if p.proccalloption in cstylearrayofconst then
           begin
             { on Darwin, we cannot use any registers for variadic parameters }
@@ -648,11 +659,19 @@ unit cpupara;
                 curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
               end;
             { continue loading the parameters  }
-            create_paraloc_info_intern(p,callerside,varargspara,true);
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  create_paraloc_info_intern(p,side,varargspara,true)
+                else
+                  internalerror(2019021916);
+              end;
             result:=curstackoffset;
           end
         else
           internalerror(200410231);
+
+        create_funcretloc_info(p,side);
       end;
 
 begin

+ 3 - 0
compiler/aarch64/cputarg.pas

@@ -41,6 +41,9 @@ implementation
     {$ifndef NOTARGETBSD}
       ,t_bsd
     {$endif}
+    {$ifndef NOTARGETANDROID}
+      ,t_android
+    {$endif}
 
 {**************************************
              Assemblers

+ 4 - 1
compiler/aarch64/hlcgcpu.pas

@@ -64,7 +64,10 @@ implementation
     begin
       tocgsize:=def_cgsize(tosize);
       if (sreg.startbit<>0) or
-         not(sreg.bitlen in [32,64]) then
+         not((sreg.subsetregsize in [OS_32,OS_S32]) and
+             (sreg.bitlen=32)) or
+         not((sreg.subsetregsize in [OS_64,OS_S64]) and
+             (sreg.bitlen=64)) then
         begin
           if is_signed(subsetsize) then
             op:=A_SBFX

+ 0 - 1
compiler/aarch64/ncpucnv.pas

@@ -142,7 +142,6 @@ implementation
   procedure taarch64typeconvnode.second_int_to_bool;
     var
       resflags: tresflags;
-      hlabel: tasmlabel;
     begin
       if (nf_explicit in flags) and
          not(left.expectloc in [LOC_FLAGS,LOC_JUMP]) then

+ 0 - 1
compiler/aarch64/ncpuinl.pas

@@ -128,7 +128,6 @@ implementation
     procedure taarch64inlinenode.second_abs_long;
       var
         opsize : tcgsize;
-        hp : taicpu;
       begin
         secondpass(left);
         opsize:=def_cgsize(left.resultdef);

+ 5 - 5
compiler/aarch64/ncpuset.pas

@@ -31,9 +31,9 @@ interface
     type
        taarch64casenode = class(tcgcasenode)
          protected
-           procedure optimizevalues(var max_linear_list: aint; var max_dist: aword);override;
+           procedure optimizevalues(var max_linear_list: int64; var max_dist: qword);override;
            function  has_jumptable: boolean;override;
-           procedure genjumptable(hp: pcaselabel ;min_, max_: aint);override;
+           procedure genjumptable(hp: pcaselabel ;min_, max_: int64);override;
        end;
 
 
@@ -56,7 +56,7 @@ implementation
 *****************************************************************************}
 
 
-    procedure taarch64casenode.optimizevalues(var max_linear_list: aint; var max_dist: aword);
+    procedure taarch64casenode.optimizevalues(var max_linear_list: int64; var max_dist: qword);
       begin
         max_linear_list:=10;
       end;
@@ -68,7 +68,7 @@ implementation
       end;
 
 
-    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: aint);
+    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: int64);
       var
         last: TConstExprInt;
         tablelabel: TAsmLabel;
@@ -80,7 +80,7 @@ implementation
 
       procedure genitem(list:TAsmList;t : pcaselabel);
         var
-          i : aint;
+          i : int64;
         begin
           if assigned(t^.less) then
             genitem(list,t^.less);

+ 3 - 0
compiler/aarch64/racpu.pas

@@ -73,6 +73,7 @@ unit racpu;
         { a 32 bit integer register could actually be 16 or 8 bit }
         if result=OS_32 then
           case oppostfix of
+            PF_NONE: ;
             PF_B:
               result:=OS_8;
             PF_SB:
@@ -81,6 +82,8 @@ unit racpu;
               result:=OS_16;
             PF_SH:
               result:=OS_S16;
+            else
+              Message(asmr_e_invalid_opcode_and_operand)
           end;
       end;
 

+ 8 - 4
compiler/aarch64/racpugas.pas

@@ -485,8 +485,8 @@ Unit racpugas;
                       useszr:=false;
                       for i:=low(instr.operands) to pred(opnr) do
                         begin
-                          if (instr.operands[1].opr.typ=OPR_REGISTER) then
-                            case getsupreg(instr.operands[1].opr.reg) of
+                          if (instr.operands[i].opr.typ=OPR_REGISTER) then
+                            case getsupreg(instr.operands[i].opr.reg) of
                               RS_XZR:
                                 useszr:=true;
                               RS_SP:
@@ -494,7 +494,10 @@ Unit racpugas;
                             end;
                         end;
                       result:=valid_shifter_operand(instr.opcode,useszr,usessp,instr.Is64bit,sm,instr.operands[opnr].opr.shifterop.shiftimm);
-                    end
+                      if result then
+                        instr.Ops:=opnr;
+                    end;
+                  break;
                 end;
           end;
       end;
@@ -520,6 +523,8 @@ Unit racpugas;
                     end;
                 end;
             end;
+          else
+            ;
         end;
         result:=C_None;;
       end;
@@ -933,7 +938,6 @@ Unit racpugas;
         j  : longint;
         hs : string;
         maxlen : longint;
-        icond : tasmcond;
       Begin
         { making s a value parameter would break other assembler readers }
         hs:=s;

+ 4 - 0
compiler/aarch64/rgcpu.pas

@@ -140,6 +140,8 @@ implementation
                { ok in immediate form }
                if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_const then
                  exit;
+             else
+               ;
            end;
            { add interferences for other registers }
            for i:=0 to taicpu(p).ops-1 do
@@ -163,6 +165,8 @@ implementation
                              add_edge(getsupreg(taicpu(p).oper[j]^.reg),getsupreg(taicpu(p).oper[i]^.ref^.base));
                        end;
                    end;
+                 else
+                   ;
                end;
              end;
          end;

+ 0 - 108
compiler/aasmbase.pas

@@ -230,11 +230,6 @@ interface
     function create_smartlink_library:boolean;inline;
     function create_smartlink:boolean;inline;
 
-    function LengthUleb128(a: qword) : byte;
-    function LengthSleb128(a: int64) : byte;
-    function EncodeUleb128(a: qword;out buf) : byte;
-    function EncodeSleb128(a: int64;out buf) : byte;
-
     function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
 
     { dummy default noop callback }
@@ -283,109 +278,6 @@ implementation
       end;
 
 
-    function LengthUleb128(a: qword) : byte;
-      begin
-        result:=0;
-        repeat
-          a := a shr 7;
-          inc(result);
-          if a=0 then
-            break;
-        until false;
-      end;
-
-
-    function LengthSleb128(a: int64) : byte;
-      var
-        b, size: byte;
-        asign : int64;
-        neg, more: boolean;
-      begin
-        more := true;
-        neg := a < 0;
-        size := sizeof(a)*8;
-        result:=0;
-        repeat
-          b := a and $7f;
-          a := a shr 7;
-          if neg then
-            begin
-              { Use a variable to be sure that the correct or mask is generated }
-              asign:=1;
-              asign:=asign shl (size - 7);
-              a := a or -asign;
-            end;
-          if (((a = 0) and
-               (b and $40 = 0)) or
-              ((a = -1) and
-               (b and $40 <> 0))) then
-            more := false;
-          inc(result);
-          if not(more) then
-            break;
-        until false;
-      end;
-
-
-    function EncodeUleb128(a: qword;out buf) : byte;
-      var
-        b: byte;
-        pbuf : pbyte;
-      begin
-        result:=0;
-        pbuf:=@buf;
-        repeat
-          b := a and $7f;
-          a := a shr 7;
-          if a<>0 then
-            b := b or $80;
-          pbuf^:=b;
-          inc(pbuf);
-          inc(result);
-          if a=0 then
-            break;
-        until false;
-      end;
-
-
-    function EncodeSleb128(a: int64;out buf) : byte;
-      var
-        b, size: byte;
-        asign : int64;
-        neg, more: boolean;
-        pbuf : pbyte;
-      begin
-        more := true;
-        neg := a < 0;
-        size := sizeof(a)*8;
-        result:=0;
-        pbuf:=@buf;
-        repeat
-          b := a and $7f;
-          a := a shr 7;
-          if neg then
-            begin
-              { Use a variable to be sure that the correct or mask is generated }
-              asign:=1;
-              asign:=asign shl (size - 7);
-              a := a or -asign;
-            end;
-          if (((a = 0) and
-               (b and $40 = 0)) or
-              ((a = -1) and
-               (b and $40 <> 0))) then
-            more := false
-          else
-            b := b or $80;
-          pbuf^:=b;
-          inc(pbuf);
-          inc(result);
-          if not(more) then
-            break;
-        until false;
-      end;
-
-
     function ReplaceForbiddenAsmSymbolChars(const s: ansistring): ansistring;
       var
         i : longint;

+ 172 - 15
compiler/aasmcnst.pas

@@ -57,7 +57,8 @@ type
     protected
      fval: tai;
     public
-     constructor create(_adetyp: ttypedconstkind; _def: tdef; _val: tai);
+     constructor create(_def: tdef; _val: tai);
+     destructor destroy; override;
      property val: tai read fval write setval;
    end;
 
@@ -69,7 +70,7 @@ type
      { iterator to walk over all individual items in the aggregate }
      tadeenumerator = class(tobject)
       private
-       fvalues: tfplist;
+       fvalues: tfpobjectlist;
        fvaluespos: longint;
        function getcurrent: tai_abstracttypedconst;
       public
@@ -80,7 +81,7 @@ type
      end;
 
     protected
-     fvalues: tfplist;
+     fvalues: tfpobjectlist;
      fisstring: boolean;
 
      { converts the existing data to a single tai_string }
@@ -92,7 +93,7 @@ type
      procedure addvalue(val: tai_abstracttypedconst);
      function valuecount: longint;
      procedure insertvaluebeforepos(val: tai_abstracttypedconst; pos: longint);
-     function replacevalueatpos(val: tai_abstracttypedconst; pos: longint): tai_abstracttypedconst;
+     procedure replacevalueatpos(val: tai_abstracttypedconst; pos: longint);
      { change the type to a record, regardless of how the aggregate was created;
        the size of the original type and the record must match }
      procedure changetorecord(_def: trecorddef);
@@ -269,6 +270,8 @@ type
      { finalize the asmlist: add the necessary symbols etc }
      procedure finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions); virtual;
      procedure finalize_asmlist_add_indirect_sym(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions); virtual;
+     { prepare finalization (common for the default and overridden versions }
+     procedure finalize_asmlist_prepare(const options: ttcasmlistoptions; var alignment: shortint);
 
      { functionality of the above for vectorized dead strippable sections }
      procedure finalize_vectorized_dead_strip_asmlist(def: tdef; const basename, itemname: TSymStr; st: tsymtable; alignment: shortint; options: ttcasmlistoptions); virtual;
@@ -348,6 +351,12 @@ type
      { emits a tasmlabofs as returned by emit_*string_const }
      procedure emit_string_offset(const ll: tasmlabofs; const strlength: longint; const st: tstringtype; const winlikewidestring: boolean; const charptrdef: tdef);virtual;
 
+     { emits a tasmlabofs as returned by begin_dynarray_const }
+     procedure emit_dynarray_offset(const ll:tasmlabofs;const arrlength:asizeint;const arrdef:tarraydef; const arrconstdatadef: trecorddef);virtual;
+     { starts a dynamic array constant so that its data can be emitted directly afterwards }
+     function begin_dynarray_const(arrdef:tdef;var startlab:tasmlabel;out arrlengthloc:ttypedconstplaceholder):tasmlabofs;virtual;
+     function end_dynarray_const(arrdef:tdef;arrlength:asizeint;arrlengthloc:ttypedconstplaceholder):tdef;virtual;
+
      { emit a shortstring constant, and return its def }
      function emit_shortstring_const(const str: shortstring): tdef;
      { emit a pchar string constant (the characters, not a pointer to them), and return its def }
@@ -359,6 +368,9 @@ type
      { emit an ordinal constant }
      procedure emit_ord_const(value: int64; def: tdef);
 
+     { emit a reference to a pooled shortstring constant }
+     procedure emit_pooled_shortstring_const_ref(const str:shortstring);
+
      { begin a potential aggregate type. Must be called for any type
        that consists of multiple tai constant data entries, or that
        represents an aggregate at the Pascal level (a record, a non-dynamic
@@ -444,6 +456,12 @@ type
        supported this is equal to the header size }
      class function get_string_symofs(typ: tstringtype; winlikewidestring: boolean): pint; virtual;
 
+     { returns the offset of the array data relatve to dynamic array constant
+       labels. On most platforms, this is 0 (with the header at a negative
+       offset), but on some platforms such negative offsets are not supported
+       and thus this is equal to the header size }
+     class function get_dynarray_symofs:pint;virtual;
+
      { set the fieldvarsym whose data we will emit next; needed
        in case of variant records, so we know which part of the variant gets
        initialised. Also in case of objects, because the fieldvarsyms are spread
@@ -453,9 +471,10 @@ type
        record (also if that field is a nested anonymous record) }
      property next_field_name: TIDString write set_next_field_name;
     protected
-     { this one always return the actual offset, called by the above (and
+     { these ones always return the actual offset, called by the above (and
        overridden versions) }
      class function get_string_header_size(typ: tstringtype; winlikewidestring: boolean): pint;
+     class function get_dynarray_header_size:pint;
    end;
    ttai_typedconstbuilderclass = class of ttai_typedconstbuilder;
 
@@ -621,13 +640,20 @@ implementation
       end;
 
 
-   constructor tai_simpletypedconst.create(_adetyp: ttypedconstkind; _def: tdef; _val: tai);
+   constructor tai_simpletypedconst.create(_def: tdef; _val: tai);
      begin
-       inherited create(_adetyp,_def);
+       inherited create(tck_simple,_def);
        fval:=_val;
      end;
 
 
+   destructor tai_simpletypedconst.destroy;
+     begin
+       fval.free;
+       inherited destroy;
+     end;
+
+
 {****************************************************************************
               tai_aggregatetypedconst.tadeenumerator
  ****************************************************************************}
@@ -684,7 +710,7 @@ implementation
        { the "nil" def will be replaced with an array def of the appropriate
          size once we're finished adding data, so we don't create intermediate
          arraydefs all the time }
-       fvalues.add(tai_simpletypedconst.create(tck_simple,nil,newstr));
+       fvalues.add(tai_simpletypedconst.create(nil,newstr));
      end;
 
    procedure tai_aggregatetypedconst.add_to_string(strtai: tai_string; othertai: tai);
@@ -718,7 +744,7 @@ implementation
      begin
        inherited;
        fisstring:=false;
-       fvalues:=tfplist.create;
+       fvalues:=tfpobjectlist.create(true);
      end;
 
 
@@ -768,9 +794,9 @@ implementation
      end;
 
 
-   function tai_aggregatetypedconst.replacevalueatpos(val: tai_abstracttypedconst; pos: longint): tai_abstracttypedconst;
+   procedure tai_aggregatetypedconst.replacevalueatpos(val: tai_abstracttypedconst; pos: longint);
      begin
-       result:=tai_abstracttypedconst(fvalues[pos]);
+       { since fvalues owns its elements, it will automatically free the old value }
        fvalues[pos]:=val;
      end;
 
@@ -802,6 +828,8 @@ implementation
 
 
    destructor tai_aggregatetypedconst.destroy;
+     var
+       ai: tai_abstracttypedconst;
      begin
        fvalues.free;
        inherited destroy;
@@ -912,9 +940,7 @@ implementation
      end;
 
 
-   procedure ttai_typedconstbuilder.finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions);
-     var
-       prelist: tasmlist;
+   procedure ttai_typedconstbuilder.finalize_asmlist_prepare(const options: ttcasmlistoptions; var alignment: shortint);
      begin
        if tcalo_apply_constalign in options then
          alignment:=const_align(alignment);
@@ -930,7 +956,14 @@ implementation
              tcalo_vectorized_dead_strip_end]*options)<>[]) and
           not fvectorized_finalize_called then
          internalerror(2015110602);
+     end;
 
+
+   procedure ttai_typedconstbuilder.finalize_asmlist(sym: tasmsymbol; def: tdef; section: TAsmSectiontype; const secname: TSymStr; alignment: shortint; const options: ttcasmlistoptions);
+     var
+       prelist: tasmlist;
+     begin
+       finalize_asmlist_prepare(options, alignment);
        prelist:=tasmlist.create;
        { only now add items based on the symbolname, because it may be
          modified by the "section" specifier in case of a typed constant }
@@ -947,7 +980,14 @@ implementation
            new_section(prelist,section,secname,alignment);
          end
        else if tcalo_new_section in options then
-         new_section(prelist,section,secname,alignment)
+         begin
+           { insert ait_cutobject for smart-linking on targets
+             that do not support smarlinking based on sections,
+             like msdos }
+           if not (tf_smartlink_sections in target_info.flags) then
+             maybe_new_object_file(prelist);
+           new_section(prelist,section,secname,alignment);
+         end
        else
          prelist.concat(cai_align.Create(alignment));
 
@@ -1073,6 +1113,7 @@ implementation
              secname:=make_mangledname(basename,st,'2_'+itemname);
            exclude(options,tcalo_vectorized_dead_strip_item);
          end;
+       current_module.linkorderedsymbols.concat(sym.Name);
        finalize_asmlist(sym,def,sectype,secname,alignment,options);
      end;
 
@@ -1117,6 +1158,16 @@ implementation
      end;
 
 
+   class function ttai_typedconstbuilder.get_dynarray_symofs:pint;
+     begin
+       { darwin's linker does not support negative offsets }
+       if not (target_info.system in systems_darwin) then
+         result:=0
+       else
+         result:=get_dynarray_header_size;
+     end;
+
+
    class function ttai_typedconstbuilder.get_string_header_size(typ: tstringtype; winlikewidestring: boolean): pint;
      var
        ansistring_header_size: pint;
@@ -1152,6 +1203,16 @@ implementation
      end;
 
 
+   class function ttai_typedconstbuilder.get_dynarray_header_size:pint;
+     begin
+       result:=
+         { reference count }
+         ptrsinttype.size +
+         { high value }
+         sizesinttype.size;
+     end;
+
+
    constructor ttai_typedconstbuilder.create(const options: ttcasmlistoptions);
      begin
        inherited create;
@@ -1674,6 +1735,52 @@ implementation
      end;
 
 
+   procedure ttai_typedconstbuilder.emit_dynarray_offset(const ll:tasmlabofs;const arrlength:asizeint;const arrdef:tarraydef; const arrconstdatadef: trecorddef);
+     begin
+       emit_tai(tai_const.create_sym_offset(ll.lab,ll.ofs),arrdef);
+     end;
+
+
+   function ttai_typedconstbuilder.begin_dynarray_const(arrdef:tdef;var startlab:tasmlabel;out arrlengthloc:ttypedconstplaceholder):tasmlabofs;
+     var
+       dynarray_symofs: asizeint;
+     begin
+       result.lab:=startlab;
+       result.ofs:=0;
+       { pack the data, so that we don't add unnecessary null bytes after the
+         constant string }
+       begin_anonymous_record('',1,sizeof(TConstPtrUInt),1,1);
+       dynarray_symofs:=get_dynarray_symofs;
+       { what to do if ptrsinttype <> sizesinttype??? }
+       emit_tai(tai_const.create_sizeint(-1),ptrsinttype);
+       inc(result.ofs,ptrsinttype.size);
+       arrlengthloc:=emit_placeholder(sizesinttype);
+       inc(result.ofs,sizesinttype.size);
+       if dynarray_symofs=0 then
+         begin
+           { results in slightly more efficient code }
+           emit_tai(tai_label.create(result.lab),arrdef);
+           result.ofs:=0;
+           { create new label of the same kind (including whether or not the
+             name starts with target_asm.labelprefix in case it's AB_LOCAL,
+             so we keep the difference depending on whether the original was
+             allocated via getstatic/getlocal/getglobal datalabel) }
+           startlab:=tasmlabel.create(current_asmdata.AsmSymbolDict,startlab.name+'$dynarrlab',startlab.bind,startlab.typ);
+         end;
+       { sanity check }
+       if result.ofs<>dynarray_symofs then
+         internalerror(2018020601);
+     end;
+
+
+   function ttai_typedconstbuilder.end_dynarray_const(arrdef:tdef;arrlength:asizeint;arrlengthloc:ttypedconstplaceholder):tdef;
+     begin
+       { we emit the high value, not the count }
+       arrlengthloc.replace(tai_const.Create_sizeint(arrlength-1),sizesinttype);
+       result:=end_anonymous_record;
+     end;
+
+
    function ttai_typedconstbuilder.emit_shortstring_const(const str: shortstring): tdef;
      begin
        { we use an arraydef instead of a shortstringdef, because we don't have
@@ -1759,6 +1866,56 @@ implementation
      end;
 
 
+   procedure ttai_typedconstbuilder.emit_pooled_shortstring_const_ref(const str:shortstring);
+     var
+       pool : thashset;
+       entry : phashsetitem;
+       strlab : tasmlabel;
+       l : longint;
+       pc : pansichar;
+       datadef : tdef;
+       strtcb : ttai_typedconstbuilder;
+     begin
+       pool:=current_asmdata.ConstPools[sp_shortstr];
+
+       entry:=pool.FindOrAdd(@str[1],length(str));
+
+       { :-(, we must generate a new entry }
+       if not assigned(entry^.Data) then
+         begin
+           current_asmdata.getglobaldatalabel(strlab);
+
+           { include length and terminating zero for quick conversion to pchar }
+           l:=length(str);
+           getmem(pc,l+2);
+           move(str[1],pc[1],l);
+           pc[0]:=chr(l);
+           pc[l+1]:=#0;
+
+           datadef:=carraydef.getreusable(cansichartype,l+2);
+
+           { we start a new constbuilder as we don't know whether we're called
+             from inside an internal constbuilder }
+           strtcb:=ctai_typedconstbuilder.create([tcalo_is_lab,tcalo_make_dead_strippable,tcalo_apply_constalign]);
+
+           strtcb.maybe_begin_aggregate(datadef);
+           strtcb.emit_tai(Tai_string.Create_pchar(pc,l+2),datadef);
+           strtcb.maybe_end_aggregate(datadef);
+
+           current_asmdata.asmlists[al_typedconsts].concatList(
+             strtcb.get_final_asmlist(strlab,datadef,sec_rodata_norel,strlab.name,const_align(sizeof(pint)))
+           );
+           strtcb.free;
+
+           entry^.Data:=strlab;
+         end
+       else
+         strlab:=tasmlabel(entry^.Data);
+
+       emit_tai(tai_const.Create_sym(strlab),charpointertype);
+     end;
+
+
    procedure ttai_typedconstbuilder.maybe_begin_aggregate(def: tdef);
      begin
        begin_aggregate_internal(def,false);

+ 73 - 3
compiler/aasmdata.pas

@@ -96,7 +96,8 @@ interface
          sp_objcprotocolrefs,
          sp_varsets,
          sp_floats,
-         sp_guids
+         sp_guids,
+         sp_paraloc
       );
       
     const
@@ -134,6 +135,22 @@ interface
          section_count : longint;
          constructor create;
          function  getlasttaifilepos : pfileposinfo;
+         { inserts another List at the begin and make this List empty }
+         procedure insertList(p : TLinkedList); override;
+         { inserts another List before the provided item and make this List empty }
+         procedure insertListBefore(Item:TLinkedListItem;p : TLinkedList); override;
+         { inserts another List after the provided item and make this List empty }
+         procedure insertListAfter(Item:TLinkedListItem;p : TLinkedList); override;
+         { concats another List at the end and make this List empty }
+         procedure concatList(p : TLinkedList); override;
+         { concats another List at the start and makes a copy
+           the list is ordered in reverse.
+         }
+         procedure insertListcopy(p : TLinkedList); override;
+         { concats another List at the end and makes a copy }
+         procedure concatListcopy(p : TLinkedList); override;
+         { removes all items from the list, the items are not freed }
+         procedure RemoveAll; override;
       end;
 
       TAsmCFI=class
@@ -337,6 +354,59 @@ implementation
       end;
 
 
+    procedure TAsmList.insertList(p : TLinkedList);
+      begin
+        inherited insertList(p);
+        inc(section_count,TAsmList(p).section_count);
+        TAsmList(p).section_count:=0;
+      end;
+
+
+    procedure TAsmList.insertListBefore(Item : TLinkedListItem; p : TLinkedList);
+      begin
+        inherited insertListBefore(Item,p);
+        inc(section_count,TAsmList(p).section_count);
+        TAsmList(p).section_count:=0;
+      end;
+
+
+    procedure TAsmList.insertListAfter(Item : TLinkedListItem; p : TLinkedList);
+      begin
+        inherited insertListAfter(Item,p);
+        inc(section_count,TAsmList(p).section_count);
+        TAsmList(p).section_count:=0;
+      end;
+
+
+    procedure TAsmList.concatList(p : TLinkedList);
+      begin
+        inherited concatList(p);
+        inc(section_count,TAsmList(p).section_count);
+        TAsmList(p).section_count:=0;
+      end;
+
+
+    procedure TAsmList.insertListcopy(p : TLinkedList);
+      begin
+        inherited insertListcopy(p);
+        inc(section_count,TAsmList(p).section_count);
+     end;
+
+
+    procedure TAsmList.concatListcopy(p : TLinkedList);
+      begin
+        inherited concatListcopy(p);
+        inc(section_count,TAsmList(p).section_count);
+      end;
+
+
+    procedure TAsmList.RemoveAll;
+      begin
+         inherited RemoveAll;
+         section_count:=0;
+      end;
+
+
 {****************************************************************************
                                 TAsmData
 ****************************************************************************}
@@ -423,8 +493,8 @@ implementation
         CurrAsmList:=TAsmList.create;
         for hal:=low(TAsmListType) to high(TAsmListType) do
           AsmLists[hal]:=TAsmList.create;
-        WideInits :=TLinkedList.create;
-        ResStrInits:=TLinkedList.create;
+        WideInits :=TAsmList.create;
+        ResStrInits:=TAsmList.create;
         { CFI }
         FAsmCFI:=CAsmCFI.Create;
       end;

+ 4 - 0
compiler/aasmsym.pas

@@ -53,6 +53,8 @@ implementation
         case o.typ of
           top_local :
             o.localoper^.localsymderef.build(tlocalvarsym(o.localoper^.localsym));
+          else
+            ;
         end;
       end;
 
@@ -65,6 +67,8 @@ implementation
             end;
           top_local :
             o.localoper^.localsym:=tlocalvarsym(o.localoper^.localsymderef.resolve);
+          else
+            ;
         end;
       end;
 

+ 73 - 25
compiler/aasmtai.pas

@@ -145,7 +145,11 @@ interface
           { offset of symbol's GOT slot in GOT }
           aitconst_got,
           { offset of symbol itself from GOT }
-          aitconst_gotoff_symbol
+          aitconst_gotoff_symbol,
+          { ARM TLS code }
+          aitconst_gottpoff,
+          aitconst_tpoff
+
         );
 
         tairealconsttype = (
@@ -232,6 +236,7 @@ interface
 {$if defined(arm) or defined(aarch64)}
        ,top_conditioncode
        ,top_shifterop
+       ,top_realconst
 {$endif defined(arm) or defined(aarch64)}
 {$ifdef m68k}
        { m68k only }
@@ -261,6 +266,10 @@ interface
        ,top_para
        ,top_asmlist
 {$endif llvm}
+{$if defined(riscv32) or defined(riscv64)}
+       ,top_fenceflags
+       ,top_roundingmode
+{$endif defined(riscv32) or defined(riscv64)}
        );
 
       { kinds of operations that an instruction can perform on an operand }
@@ -359,7 +368,9 @@ interface
           all assemblers. }
         asd_cpu,
         { for the OMF object format }
-        asd_omf_linnum_line
+        asd_omf_linnum_line,
+        { RISC-V }
+        asd_option
       );
 
       TAsmSehDirective=(
@@ -371,7 +382,7 @@ interface
           ash_pushnv,ash_savenv
         );
 
-      TSymbolPairKind = (spk_set, spk_thumb_set, spk_localentry);
+      TSymbolPairKind = (spk_set, spk_set_global, spk_thumb_set, spk_localentry);
 
 
     const
@@ -399,7 +410,9 @@ interface
         'code',
         'cpu',
         { for the OMF object format }
-        'omf_line'
+        'omf_line',
+        { RISC-V }
+        'option'
       );
       sehdirectivestr : array[TAsmSehDirective] of string[16]=(
         '.seh_proc','.seh_endproc',
@@ -410,7 +423,7 @@ interface
         '.pushnv','.savenv'
       );
       symbolpairkindstr: array[TSymbolPairKind] of string[11]=(
-        '.set', '.thumb_set', '.localentry'
+        '.set', '.set', '.thumb_set', '.localentry'
       );
 
     type
@@ -438,6 +451,7 @@ interface
         {$if defined(arm) or defined(aarch64)}
             top_shifterop : (shifterop : pshifterop);
             top_conditioncode : (cc : TAsmCond);
+            top_realconst : (val_real:bestreal);
         {$endif defined(arm) or defined(aarch64)}
         {$ifdef m68k}
             top_regset : (dataregset,addrregset,fpuregset: tcpuregisterset);
@@ -464,6 +478,10 @@ interface
             top_para   : (paras: tfplist);
             top_asmlist : (asmlist: tasmlist);
         {$endif llvm}
+        {$if defined(riscv32) or defined(riscv64)}
+            top_fenceflags : (fenceflags : TFenceFlags);
+            top_roundingmode : (roundingmode : TRoundingMode);
+        {$endif defined(riscv32) or defined(riscv64)}
         end;
         poper=^toper;
 
@@ -571,6 +589,9 @@ interface
           function getcopy:tlinkedlistitem;override;
        end;
 
+       type
+         TSectionFlags = (SF_None,SF_A,SF_W,SF_X);
+         TSectionProgbits = (SPB_None,SPB_PROGBITS,SPB_NOBITS);
 
        { Generates a section / segment directive }
        tai_section = class(tai)
@@ -578,7 +599,11 @@ interface
           secorder : TasmSectionorder;
           secalign : longint;
           name     : pshortstring;
-          sec      : TObjSection; { used in binary writer }
+          { used in binary writer }
+          sec      : TObjSection;
+          { used only by ELF so far }
+          secflags : TSectionFlags;
+          secprogbits : TSectionProgbits;
           destructor Destroy;override;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
@@ -838,11 +863,13 @@ interface
         { alignment for operator }
         tai_align_abstract = class(tai)
            aligntype : byte;   { 1 = no align, 2 = word align, 4 = dword align }
+           maxbytes  : byte;   { if needed bytes would be larger than maxbyes, alignment is ignored }
            fillsize  : byte;   { real size to fill }
            fillop    : byte;   { value to fill with - optional }
            use_op    : boolean;
            constructor Create(b:byte);virtual;
            constructor Create_op(b: byte; _op: byte);virtual;
+           constructor create_max(b: byte; max: byte);virtual;
            constructor Create_zeros(b:byte);
            constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
            procedure ppuwrite(ppufile:tcompilerppufile);override;
@@ -940,7 +967,7 @@ interface
       add_reg_instruction_hook : tadd_reg_instruction_proc;
 
     procedure maybe_new_object_file(list:TAsmList);
-    procedure new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+    function new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default) : tai_section;
 
     function ppuloadai(ppufile:tcompilerppufile):tai;
     procedure ppuwriteai(ppufile:tcompilerppufile;n:tai);
@@ -959,7 +986,6 @@ implementation
     const
       pputaimarker = 254;
 
-
 {****************************************************************************
                                  Helpers
  ****************************************************************************}
@@ -971,9 +997,10 @@ implementation
       end;
 
 
-    procedure new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default);
+    function new_section(list:TAsmList;Asectype:TAsmSectiontype;const Aname:string;Aalign:byte;Asecorder:TasmSectionorder=secorder_default) : tai_section;
       begin
-        list.concat(tai_section.create(Asectype,Aname,Aalign,Asecorder));
+        Result:=tai_section.create(Asectype,Aname,Aalign,Asecorder);
+        list.concat(Result);
         inc(list.section_count);
         list.concat(cai_align.create(Aalign));
       end;
@@ -1204,6 +1231,8 @@ implementation
         sectype:=TAsmSectiontype(ppufile.getbyte);
         secalign:=ppufile.getlongint;
         name:=ppufile.getpshortstring;
+        secflags:=TSectionFlags(ppufile.getbyte);
+        secprogbits:=TSectionProgbits(ppufile.getbyte);
         sec:=nil;
       end;
 
@@ -1220,6 +1249,8 @@ implementation
         ppufile.putbyte(byte(sectype));
         ppufile.putlongint(secalign);
         ppufile.putstring(name^);
+        ppufile.putbyte(byte(secflags));
+        ppufile.putbyte(byte(secprogbits));
       end;
 
 
@@ -1739,7 +1770,7 @@ implementation
       end;
 
 
-    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym,_endsym: tasmsymbol; _ofs: int64);
+    constructor tai_const.Create_rel_sym_offset(_typ: taiconst_type; _sym, _endsym: tasmsymbol; _ofs: int64);
        begin
          self.create_sym_offset(_sym,_ofs);
          consttype:=_typ;
@@ -1947,10 +1978,10 @@ implementation
           aitconst_16bit,aitconst_16bit_unaligned :
             result:=2;
           aitconst_32bit,aitconst_darwin_dwarf_delta32,
-	  aitconst_32bit_unaligned:
+          aitconst_32bit_unaligned:
             result:=4;
           aitconst_64bit,aitconst_darwin_dwarf_delta64,
-	  aitconst_64bit_unaligned:
+          aitconst_64bit_unaligned:
             result:=8;
           aitconst_secrel32_symbol,
           aitconst_rva_symbol :
@@ -2065,8 +2096,6 @@ implementation
             value.s128val:=ppufile.getreal;
           aitrealconst_s64comp:
             value.s64compval:=comp(ppufile.getint64);
-          else
-            internalerror(2014050602);
         end;
       end;
 
@@ -2094,8 +2123,6 @@ implementation
               c:=comp(value.s64compval);
               ppufile.putint64(int64(c));
             end
-          else
-            internalerror(2014050601);
         end;
       end;
 
@@ -2124,8 +2151,6 @@ implementation
             result:=10;
           aitrealconst_s128bit:
             result:=16;
-          else
-            internalerror(2014050603);
         end;
       end;
 
@@ -2790,6 +2815,8 @@ implementation
                     add_reg_instruction_hook(self,shifterop^.rs);
                 end;
 {$endif ARM}
+              else
+                ;
              end;
           end;
       end;
@@ -2815,6 +2842,8 @@ implementation
               top_wstring:
                 donewidestring(pwstrval);
 {$endif jvm}
+              else
+                ;
             end;
             typ:=top_none;
           end;
@@ -2868,6 +2897,8 @@ implementation
                   p.oper[i]^.shifterop^:=oper[i]^.shifterop^;
                 end;
 {$endif ARM}
+              else
+                ;
             end;
           end;
         getcopy:=p;
@@ -2902,9 +2933,10 @@ implementation
         i : integer;
       begin
         inherited ppuload(t,ppufile);
-        { hopefully, we don't get problems with big/litte endian here when cross compiling :/ }
+        { hopefully, we don't get problems with big/little endian here when cross compiling :/ }
         ppufile.getdata(condition,sizeof(tasmcond));
-        allocate_oper(ppufile.getbyte);
+        ops := ppufile.getbyte;
+        allocate_oper(ops);
         for i:=0 to ops-1 do
           ppuloadoper(ppufile,oper[i]^);
         opcode:=tasmop(ppufile.getword);
@@ -3075,6 +3107,7 @@ implementation
           fillsize:=0;
           fillop:=0;
           use_op:=false;
+          maxbytes:=aligntype;
        end;
 
 
@@ -3089,6 +3122,22 @@ implementation
           fillsize:=0;
           fillop:=_op;
           use_op:=true;
+          maxbytes:=aligntype;
+       end;
+
+
+     constructor tai_align_abstract.create_max(b : byte; max : byte);
+       begin
+          inherited Create;
+          typ:=ait_align;
+          if b in [1,2,4,8,16,32] then
+            aligntype := b
+          else
+            aligntype := 1;
+          maxbytes:=max;
+          fillsize:=0;
+          fillop:=0;
+          use_op:=false;
        end;
 
 
@@ -3103,6 +3152,7 @@ implementation
          use_op:=true;
          fillsize:=0;
          fillop:=0;
+         maxbytes:=aligntype;
        end;
 
 
@@ -3122,6 +3172,7 @@ implementation
         fillsize:=0;
         fillop:=ppufile.getbyte;
         use_op:=ppufile.getboolean;
+        maxbytes:=ppufile.getbyte;
       end;
 
 
@@ -3131,6 +3182,7 @@ implementation
         ppufile.putbyte(aligntype);
         ppufile.putbyte(fillop);
         ppufile.putboolean(use_op);
+        ppufile.putbyte(maxbytes);
       end;
 
 
@@ -3208,8 +3260,6 @@ implementation
               ppufile.getdata(data.reg,sizeof(TRegister));
               data.offset:=ppufile.getdword;
             end;
-        else
-          InternalError(2011091201);
         end;
       end;
 
@@ -3237,8 +3287,6 @@ implementation
               ppufile.putdata(data.reg,sizeof(TRegister));
               ppufile.putdword(data.offset);
             end;
-        else
-          InternalError(2011091202);
         end;
       end;
 

+ 138 - 65
compiler/aggas.pas

@@ -49,7 +49,8 @@ interface
         function sectionattrs(atype:TAsmSectiontype):string;virtual;
         function sectionattrs_coff(atype:TAsmSectiontype):string;virtual;
         function sectionalignment_aix(atype:TAsmSectiontype;secalign: longint):string;
-        procedure WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint);virtual;
+        procedure WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;
+          secflags:TSectionFlags=SF_None;secprogbits:TSectionProgbits=SPB_None);virtual;
         procedure WriteExtraHeader;virtual;
         procedure WriteExtraFooter;virtual;
         procedure WriteInstruction(hp: tai);
@@ -211,7 +212,7 @@ implementation
 { vtable for a class called Window:                                       }
 { .section .data.rel.ro._ZTV6Window,"awG",@progbits,_ZTV6Window,comdat    }
 { TODO: .data.ro not yet working}
-{$if defined(arm) or defined(powerpc)}
+{$if defined(arm) or defined(riscv64) or defined(powerpc)}
           '.rodata',
 {$else arm}
           '.data',
@@ -346,9 +347,13 @@ implementation
             exit;
           end;
 
-        if (atype=sec_threadvar) and
-          (target_info.system in (systems_windows+systems_wince)) then
-          secname:='.tls';
+        if atype=sec_threadvar then
+          begin
+            if (target_info.system in (systems_windows+systems_wince)) then
+              secname:='.tls'
+            else if (target_info.system in systems_linux) then
+              secname:='.tbss';
+          end;
 
         { go32v2 stub only loads .text and .data sections, and allocates space for .bss.
           Thus, data which normally goes into .rodata and .rodata_norel sections must
@@ -370,6 +375,8 @@ implementation
                 secname:='.data.rel.ro';
               sec_rodata_norel:
                 secname:='.rodata';
+              else
+                ;
             end;
           end;
 
@@ -457,7 +464,7 @@ implementation
       end;
 
 
-    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint);
+    procedure TGNUAssembler.WriteSection(atype:TAsmSectiontype;const aname:string;aorder:TAsmSectionOrder;secalign:longint;secflags:TSectionFlags=SF_None;secprogbits:TSectionProgbits=SPB_None);
       var
         s : string;
       begin
@@ -491,58 +498,81 @@ implementation
         end;
         s:=sectionname(atype,aname,aorder);
         writer.AsmWrite(s);
-        case atype of
-          sec_fpc :
-            if aname = 'resptrs' then
-              writer.AsmWrite(', "a", @progbits');
-          sec_stub :
-            begin
-              case target_info.system of
-                { there are processor-independent shortcuts available    }
-                { for this, namely .symbol_stub and .picsymbol_stub, but }
-                { they don't work and gcc doesn't use them either...     }
-                system_powerpc_darwin,
-                system_powerpc64_darwin:
-                  if (cs_create_pic in current_settings.moduleswitches) then
-                    writer.AsmWriteln('__TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32')
-                  else
-                    writer.AsmWriteln('__TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16');
-                system_i386_darwin,
-                system_i386_iphonesim:
-                  writer.AsmWriteln('__IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5');
-                system_arm_darwin:
-                  if (cs_create_pic in current_settings.moduleswitches) then
-                    writer.AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
-                  else
-                    writer.AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
-                { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
-                  explicit symbol stubs }
-                else
-                  internalerror(2006031101);
-              end;
+        { flags explicitly defined? }
+        if (secflags<>SF_None) or (secprogbits<>SPB_None) then
+          begin
+            case secflags of
+              SF_A:
+                writer.AsmWrite(',"a"');
+              SF_W:
+                writer.AsmWrite(',"w"');
+              SF_X:
+                writer.AsmWrite(',"x"');
+              SF_None:
+                writer.AsmWrite(',""');
+            end;
+            case secprogbits of
+              SPB_PROGBITS:
+                writer.AsmWrite(',%progbits');
+              SPB_NOBITS:
+                writer.AsmWrite(',%nobits');
+              SPB_None:
+                ;
             end;
+          end
         else
-          { GNU AS won't recognize '.text.n_something' section name as belonging
-            to '.text' and assigns default attributes to it, which is not
-            always correct. We have to fix it.
-
-            TODO: This likely applies to all systems which smartlink without
-            creating libraries }
-          begin
-            if is_smart_section(atype) and (aname<>'') then
-              begin
-                s:=sectionattrs(atype);
-                if (s<>'') then
-                  writer.AsmWrite(',"'+s+'"');
-              end;
-            if target_info.system in systems_aix then
+          case atype of
+            sec_fpc :
+              if aname = 'resptrs' then
+                writer.AsmWrite(', "a", @progbits');
+            sec_stub :
               begin
-                s:=sectionalignment_aix(atype,secalign);
-                if s<>'' then
-                  writer.AsmWrite(','+s);
+                case target_info.system of
+                  { there are processor-independent shortcuts available    }
+                  { for this, namely .symbol_stub and .picsymbol_stub, but }
+                  { they don't work and gcc doesn't use them either...     }
+                  system_powerpc_darwin,
+                  system_powerpc64_darwin:
+                    if (cs_create_pic in current_settings.moduleswitches) then
+                      writer.AsmWriteln('__TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32')
+                    else
+                      writer.AsmWriteln('__TEXT,__symbol_stub1,symbol_stubs,pure_instructions,16');
+                  system_i386_darwin,
+                  system_i386_iphonesim:
+                    writer.AsmWriteln('__IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5');
+                  system_arm_darwin:
+                    if (cs_create_pic in current_settings.moduleswitches) then
+                      writer.AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
+                    else
+                      writer.AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
+                  { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
+                    explicit symbol stubs }
+                  else
+                    internalerror(2006031101);
+                end;
               end;
+          else
+            { GNU AS won't recognize '.text.n_something' section name as belonging
+              to '.text' and assigns default attributes to it, which is not
+              always correct. We have to fix it.
+
+              TODO: This likely applies to all systems which smartlink without
+              creating libraries }
+            begin
+              if is_smart_section(atype) and (aname<>'') then
+                begin
+                  s:=sectionattrs(atype);
+                  if (s<>'') then
+                    writer.AsmWrite(',"'+s+'"');
+                end;
+              if target_info.system in systems_aix then
+                begin
+                  s:=sectionalignment_aix(atype,secalign);
+                  if s<>'' then
+                    writer.AsmWrite(','+s);
+                end;
+            end;
           end;
-        end;
         writer.AsmLn;
         LastSecType:=atype;
       end;
@@ -592,9 +622,10 @@ implementation
         end;
 
 
-      procedure doalign(alignment: byte; use_op: boolean; fillop: byte; out last_align: longint;lasthp:tai);
+      procedure doalign(alignment: byte; use_op: boolean; fillop: byte; maxbytes: byte; out last_align: longint;lasthp:tai);
         var
           i: longint;
+          alignment64 : int64;
 {$ifdef m68k}
           instr : string;
 {$endif}
@@ -621,14 +652,33 @@ implementation
                   else
                     begin
 {$endif m68k}
-                  writer.AsmWrite(#9'.balign '+tostr(alignment));
-                  if use_op then
-                    writer.AsmWrite(','+tostr(fillop))
+                      alignment64:=alignment;
+                      if (maxbytes<>alignment) and ispowerof2(alignment64,i) then
+                        begin
+                          if use_op then
+                            begin
+                              writer.AsmWrite(#9'.p2align '+tostr(i)+','+tostr(fillop)+','+tostr(maxbytes));
+                              writer.AsmLn;
+                              writer.AsmWrite(#9'.p2align '+tostr(i-1)+','+tostr(fillop));
+                            end
+                          else
+                            begin
+                              writer.AsmWrite(#9'.p2align '+tostr(i)+',,'+tostr(maxbytes));
+                              writer.AsmLn;
+                              writer.AsmWrite(#9'.p2align '+tostr(i-1));
+                            end
+                        end
+                      else
+                        begin
+                          writer.AsmWrite(#9'.balign '+tostr(alignment));
+                          if use_op then
+                            writer.AsmWrite(','+tostr(fillop))
 {$ifdef x86}
-                  { force NOP as alignment op code }
-                  else if (LastSecType=sec_code) and (asminfo^.id<>as_solaris_as) then
-                    writer.AsmWrite(',0x90');
+                          { force NOP as alignment op code }
+                          else if (LastSecType=sec_code) and (asminfo^.id<>as_solaris_as) then
+                            writer.AsmWrite(',0x90');
 {$endif x86}
+                        end;
 {$ifdef m68k}
                     end;
 {$endif m68k}
@@ -718,16 +768,18 @@ implementation
 
            ait_align :
              begin
-               doalign(tai_align_abstract(hp).aligntype,tai_align_abstract(hp).use_op,tai_align_abstract(hp).fillop,last_align,lasthp);
+               doalign(tai_align_abstract(hp).aligntype,tai_align_abstract(hp).use_op,tai_align_abstract(hp).fillop,tai_align_abstract(hp).maxbytes,last_align,lasthp);
              end;
 
            ait_section :
              begin
                if tai_section(hp).sectype<>sec_none then
                  if replaceforbidden then
-                   WriteSection(tai_section(hp).sectype,ReplaceForbiddenAsmSymbolChars(tai_section(hp).name^),tai_section(hp).secorder,tai_section(hp).secalign)
+                   WriteSection(tai_section(hp).sectype,ReplaceForbiddenAsmSymbolChars(tai_section(hp).name^),tai_section(hp).secorder,
+                     tai_section(hp).secalign,tai_section(hp).secflags,tai_section(hp).secprogbits)
                  else
-                   WriteSection(tai_section(hp).sectype,tai_section(hp).name^,tai_section(hp).secorder,tai_section(hp).secalign)
+                   WriteSection(tai_section(hp).sectype,tai_section(hp).name^,tai_section(hp).secorder,
+                     tai_section(hp).secalign,tai_section(hp).secflags,tai_section(hp).secprogbits)
                else
                  begin
 {$ifdef EXTDEBUG}
@@ -893,6 +945,11 @@ implementation
                         WriteAixIntConst(tai_const(hp));
                       writer.AsmLn;
                     end;
+                 aitconst_gottpoff:
+                   begin
+                     writer.AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(gottpoff)+(.-'+tai_const(hp).endsym.name+tostr_with_plus(tai_const(hp).symofs)+')');
+                     writer.Asmln;
+                   end;
 {$endif cpu64bitaddr}
                  aitconst_got:
                    begin
@@ -967,6 +1024,8 @@ implementation
                              WriteDecodedUleb128(qword(tai_const(hp).value));
                            aitconst_sleb128bit:
                              WriteDecodedSleb128(int64(tai_const(hp).value));
+                           else
+                             ;
                          end
                        end
                      else
@@ -1220,14 +1279,26 @@ implementation
                if replaceforbidden then
                  begin
                    { avoid string truncation }
-                   writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^)+s);
+                   writer.AsmWrite(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                   writer.AsmWrite(s);
                    writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).value^));
+                   if tai_symbolpair(hp).kind=spk_set_global then
+                     begin
+                       writer.AsmWrite(#9'.globl ');
+                       writer.AsmWriteLn(ReplaceForbiddenAsmSymbolChars(tai_symbolpair(hp).sym^));
+                     end;
                  end
                else
                  begin
                    { avoid string truncation }
-                   writer.AsmWrite(tai_symbolpair(hp).sym^+s);
+                   writer.AsmWrite(tai_symbolpair(hp).sym^);
+                   writer.AsmWrite(s);
                    writer.AsmWriteLn(tai_symbolpair(hp).value^);
+                   if tai_symbolpair(hp).kind=spk_set_global then
+                     begin
+                       writer.AsmWrite(#9'.globl ');
+                       writer.AsmWriteLn(tai_symbolpair(hp).sym^);
+                     end;
                  end;
              end;
            ait_symbol_end :
@@ -1715,6 +1786,8 @@ implementation
                 result:='.section '+objc_section_name(atype);
                 exit
               end;
+            else
+              ;
           end;
         result := inherited sectionname(atype,aname,aorder);
       end;

+ 7 - 0
compiler/aopt.pas

@@ -36,6 +36,9 @@ Unit aopt;
 
     Type
       TAsmOptimizer = class(TAoptObj)
+        { Pooled object that can be used by optimisation procedures to evaluate
+          future register usage without upsetting the current state. }
+        TmpUsedRegs: TAllUsedRegs;
 
         { _AsmL is the PAasmOutpout list that has to be optimized }
         Constructor create(_AsmL: TAsmList); virtual; reintroduce;
@@ -87,6 +90,7 @@ Unit aopt;
         inherited create(_asml,nil,nil,nil);
         { setup labeltable, always necessary }
         New(LabelInfo);
+        CreateUsedRegs(TmpUsedRegs);
       End;
 
     procedure TAsmOptimizer.FindLoHiLabels;
@@ -230,6 +234,8 @@ Unit aopt;
                           end;
                       End
                   End
+                else
+                  ;
               End;
               P := tai(p.Next);
               While Assigned(p) and
@@ -318,6 +324,7 @@ Unit aopt;
 
     Destructor TAsmOptimizer.Destroy;
       Begin
+        ReleaseUsedRegs(TmpUsedRegs);
         if assigned(LabelInfo^.LabelTable) then
           Freemem(LabelInfo^.LabelTable);
         Dispose(LabelInfo);

+ 55 - 16
compiler/aoptobj.pas

@@ -270,6 +270,8 @@ Unit AoptObj;
         Procedure UpdateUsedRegs(p : Tai);
         class procedure UpdateUsedRegs(var Regs: TAllUsedRegs; p: Tai);
         Function CopyUsedRegs(var dest : TAllUsedRegs) : boolean;
+        procedure RestoreUsedRegs(const Regs : TAllUsedRegs);
+        procedure TransferUsedRegs(var dest: TAllUsedRegs);
         class Procedure ReleaseUsedRegs(const regs : TAllUsedRegs);
         class Function RegInUsedRegs(reg : TRegister;regs : TAllUsedRegs) : boolean;
         class Procedure IncludeRegInUsedRegs(reg : TRegister;var regs : TAllUsedRegs);
@@ -327,7 +329,7 @@ Unit AoptObj;
         function RegEndOfLife(reg: TRegister;p: taicpu): boolean;
 
         { removes p from asml, updates registers and replaces it by a valid value, if this is the case true is returned }
-        function RemoveCurrentP(var p : taicpu): boolean;
+        function RemoveCurrentP(var p : tai): boolean;
 
        { traces sucessive jumps to their final destination and sets it, e.g.
          je l1                je l3
@@ -383,8 +385,8 @@ Unit AoptObj;
 
     function JumpTargetOp(ai: taicpu): poper; inline;
       begin
-{$if defined(MIPS)}
-        { MIPS branches can have 1,2 or 3 operands, target label is the last one. }
+{$if defined(MIPS) or defined(riscv64) or defined(riscv32)}
+        { MIPS or RiscV branches can have 1,2 or 3 operands, target label is the last one. }
         result:=ai.oper[ai.ops-1];
 {$elseif defined(SPARC64)}
         if ai.ops=2 then
@@ -440,6 +442,8 @@ Unit AoptObj;
                         Include(UsedRegs, getsupreg(tai_regalloc(p).reg));
                     ra_dealloc :
                       Exclude(UsedRegs, getsupreg(tai_regalloc(p).reg));
+                    else
+                      ;
                   end;
                 end;
               p := tai(p.next);
@@ -457,7 +461,7 @@ Unit AoptObj;
       End;
 
 
-    Function TUsedRegs.GetUsedRegs: TRegSet;
+    Function TUsedRegs.GetUsedRegs: TRegSet; inline;
       Begin
         GetUsedRegs := UsedRegs;
       End;
@@ -828,7 +832,7 @@ Unit AoptObj;
               If (TInstr(p).oper[Count]^.typ = Top_Ref) Then
                 TmpResult := RefsEq(Ref, PInstr(p)^.oper[Count]^.ref^);
               Inc(Count);
-            Until (Count = MaxOps) or TmpResult;
+            Until (Count = max_operands) or TmpResult;
           End;
         RefInInstruction := TmpResult;
       End;
@@ -916,6 +920,8 @@ Unit AoptObj;
                     Include(UsedRegs[getregtype(tai_regalloc(p).reg)].UsedRegs, getsupreg(tai_regalloc(p).reg));
                   ra_dealloc :
                     Exclude(UsedRegs[getregtype(tai_regalloc(p).reg)].UsedRegs, getsupreg(tai_regalloc(p).reg));
+                  else
+                    ;
                 end;
                 p := tai(p.next);
               end;
@@ -945,6 +951,30 @@ Unit AoptObj;
       end;
 
 
+      procedure TAOptObj.RestoreUsedRegs(const Regs: TAllUsedRegs);
+      var
+        i : TRegisterType;
+      begin
+        { Note that the constructor Create_Regset is being called as a regular
+          method - it is not instantiating a new object.  This is because it is
+          the only published means to modify the internal state en-masse. [Kit] }
+        for i:=low(TRegisterType) to high(TRegisterType) do
+          UsedRegs[i].Create_Regset(i,Regs[i].GetUsedRegs);
+      end;
+
+
+      procedure TAOptObj.TransferUsedRegs(var dest: TAllUsedRegs);
+      var
+        i : TRegisterType;
+      begin
+        { Note that the constructor Create_Regset is being called as a regular
+          method - it is not instantiating a new object.  This is because it is
+          the only published means to modify the internal state en-masse. [Kit] }
+        for i:=low(TRegisterType) to high(TRegisterType) do
+          dest[i].Create_Regset(i, UsedRegs[i].GetUsedRegs);
+      end;
+
+
       class procedure TAOptObj.ReleaseUsedRegs(const regs: TAllUsedRegs);
         var
           i : TRegisterType;
@@ -1059,7 +1089,10 @@ Unit AoptObj;
             Top_Reg :
               OpsEqual:=o1.reg=o2.reg;
             Top_Ref :
-              OpsEqual := references_equal(o1.ref^, o2.ref^);
+              OpsEqual:=
+                references_equal(o1.ref^, o2.ref^) and
+                (o1.ref^.volatility=[]) and
+                (o2.ref^.volatility=[]);
             Top_Const :
               OpsEqual:=o1.val=o2.val;
             Top_None :
@@ -1289,7 +1322,7 @@ Unit AoptObj;
       end;
 
 
-    function TAOptObj.RemoveCurrentP(var p : taicpu) : boolean;
+    function TAOptObj.RemoveCurrentP(var p : tai) : boolean;
       var
         hp1 : tai;
       begin
@@ -1299,7 +1332,7 @@ Unit AoptObj;
         UpdateUsedRegs(tai(p.Next));
         AsmL.Remove(p);
         p.Free;
-        p:=taicpu(hp1);
+        p:=hp1;
       end;
 
 
@@ -1342,7 +1375,12 @@ Unit AoptObj;
 {$if defined(arm) or defined(aarch64)}
           (hp.condition=c_None) and
 {$endif arm or aarch64}
+{$if defined(riscv32) or defined(riscv64)}          
           (hp.ops>0) and
+          (hp.oper[0]^.reg=NR_X0) and
+{$else riscv}
+          (hp.ops>0) and
+{$endif riscv}
           (JumpTargetOp(hp)^.typ = top_ref) and
           (JumpTargetOp(hp)^.ref^.symbol is TAsmLabel);
       end;
@@ -1390,7 +1428,7 @@ Unit AoptObj;
        to avoid endless loops with constructs such as "l5: ; jmp l5"           }
 
       var p1: tai;
-          {$if not defined(MIPS) and not defined(JVM)}
+          {$if not defined(MIPS) and not defined(riscv64) and not defined(riscv32) and not defined(JVM)}
           p2: tai;
           l: tasmlabel;
           {$endif}
@@ -1408,7 +1446,7 @@ Unit AoptObj;
               if { the next instruction after the label where the jump hp arrives}
                  { is unconditional or of the same type as hp, so continue       }
                  IsJumpToLabelUncond(taicpu(p1))
-{$if not defined(MIPS) and not defined(JVM)}
+{$if not defined(MIPS) and not defined(riscv64) and not defined(riscv32) and not defined(JVM)}
 { for MIPS, it isn't enough to check the condition; first operands must be same, too. }
                  or
                  conditions_equal(taicpu(p1).condition,hp.condition) or
@@ -1425,7 +1463,7 @@ Unit AoptObj;
                    (IsJumpToLabelUncond(taicpu(p2)) or
                    (conditions_equal(taicpu(p2).condition,hp.condition))) and
                   SkipLabels(p1,p1))
-{$endif not MIPS and not JVM}
+{$endif not MIPS and not RV64 and not RV32 and not JVM}
                  then
                 begin
                   { quick check for loops of the form "l5: ; jmp l5 }
@@ -1446,7 +1484,7 @@ Unit AoptObj;
                   JumpTargetOp(hp)^.ref^.symbol:=JumpTargetOp(taicpu(p1))^.ref^.symbol;
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).increfs;
                 end
-{$if not defined(MIPS) and not defined(JVM)}
+{$if not defined(MIPS) and not defined(riscv64) and not defined(riscv32) and not defined(JVM)}
               else
                 if conditions_equal(taicpu(p1).condition,inverse_cond(hp.condition)) then
                   if not FindAnyLabel(p1,l) then
@@ -1477,7 +1515,7 @@ Unit AoptObj;
                       if not GetFinalDestination(hp,succ(level)) then
                         exit;
                     end;
-{$endif not MIPS and not JVM}
+{$endif not MIPS and not RV64 and not RV32 and not JVM}
           end;
         GetFinalDestination := true;
       end;
@@ -1546,7 +1584,7 @@ Unit AoptObj;
                                   and (hp1.typ <> ait_jcatch)
 {$endif}
                                   do
-                              if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
+                              if not(hp1.typ in ([ait_label]+skipinstr)) then
                                 begin
                                   if (hp1.typ = ait_instruction) and
                                      taicpu(hp1).is_jmp and
@@ -1555,7 +1593,7 @@ Unit AoptObj;
                                      TAsmLabel(JumpTargetOp(taicpu(hp1))^.ref^.symbol).decrefs;
                                   { don't kill start/end of assembler block,
                                     no-line-info-start/end etc }
-                                  if hp1.typ<>ait_marker then
+                                  if not(hp1.typ in [ait_align,ait_marker]) then
                                     begin
 {$ifdef cpudelayslot}
                                       if (hp1.typ=ait_instruction) and (taicpu(hp1).is_jmp) then
@@ -1652,6 +1690,8 @@ Unit AoptObj;
                       begin
                       end; { if is_jmp }
                   end;
+                else
+                  ;
               end;
               if assigned(p) then
                 begin
@@ -1671,7 +1711,6 @@ Unit AoptObj;
         ClearUsedRegs;
         while (p <> BlockEnd) Do
           begin
-            UpdateUsedRegs(tai(p.next));
             if PeepHoleOptPass2Cpu(p) then
               continue;
             if assigned(p) then

+ 12 - 1
compiler/aoptutils.pas

@@ -27,10 +27,13 @@ unit aoptutils;
   interface
 
     uses
-      aasmtai,aasmcpu;
+      cpubase,aasmtai,aasmcpu;
 
     function MatchOpType(const p : taicpu;type0: toptype) : Boolean;
     function MatchOpType(const p : taicpu;type0,type1 : toptype) : Boolean;
+{$if max_operands>2}
+    function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean;
+{$endif max_operands>2}
 
     { skips all labels and returns the next "real" instruction }
     function SkipLabels(hp: tai; var hp2: tai): boolean;
@@ -49,6 +52,14 @@ unit aoptutils;
       end;
 
 
+{$if max_operands>2}
+    function MatchOpType(const p : taicpu; type0,type1,type2 : toptype) : Boolean;
+      begin
+        Result:=(p.ops=3) and (p.oper[0]^.typ=type0) and (p.oper[1]^.typ=type1) and (p.oper[2]^.typ=type2);
+      end;
+{$endif max_operands>2}
+
+
     { skips all labels and returns the next "real" instruction }
     function SkipLabels(hp: tai; var hp2: tai): boolean;
       begin

+ 204 - 68
compiler/arm/aasmcpu.pas

@@ -76,6 +76,7 @@ uses
       OT_IMMTINY   = $00002100;
       OT_IMMSHIFTER= $00002200;
       OT_IMMEDIATEZERO = $10002200;
+      OT_IMMEDIATEMM     = $00002400;
       OT_IMMEDIATE24 = OT_IMM24;
       OT_SHIFTIMM  = OT_SHIFTEROP or OT_IMMSHIFTER;
       OT_SHIFTIMMEDIATE = OT_SHIFTIMM;
@@ -200,6 +201,8 @@ uses
          procedure loadconditioncode(opidx:longint;const cond:tasmcond);
          procedure loadmodeflags(opidx:longint;const flags:tcpumodeflags);
          procedure loadspecialreg(opidx:longint;const areg:tregister; const aflags:tspecialregflags);
+         procedure loadrealconst(opidx:longint;const _value:bestreal);
+
          constructor op_none(op : tasmop);
 
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -237,6 +240,8 @@ uses
          { *M*LL }
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
 
+         constructor op_reg_realconst(op : tasmop;_op1: tregister;_op2: bestreal);
+
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
 
@@ -332,6 +337,19 @@ implementation
       end;
 
 
+    procedure taicpu.loadrealconst(opidx:longint;const _value:bestreal);
+      begin
+        allocate_oper(opidx+1);
+        with oper[opidx]^ do
+          begin
+            if typ<>top_realconst then
+              clearop(opidx);
+            val_real:=_value;
+            typ:=top_realconst;
+          end;
+      end;
+
+
     procedure taicpu.loadregset(opidx:longint; regsetregtype: tregistertype; regsetsubregtype: tsubregister; const s:tcpuregisterset; ausermode: boolean);
       var
         i : byte;
@@ -363,6 +381,8 @@ implementation
                    if assigned(add_reg_instruction_hook) and (i in regset^) then
                      add_reg_instruction_hook(self,newreg(R_MMREGISTER,i,regsetsubregtype));
                  end;
+             else
+               internalerror(2019050932);
            end;
          end;
       end;
@@ -504,6 +524,15 @@ implementation
       end;
 
 
+    constructor taicpu.op_reg_realconst(op : tasmop; _op1 : tregister; _op2 : bestreal);
+      begin
+         inherited create(op);
+         ops:=2;
+         loadreg(0,_op1);
+         loadrealconst(1,_op2);
+      end;
+
+
      constructor taicpu.op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
        begin
          inherited create(op);
@@ -803,7 +832,7 @@ implementation
           end
         else
           case opcode of
-            A_ADC,A_ADD,A_AND,A_BIC,
+            A_ADC,A_ADD,A_AND,A_BIC,A_ORN,
             A_EOR,A_CLZ,A_RBIT,
             A_LDR,A_LDRB,A_LDRBT,A_LDRH,A_LDRSB,
             A_LDRSH,A_LDRT,
@@ -1114,6 +1143,8 @@ implementation
                                           begin
                                             inc(extradataoffset,multiplier*(((tai_realconst(hp).savesize-4)+3) div 4));
                                           end;
+                                        else
+                                          ;
                                       end;
                                       { check if the same constant has been already inserted into the currently handled list,
                                         if yes, reuse it }
@@ -1123,8 +1154,9 @@ implementation
                                           while assigned(hp2) do
                                             begin
                                               if (hp2.typ=ait_const) and (tai_const(hp2).sym=tai_const(hp).sym)
-                                                and (tai_const(hp2).value=tai_const(hp).value) and (tai(hp2.previous).typ=ait_label)
-                                              then
+                                                and (tai_const(hp2).value=tai_const(hp).value) and (tai(hp2.previous).typ=ait_label) and
+                                                { gottpoff symbols are PC relative, so we cannot reuse them }
+                                                (tai_const(hp2).consttype<>aitconst_gottpoff) then
                                                 begin
                                                   with taicpu(curtai).oper[curop]^.ref^ do
                                                     begin
@@ -1172,6 +1204,8 @@ implementation
                 begin
                   inc(curinspos,multiplier*((tai_realconst(hp).savesize+3) div 4));
                 end;
+              else
+                ;
             end;
             { special case for case jump tables }
             penalty:=0;
@@ -1242,6 +1276,8 @@ implementation
                           or if we splitted them so split before }
                       CheckLimit(hp,4);
                     end;
+                  else
+                    ;
                 end;
               end;
 
@@ -1396,8 +1432,11 @@ implementation
                               end;
                           end;
                       end;
+                    else;
                   end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1461,8 +1500,12 @@ implementation
                             taicpu(curtai).ops:=2;
                           end;
                       end;
+                    else
+                      ;
                   end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1508,55 +1551,59 @@ implementation
           begin
             case curtai.typ of
               ait_instruction:
-                if IsIT(taicpu(curtai).opcode) then
-                  begin
-                    levels := GetITLevels(taicpu(curtai).opcode);
-                    if levels < 4 then
-                      begin
-                        i:=levels;
-                        hp1:=tai(curtai.Next);
-                        while assigned(hp1) and
-                          (i > 0) do
-                          begin
-                            if hp1.typ=ait_instruction then
-                              begin
-                                dec(i);
-                                if (i = 0) and
-                                  mustbelast(hp1) then
-                                  begin
-                                    hp1:=nil;
-                                    break;
-                                  end;
-                              end;
-                            hp1:=tai(hp1.Next);
-                          end;
+                begin
+                  if IsIT(taicpu(curtai).opcode) then
+                    begin
+                      levels := GetITLevels(taicpu(curtai).opcode);
+                      if levels < 4 then
+                        begin
+                          i:=levels;
+                          hp1:=tai(curtai.Next);
+                          while assigned(hp1) and
+                            (i > 0) do
+                            begin
+                              if hp1.typ=ait_instruction then
+                                begin
+                                  dec(i);
+                                  if (i = 0) and
+                                    mustbelast(hp1) then
+                                    begin
+                                      hp1:=nil;
+                                      break;
+                                    end;
+                                end;
+                              hp1:=tai(hp1.Next);
+                            end;
 
-                        if assigned(hp1) then
-                          begin
-                            // We are pointing at the first instruction after the IT block
-                            while assigned(hp1) and
-                              (hp1.typ<>ait_instruction) do
-                                hp1:=tai(hp1.Next);
-
-                            if assigned(hp1) and
-                              (hp1.typ=ait_instruction) and
-                              IsIT(taicpu(hp1).opcode) then
-                              begin
-                                if (levels+GetITLevels(taicpu(hp1).opcode) <= 4) and
-                                  ((taicpu(curtai).oper[0]^.cc=taicpu(hp1).oper[0]^.cc) or
-                                   (taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc))) then
-                                  begin
-                                    taicpu(curtai).opcode:=getMergedInstruction(taicpu(curtai).opcode,
-                                                                                taicpu(hp1).opcode,
-                                                                                taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc));
+                          if assigned(hp1) then
+                            begin
+                              // We are pointing at the first instruction after the IT block
+                              while assigned(hp1) and
+                                (hp1.typ<>ait_instruction) do
+                                  hp1:=tai(hp1.Next);
+
+                              if assigned(hp1) and
+                                (hp1.typ=ait_instruction) and
+                                IsIT(taicpu(hp1).opcode) then
+                                begin
+                                  if (levels+GetITLevels(taicpu(hp1).opcode) <= 4) and
+                                    ((taicpu(curtai).oper[0]^.cc=taicpu(hp1).oper[0]^.cc) or
+                                     (taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc))) then
+                                    begin
+                                      taicpu(curtai).opcode:=getMergedInstruction(taicpu(curtai).opcode,
+                                                                                  taicpu(hp1).opcode,
+                                                                                  taicpu(curtai).oper[0]^.cc=inverse_cond(taicpu(hp1).oper[0]^.cc));
 
-                                    list.Remove(hp1);
-                                    hp1.Free;
-                                  end;
-                              end;
-                          end;
-                      end;
-                  end;
+                                      list.Remove(hp1);
+                                      hp1.Free;
+                                    end;
+                                end;
+                            end;
+                        end;
+                    end;
+                end
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1583,6 +1630,8 @@ implementation
                       case taicpu(curtai).opcode of
                         A_AND: taicpu(curtai).opcode:=A_BIC;
                         A_BIC: taicpu(curtai).opcode:=A_AND;
+                        else
+                          internalerror(2019050931);
                       end;
                       taicpu(curtai).oper[2]^.val:=(not taicpu(curtai).oper[2]^.val) and $FFFFFFFF;
                     end
@@ -1595,10 +1644,14 @@ implementation
                       case taicpu(curtai).opcode of
                         A_ADD: taicpu(curtai).opcode:=A_SUB;
                         A_SUB: taicpu(curtai).opcode:=A_ADD;
+                        else
+                          internalerror(2019050930);
                       end;
                       taicpu(curtai).oper[2]^.val:=-taicpu(curtai).oper[2]^.val;
                     end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1646,6 +1699,8 @@ implementation
                       end;
                   end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -1671,6 +1726,7 @@ implementation
                            (taicpu(curtai).oper[2]^.typ=top_shifterop) then
                           begin
                             case taicpu(curtai).oper[2]^.shifterop^.shiftmode of
+                              SM_NONE: ;
                               SM_LSL: taicpu(curtai).opcode:=A_LSL;
                               SM_LSR: taicpu(curtai).opcode:=A_LSR;
                               SM_ASR: taicpu(curtai).opcode:=A_ASR;
@@ -1707,8 +1763,12 @@ implementation
                       begin
                         taicpu(curtai).opcode:=A_SVC;
                       end;
+                    else
+                      ;
                   end;
                 end;
+              else
+                ;
             end;
 
             curtai:=tai(curtai.Next);
@@ -2363,6 +2423,10 @@ implementation
                 begin
                   ot:=OT_MODEFLAGS;
                 end;
+              top_realconst:
+                begin
+                  ot:=OT_IMMEDIATEMM;
+                end;
               else
                 internalerror(2004022623);
             end;
@@ -2719,6 +2783,8 @@ implementation
         refoper : poper;
         msb : longint;
         r: byte;
+        singlerec : tcompsinglerec;
+        doublerec : tcompdoublerec;
 
       procedure setshifterop(op : byte);
         var
@@ -2937,6 +3003,7 @@ implementation
           shift:=0;
           typ:=0;
           case oper[op]^.shifterop^.shiftmode of
+            SM_None: ;
             SM_LSL: begin typ:=0; shift:=oper[op]^.shifterop^.shiftimm; end;
             SM_LSR: begin typ:=1; shift:=oper[op]^.shifterop^.shiftimm; if shift=32 then shift:=0; end;
             SM_ASR: begin typ:=2; shift:=oper[op]^.shifterop^.shiftimm; if shift=32 then shift:=0; end;
@@ -3881,36 +3948,76 @@ implementation
                   end;
                 PF_F32:
                   begin
-                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) or
-                       (getregtype(oper[1]^.reg)<>R_MMREGISTER) then
+                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) then
                       Message(asmw_e_invalid_opcode_and_operands);
 
+                    case oper[1]^.typ of
+                      top_realconst:
+                        begin
+                          if not(IsVFPFloatImmediate(s32real,oper[1]^.val_real)) then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          singlerec.value:=oper[1]^.val_real;
+                          singlerec:=tcompsinglerec(NtoLE(DWord(singlerec)));
+
+                          bytes:=bytes or ((singlerec.bytes[2] shr 3) and $f);
+                          bytes:=bytes or (DWord((singlerec.bytes[2] shr 7) and $1) shl 16) or (DWord(singlerec.bytes[3] and $3) shl 17) or (DWord((singlerec.bytes[3] shr 7) and $1) shl 19);
+                        end;
+                      top_reg:
+                        begin
+                          if getregtype(oper[1]^.reg)<>R_MMREGISTER then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          Rm:=getmmreg(oper[1]^.reg);
+                          bytes:=bytes or (((Rm and $1E) shr 1) shl 0);
+                          bytes:=bytes or ((Rm and $1) shl 5);
+                        end;
+                      else
+                        Message(asmw_e_invalid_opcode_and_operands);
+                    end;
                     Rd:=getmmreg(oper[0]^.reg);
-                    Rm:=getmmreg(oper[1]^.reg);
 
                     bytes:=bytes or (((Rd and $1E) shr 1) shl 12);
                     bytes:=bytes or ((Rd and $1) shl 22);
 
-                    bytes:=bytes or (((Rm and $1E) shr 1) shl 0);
-                    bytes:=bytes or ((Rm and $1) shl 5);
                   end;
                 PF_F64:
                   begin
-                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) or
-                       (getregtype(oper[1]^.reg)<>R_MMREGISTER) then
+                    if (getregtype(oper[0]^.reg)<>R_MMREGISTER) then
                       Message(asmw_e_invalid_opcode_and_operands);
 
+                    case oper[1]^.typ of
+                      top_realconst:
+                        begin
+                          if not(IsVFPFloatImmediate(s64real,oper[1]^.val_real)) then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          doublerec.value:=oper[1]^.val_real;
+                          doublerec:=tcompdoublerec(NtoLE(QWord(doublerec)));
+
+                          //      32c:       eeb41b00        vmov.f64        d1, #64 ; 0x40
+
+                          // 32c:       eeb61b00        vmov.f64        d1, #96 ; 0x60
+                          bytes:=bytes or (doublerec.bytes[6] and $f);
+                          bytes:=bytes or (DWord((doublerec.bytes[6] shr 4) and $7) shl 16) or (DWord((doublerec.bytes[7] shr 7) and $1) shl 19);
+                        end;
+                      top_reg:
+                        begin
+                          if getregtype(oper[1]^.reg)<>R_MMREGISTER then
+                            Message(asmw_e_invalid_opcode_and_operands);
+                          Rm:=getmmreg(oper[1]^.reg);
+                          bytes:=bytes or (Rm and $F);
+                          bytes:=bytes or ((Rm and $10) shl 1);
+                        end;
+                      else
+                        Message(asmw_e_invalid_opcode_and_operands);
+                    end;
                     Rd:=getmmreg(oper[0]^.reg);
-                    Rm:=getmmreg(oper[1]^.reg);
 
                     bytes:=bytes or (1 shl 8);
 
                     bytes:=bytes or ((Rd and $F) shl 12);
                     bytes:=bytes or (((Rd and $10) shr 4) shl 22);
-
-                    bytes:=bytes or (Rm and $F);
-                    bytes:=bytes or ((Rm and $10) shl 1);
                   end;
+                else
+                  Message(asmw_e_invalid_opcode_and_operands);
               end;
             end;
           #$41,#$91: // VMRS/VMSR
@@ -4071,6 +4178,8 @@ implementation
                         d:=(rd shr 4) and 1;
                         rd:=rd and $F;
                       end;
+                    else
+                      internalerror(2019050929);
                   end;
 
                   m:=0;
@@ -4091,6 +4200,8 @@ implementation
                         m:=(rm shr 4) and 1;
                         rm:=rm and $F;
                       end;
+                    else
+                      internalerror(2019050928);
                   end;
 
                   bytes:=bytes or (Rd shl 12);
@@ -4107,6 +4218,8 @@ implementation
                     PF_F64S32,
                     PF_F64U32:
                       bytes:=bytes or (1 shl 8);
+                    else
+                      ;
                   end;
 
                   if oppostfix in [PF_S32F32,PF_S32F64,PF_U32F32,PF_U32F64] then
@@ -4115,6 +4228,8 @@ implementation
                         PF_S32F64,
                         PF_S32F32:
                           bytes:=bytes or (1 shl 16);
+                        else
+                          ;
                       end;
 
                       bytes:=bytes or (1 shl 18);
@@ -4185,9 +4300,9 @@ implementation
 
                         rn:=16;
                       end;
-                  else
-                    Rn:=0;
-                    message(asmw_e_invalid_opcode_and_operands);
+                    else
+                      Rn:=0;
+                      message(asmw_e_invalid_opcode_and_operands);
                   end;
 
                   case oppostfix of
@@ -4199,10 +4314,10 @@ implementation
                         bytes:=bytes or (1 shl 8);
                         D:=(rd shr 4) and $1; Rd:=Rd and $F;
                       end;
-                  else
-                    begin
-                      D:=rd and $1; Rd:=Rd shr 1;
-                    end;
+                    else
+                      begin
+                        D:=rd and $1; Rd:=Rd shr 1;
+                      end;
                   end;
 
                   case oppostfix of
@@ -4211,6 +4326,8 @@ implementation
                     PF_F64U16,PF_F32U16,
                     PF_F32U32,PF_F64U32:
                       bytes:=bytes or (1 shl 16);
+                    else
+                      ;
                   end;
 
                   if oppostfix in [PF_S32F32,PF_S32F64,PF_U32F32,PF_U32F64,PF_S16F32,PF_S16F64,PF_U16F32,PF_U16F64] then
@@ -4263,6 +4380,8 @@ implementation
                       bytes:=bytes or (1 shl 23);
                     PF_DB,PF_DBS,PF_DBD,PF_DBX:
                       bytes:=bytes or (2 shl 23);
+                    else
+                      ;
                   end;
 
                   case oppostfix of
@@ -4271,6 +4390,8 @@ implementation
                         bytes:=bytes or (1 shl 8);
                         bytes:=bytes or (1 shl 0); // Offset is odd
                       end;
+                    else
+                      ;
                   end;
 
                   dp_operation:=(oper[1]^.subreg=R_SUBFD);
@@ -4562,6 +4683,8 @@ implementation
                         bytes:=bytes or ((oper[2]^.val shr 2) and $7F);
                       end;
                   end;
+                else
+                  internalerror(2019050926);
               end;
             end;
           #$65: { Thumb load/store }
@@ -4698,6 +4821,8 @@ implementation
                     else
                       bytes:=bytes or (getsupreg(oper[0]^.reg) shl 8);
                   end;
+                else
+                  internalerror(2019050925);
               end;
             end;
           #$6A: { Thumb: IT }
@@ -5303,6 +5428,8 @@ implementation
               case oppostfix of
                 PF_None,PF_IA,PF_FD: bytes:=bytes or ($1 shl 23);
                 PF_DB,PF_EA: bytes:=bytes or ($2 shl 23);
+              else
+                message1(asmw_e_invalid_opcode_and_operands, '"Invalid Postfix"');
               end;
             end;
           #$8D: { Thumb-2: BL/BLX }
@@ -5450,9 +5577,13 @@ implementation
                     bytes:=bytes or (1 shl 24);
 
                   case oppostfix of
+                    PF_S: bytes:=bytes or (0 shl 22) or (0 shl 15);
                     PF_D: bytes:=bytes or (0 shl 22) or (1 shl 15);
                     PF_E: bytes:=bytes or (1 shl 22) or (0 shl 15);
                     PF_P: bytes:=bytes or (1 shl 22) or (1 shl 15);
+                    PF_EP: ;
+                    else
+                      message1(asmw_e_invalid_opcode_and_operands, '"Invalid postfix"');
                   end;
                 end
               else
@@ -5527,6 +5658,7 @@ implementation
                 end;
 
               case roundingmode of
+                RM_NONE: ;
                 RM_P: bytes:=bytes or (1 shl 5);
                 RM_M: bytes:=bytes or (2 shl 5);
                 RM_Z: bytes:=bytes or (3 shl 5);
@@ -5554,6 +5686,7 @@ implementation
                     bytes:=bytes or (getsupreg(oper[1]^.reg) shl 12);
 
                     case roundingmode of
+                      RM_NONE: ;
                       RM_P: bytes:=bytes or (1 shl 5);
                       RM_M: bytes:=bytes or (2 shl 5);
                       RM_Z: bytes:=bytes or (3 shl 5);
@@ -5573,6 +5706,7 @@ implementation
                     bytes:=bytes or (getsupreg(oper[1]^.reg) shl 0);
 
                     case roundingmode of
+                      RM_NONE: ;
                       RM_P: bytes:=bytes or (1 shl 5);
                       RM_M: bytes:=bytes or (2 shl 5);
                       RM_Z: bytes:=bytes or (3 shl 5);
@@ -5602,6 +5736,8 @@ implementation
                         Message(asmw_e_invalid_opcode_and_operands);
                       end;
                   end;
+                else
+                  Message1(asmw_e_invalid_opcode_and_operands, '"Unsupported opcode"');
               end;
             end;
           #$fe: // No written data

+ 16 - 1
compiler/arm/agarmgas.pas

@@ -94,7 +94,9 @@ unit agarmgas;
       begin
         inherited;
         InstrWriter := TArmInstrWriter.create(self);
+{$ifndef llvm}
         if GenerateThumb2Code then
+{$endif}
           TArmInstrWriter(InstrWriter).unified_syntax:=true;
       end;
 
@@ -124,7 +126,11 @@ unit agarmgas;
 
         if target_info.abi = abi_eabihf then
           { options based on what gcc uses on debian armhf }
-          result:='-mfloat-abi=hard -meabi=5 '+result;
+          result:='-mfloat-abi=hard -meabi=5 '+result
+        else if (target_info.abi = abi_eabi) and not(current_settings.fputype = fpu_soft) then
+          result:='-mfloat-abi=softfp -meabi=5 '+result
+        else if (target_info.abi = abi_eabi) and (current_settings.fputype = fpu_soft) then
+          result:='-mfloat-abi=soft -meabi=5 '+result;
       end;
 
     procedure TArmGNUAssembler.WriteExtraHeader;
@@ -201,6 +207,8 @@ unit agarmgas;
                        s:=s+', rrx'
                      else if shiftmode <> SM_None then
                        s:=s+', '+gas_shiftmode2str[shiftmode]+' #'+tostr(shiftimm);
+                     if offset<>0 then
+                       Internalerror(2019012601);
                   end
                 else if offset<>0 then
                   s:=s+', #'+tostr(offset);
@@ -210,6 +218,8 @@ unit agarmgas;
                     s:=s+']';
                   AM_PREINDEXED:
                     s:=s+']!';
+                  else
+                    ;
                 end;
               end;
 
@@ -318,6 +328,11 @@ unit agarmgas;
                   if srF in o.specialflags then getopstr:=getopstr+'f';
                   if srS in o.specialflags then getopstr:=getopstr+'s';
                 end;
+            end;
+          top_realconst:
+            begin
+              str(o.val_real,Result);
+              Result:='#'+Result;
             end
           else
             internalerror(2002070604);

+ 70 - 8
compiler/arm/aoptcpu.pas

@@ -83,6 +83,10 @@ Implementation
     cgobj,procinfo,
     aasmbase,aasmdata;
 
+{ Range check must be disabled explicitly as conversions between signed and unsigned
+  32-bit values are done without explicit typecasts }
+{$R-}
+
   function CanBeCond(p : tai) : boolean;
     begin
       result:=
@@ -113,7 +117,9 @@ Implementation
         (r1.signindex = r2.signindex) and
         (r1.shiftimm = r2.shiftimm) and
         (r1.addressmode = r2.addressmode) and
-        (r1.shiftmode = r2.shiftmode);
+        (r1.shiftmode = r2.shiftmode) and
+        (r1.volatility=[]) and
+        (r2.volatility=[]);
     end;
 
   function MatchInstruction(const instr: tai; const op: TCommonAsmOps; const cond: TAsmConds; const postfix: TOpPostfixes): boolean;
@@ -235,6 +241,8 @@ Implementation
               instructionLoadsFromReg :=
                 (p.oper[I]^.ref^.base = reg) or
                 (p.oper[I]^.ref^.index = reg);
+            else
+              ;
           end;
           if instructionLoadsFromReg then exit; {Bailout if we found something}
           Inc(I);
@@ -294,6 +302,8 @@ Implementation
         A_POP:
           Result := (getsupreg(reg) in p.oper[0]^.regset^) or
                                    (reg=NR_STACK_POINTER_REG);
+        else
+          ;
       end;
 
       if Result then
@@ -310,6 +320,8 @@ Implementation
           Result :=
             (taicpu(p).oper[0]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
             (taicpu(p).oper[0]^.ref^.base = reg);
+        else
+          ;
       end;
     end;
 
@@ -433,6 +445,19 @@ Implementation
 
               { finally get rid of the mov }
               taicpu(p).loadreg(0,taicpu(movp).oper[0]^.reg);
+              { Remove preindexing and postindexing for LDR in some cases.
+                For example:
+                  ldr	reg2,[reg1, xxx]!
+                  mov reg1,reg2
+                must be translated to:
+                  ldr	reg1,[reg1, xxx]
+
+                Preindexing must be removed there, since the same register is used as the base and as the target.
+                Such case is not allowed for ARM CPU and produces crash. }
+              if (taicpu(p).opcode = A_LDR) and (taicpu(p).oper[1]^.typ = top_ref)
+                and (taicpu(movp).oper[0]^.reg = taicpu(p).oper[1]^.ref^.base)
+              then
+                taicpu(p).oper[1]^.ref^.addressmode:=AM_OFFSET;
               asml.remove(movp);
               movp.free;
             end;
@@ -627,7 +652,6 @@ Implementation
     var
       hp1,hp2,hp3,hp4: tai;
       i, i2: longint;
-      TmpUsedRegs: TAllUsedRegs;
       tempop: tasmop;
       oldreg: tregister;
       dealloc: tai_regalloc;
@@ -913,7 +937,7 @@ Implementation
                           MatchInstruction(hp2, A_STR, [taicpu(p).condition], [PF_H]) and
                           MatchOperand(taicpu(hp2).oper[0]^, taicpu(p).oper[0]^.reg) then
                           begin
-                            CopyUsedRegs(TmpUsedRegs);
+                            TransferUsedRegs(TmpUsedRegs);
                             UpdateUsedRegs(TmpUsedRegs, tai(p.next));
                             UpdateUsedRegs(TmpUsedRegs, tai(hp1.next));
                             if not(RegUsedAfterInstruction(taicpu(p).oper[0]^.reg,hp2,TmpUsedRegs)) then
@@ -927,7 +951,6 @@ Implementation
                                 p:=hp2;
                                 Result:=true;
                               end;
-                            ReleaseUsedRegs(TmpUsedRegs);
                           end
                         { fold
                           mov reg1,reg0, shift imm1
@@ -1331,7 +1354,7 @@ Implementation
                         if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
                           taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
 
-                        dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, taicpu(p.Next));
+                        dealloc:=FindRegDeAlloc(taicpu(p).oper[1]^.reg, tai(p.Next));
                         if Assigned(dealloc) then
                           begin
                             asml.remove(dealloc);
@@ -1951,6 +1974,7 @@ Implementation
                       strb reg1,[...]
                     }
                     if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_STR, [C_None], [PF_B]) and
                       assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
@@ -1976,6 +2000,7 @@ Implementation
                       uxtb reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       (taicpu(hp1).ops = 2) and
@@ -1999,6 +2024,7 @@ Implementation
                       uxtb reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
                       (taicpu(hp1).ops = 2) and
@@ -2022,8 +2048,8 @@ Implementation
                       uxtb reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
-                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       (taicpu(p).ops=2) and
+                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=3) and
                       (taicpu(hp1).oper[2]^.typ=top_const) and
@@ -2058,6 +2084,7 @@ Implementation
                       strh reg1,[...]
                     }
                     if MatchInstruction(p, taicpu(p).opcode, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_STR, [C_None], [PF_H]) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
@@ -2083,6 +2110,7 @@ Implementation
                       uxth reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=2) and
@@ -2109,6 +2137,7 @@ Implementation
                       uxth reg3,reg1
                     }
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
+                      (taicpu(p).ops=2) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=3) and
@@ -2229,8 +2258,12 @@ Implementation
                       RemoveSuperfluousVMov(p, hp1, 'VOpVMov2VOp') then
                       Result:=true;
                   end
+                else
+                  ;
               end;
           end;
+        else
+          ;
       end;
     end;
 
@@ -2408,8 +2441,12 @@ Implementation
                                 end;
                            end;
                       end;
+                  else
+                    ;
                 end;
               end;
+            else
+              ;
           end;
           p := tai(p.next)
         end;
@@ -2489,6 +2526,8 @@ Implementation
             for r:=RS_R0 to RS_R15 do
                if r in p.oper[i]^.regset^ then
                  CheckLiveStart(newreg(R_INTREGISTER,r,R_SUBWHOLE));
+          else
+            ;
         end;
 
       { if live of any reg used by hp1 ends at hp1 and p uses this register then
@@ -2508,6 +2547,8 @@ Implementation
             for r:=RS_R0 to RS_R15 do
                if r in hp1.oper[i]^.regset^ then
                  CheckLiveEnd(newreg(R_INTREGISTER,r,R_SUBWHOLE));
+          else
+            ;
         end;
     end;
 
@@ -2516,6 +2557,15 @@ Implementation
 
   { TODO : schedule also forward }
   { TODO : schedule distance > 1 }
+
+    { returns true if p might be a load of a pc relative tls offset }
+    function PossibleTLSLoad(const p: tai) : boolean;
+      begin
+        Result:=(p.typ=ait_instruction) and (taicpu(p).opcode=A_LDR) and (taicpu(p).oper[1]^.typ=top_ref) and (((taicpu(p).oper[1]^.ref^.base=NR_PC) and
+          (taicpu(p).oper[1]^.ref^.index<>NR_NO)) or ((taicpu(p).oper[1]^.ref^.base<>NR_NO) and
+          (taicpu(p).oper[1]^.ref^.index=NR_PC)));
+      end;
+
     var
       hp1,hp2,hp3,hp4,hp5,insertpos : tai;
       list : TAsmList;
@@ -2572,7 +2622,9 @@ Implementation
             ) and
             { if we modify the basereg AND the first instruction used that reg, we can not schedule }
             ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) or
-             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) then
+             not(instructionLoadsFromReg(taicpu(hp1).oper[1]^.ref^.base,p))) and
+            not(PossibleTLSLoad(p)) and
+            not(PossibleTLSLoad(hp1)) then
             begin
               hp3:=tai(p.Previous);
               hp5:=tai(p.next);
@@ -2693,7 +2745,11 @@ Implementation
                       A_ITETT:
                         if l=4 then taicpu(hp).opcode := A_ITET;
                       A_ITTTT:
-                        if l=4 then taicpu(hp).opcode := A_ITTT;
+                        begin
+                          if l=4 then taicpu(hp).opcode := A_ITTT;
+                        end
+                      else
+                        ;
                     end;
 
                   break;
@@ -2924,8 +2980,12 @@ Implementation
                                 end;
                            end;
                       end;
+                  else
+                    ;
                 end;
               end;
+            else
+              ;
           end;
           p := tai(p.next)
         end;
@@ -3076,6 +3136,8 @@ Implementation
                 SM_LSR: taicpu(p).opcode:=A_LSR;
                 SM_ASR: taicpu(p).opcode:=A_ASR;
                 SM_ROR: taicpu(p).opcode:=A_ROR;
+                else
+                  internalerror(2019050912);
               end;
 
               if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then

+ 19 - 6
compiler/arm/aoptcpub.pas

@@ -76,10 +76,6 @@ Const
 
   MaxCh = 3;
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 4;
-
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 
@@ -123,14 +119,31 @@ Implementation
       i : Longint;
     begin
       result:=false;
+      case taicpu(p1).opcode of
+        A_LDR:
+          begin
+            { special handling for LDRD }
+            if (taicpu(p1).oppostfix=PF_D) and (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(Reg)) then
+              begin
+                result:=true;
+                exit;
+              end;
+          end;
+        else
+          ;
+      end;
       for i:=0 to taicpu(p1).ops-1 do
         case taicpu(p1).oper[i]^.typ of
           top_reg:
             if (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
               exit(true);
           top_ref:
-            if (taicpu(p1).spilling_get_operation_type_ref(i,Reg)<>operand_read) then
-              exit(true);
+            begin
+              if (taicpu(p1).spilling_get_operation_type_ref(i,Reg)<>operand_read) then
+                exit(true);
+            end
+          else
+            ;
         end;
     end;
 

+ 6 - 1
compiler/arm/armins.dat

@@ -321,6 +321,7 @@ reg32,memam2              \x17\x04\x50                   ARM32,ARMv4
 reglo,memam3              \x65\x58\x0\2                  THUMB,ARMv4T
 reglo,memam4              \x66\x68\x0\2                  THUMB,ARMv4T
 reglo,memam5              \x67\x98\x0\2                  THUMB,ARMv4T
+reglo,memam2              \x67\x98\x0\2                  THUMB,ARMv4T
 reglo,memam6              \x67\x48\x0\2                  THUMB,ARMv4T
 reg32,memam2              \x88\xF8\x50\x0\x0\0           THUMB32,WIDE,ARMv6T2
 reg32,memam2              \x17\x04\x10                   ARM32,ARMv4
@@ -402,6 +403,7 @@ reg32,regf          \x10\x01\x0F                        ARM32,ARMv4
 regf,reg32          \x96\xF3\x80\x80\x0                 THUMB32,ARMv6
 
 regf,reg32          \x12\x01\x20\xF0                    ARM32,ARMv4
+regs,reg32          \x12\x01\x20\xF0                    ARM32,ARMv4
 regf,immshifter     \x13\x03\x20\xF0                    ARM32,ARMv4
 regs,immshifter     \x13\x03\x20\xF0                    ARM32,ARMv4
 
@@ -428,7 +430,9 @@ reg32,immshifter       \xB\x1\xE0                       ARM32,ARMv4
 
 [VMOVcc]
 vreg,vreg         \x90\xEE\xB0\xA\x40            THUMB32,VFPv2
-vreg,vreg         \x40\xE\xB0\xA\x40            ARM32,VFPv2
+vreg,vreg         \x40\xE\xB0\xA\x40             ARM32,VFPv2
+vreg,immmm         \x90\xEE\xB0\xA\x0             THUMB32,VFPv3
+vreg,immmm         \x40\xE\xB0\xA\x0              ARM32,VFPv3
 
 reg32,vreg        \x90\xEE\x10\xA\x10            THUMB32,VFPv2
 vreg,reg32        \x90\xEE\x00\xA\x10            THUMB32,VFPv2
@@ -540,6 +544,7 @@ reg32,reglist		          \x26\x80			   ARM32,ARMv4
 reglo,memam3                \x65\x50\x0\2                  THUMB,ARMv4T
 reglo,memam4                \x66\x60\x0\2                  THUMB,ARMv4T
 reglo,memam5                \x67\x90\x0\2                  THUMB,ARMv4T
+reglo,memam2                \x67\x90\x0\2                  THUMB,ARMv4T
 reg32,memam2                \x88\xF8\x40\x0\x0\0           THUMB32,WIDE,ARMv6T2
 reg32,memam2                \x17\x04\x00                   ARM32,ARMv4
 

+ 1 - 1
compiler/arm/armnop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from armins.dat }
-956;
+961;

+ 35 - 0
compiler/arm/armtab.inc

@@ -1043,6 +1043,13 @@
     code    : #103#152#0#2;
     flags   : if_thumb or if_armv4t
   ),
+  (
+    opcode  : A_LDR;
+    ops     : 2;
+    optypes : (ot_reglo,ot_memoryam2,ot_none,ot_none,ot_none,ot_none);
+    code    : #103#152#0#2;
+    flags   : if_thumb or if_armv4t
+  ),
   (
     opcode  : A_LDR;
     ops     : 2;
@@ -1351,6 +1358,13 @@
     code    : #18#1#32#240;
     flags   : if_arm32 or if_armv4
   ),
+  (
+    opcode  : A_MSR;
+    ops     : 2;
+    optypes : (ot_regs,ot_reg32,ot_none,ot_none,ot_none,ot_none);
+    code    : #18#1#32#240;
+    flags   : if_arm32 or if_armv4
+  ),
   (
     opcode  : A_MSR;
     ops     : 2;
@@ -1470,6 +1484,20 @@
     code    : #64#14#176#10#64;
     flags   : if_arm32 or if_vfpv2
   ),
+  (
+    opcode  : A_VMOV;
+    ops     : 2;
+    optypes : (ot_vreg,ot_immediatemm,ot_none,ot_none,ot_none,ot_none);
+    code    : #144#238#176#10#0;
+    flags   : if_thumb32 or if_vfpv3
+  ),
+  (
+    opcode  : A_VMOV;
+    ops     : 2;
+    optypes : (ot_vreg,ot_immediatemm,ot_none,ot_none,ot_none,ot_none);
+    code    : #64#14#176#10#0;
+    flags   : if_arm32 or if_vfpv3
+  ),
   (
     opcode  : A_VMOV;
     ops     : 2;
@@ -1995,6 +2023,13 @@
     code    : #103#144#0#2;
     flags   : if_thumb or if_armv4t
   ),
+  (
+    opcode  : A_STR;
+    ops     : 2;
+    optypes : (ot_reglo,ot_memoryam2,ot_none,ot_none,ot_none,ot_none);
+    code    : #103#144#0#2;
+    flags   : if_thumb or if_armv4t
+  ),
   (
     opcode  : A_STR;
     ops     : 2;

+ 125 - 62
compiler/arm/cgcpu.pas

@@ -42,7 +42,9 @@ unit cgcpu;
         cgsetflags : boolean;
 
         procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
-        procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
+       protected
+         procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); override;
+       public
         procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
 
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
@@ -107,13 +109,15 @@ unit cgcpu;
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
 
-        { clear out potential overflow bits from 8 or 16 bit operations  }
-        { the upper 24/16 bits of a register after an operation          }
+        { clear out potential overflow bits from 8 or 16 bit operations
+          the upper 24/16 bits of a register after an operation          }
         procedure maybeadjustresult(list: TAsmList; op: TOpCg; size: tcgsize; dst: tregister);
 
         { mla for thumb requires that none of the registers is equal to r13/r15, this method ensures this }
         procedure safe_mla(list: TAsmList;op1,op2,op3,op4 : TRegister);
 
+
+        procedure g_maybe_tls_init(list : TAsmList); override;
       end;
 
       { tcgarm is shared between normal arm and thumb-2 }
@@ -241,6 +245,10 @@ unit cgcpu;
        procinfo,cpupi,
        paramgr;
 
+{ Range check must be disabled explicitly as conversions between signed and unsigned
+  32-bit values are done without explicit typecasts }
+{$R-}
+
 
     function get_fpu_postfix(def : tdef) : toppostfix;
       begin
@@ -565,52 +573,16 @@ unit cgcpu;
       end;
 
 
-    procedure tbasecgarm.a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);
-      var
-        tmpref, ref: treference;
-        location: pcgparalocation;
-        sizeleft: aint;
+    procedure tbasecgarm.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
       begin
-        location := paraloc.location;
-        tmpref := r;
-        sizeleft := paraloc.intsize;
-        while assigned(location) do
+        { doubles in softemu mode have a strange order of registers and references }
+        if (cgpara.size=OS_F64) and
+           (location^.size=OS_32) then
           begin
-            paramanager.allocparaloc(list,location);
-            case location^.loc of
-              LOC_REGISTER,LOC_CREGISTER:
-                a_load_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-              LOC_REFERENCE:
-                begin
-                  reference_reset_base(ref,location^.reference.index,location^.reference.offset,ctempposinvalid,paraloc.alignment,[]);
-                  { doubles in softemu mode have a strange order of registers and references }
-                  if location^.size=OS_32 then
-                    g_concatcopy(list,tmpref,ref,4)
-                  else
-                    begin
-                      g_concatcopy(list,tmpref,ref,sizeleft);
-                      if assigned(location^.next) then
-                        internalerror(2005010710);
-                    end;
-                end;
-              LOC_FPUREGISTER,LOC_CFPUREGISTER:
-                case location^.size of
-                   OS_F32, OS_F64:
-                     a_loadfpu_ref_reg(list,location^.size,location^.size,tmpref,location^.register);
-                   else
-                     internalerror(2002072801);
-                end;
-              LOC_VOID:
-                begin
-                  // nothing to do
-                end;
-              else
-                internalerror(2002081103);
-            end;
-            inc(tmpref.offset,tcgsize2size[location^.size]);
-            dec(sizeleft,tcgsize2size[location^.size]);
-            location := location^.next;
-          end;
+            g_concatcopy(list,ref,paralocref,4)
+          end
+        else
+          inherited;
       end;
 
 
@@ -926,9 +898,11 @@ unit cgcpu;
               a_load_const_reg(list, size, a, dst);
               exit;
             end;
+          else
+            ;
         end;
         ovloc.loc:=LOC_VOID;
-        if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
+        if (a<>-2147483648) and not setflags and is_shifter_const(-a,shift) then
           case op of
             OP_ADD:
               begin
@@ -940,6 +914,8 @@ unit cgcpu;
                 op:=OP_ADD;
                 a:=aint(dword(-a));
               end
+            else
+              ;
           end;
 
         if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
@@ -988,6 +964,8 @@ unit cgcpu;
                       ovloc.resflags:=F_CS;
                     OP_SUB:
                       ovloc.resflags:=F_CC;
+                    else
+                      internalerror(2019050922);
                   end;
                 end;
           end
@@ -1031,7 +1009,7 @@ unit cgcpu;
             { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
               into the following instruction}
             else if (op = OP_AND) and
-                    is_continuous_mask(a, lsb, width) and
+                    is_continuous_mask(aword(a), lsb, width) and
                     ((lsb = 0) or ((lsb + width) = 32)) then
               begin
                 shifterop_reset(so);
@@ -1899,6 +1877,10 @@ unit cgcpu;
             firstfloatreg:=RS_NO;
             mmregs:=[];
             case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc:
+                ;
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa11:
@@ -1924,6 +1906,8 @@ unit cgcpu;
                     as the even ones by with a different subtype as it is done on x86 with al/ah }
                   mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
                 end;
+              else
+                internalerror(2019050924);
             end;
             a_reg_alloc(list,NR_STACK_POINTER_REG);
             if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
@@ -2108,9 +2092,11 @@ unit cgcpu;
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VSTM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                    end;
+                 else
+                   internalerror(2019050923);
                end;
              end;
-        end;
+          end;
       end;
 
 
@@ -2137,6 +2123,10 @@ unit cgcpu;
             mmregs:=[];
             saveregs:=[];
             case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc:
+                ;
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa11:
@@ -2166,6 +2156,8 @@ unit cgcpu;
                     as the even ones by with a different subtype as it is done on x86 with al/ah }
                   mmregs:=(rg[R_MMREGISTER].used_in_proc-paramanager.get_volatile_registers_mm(pocall_stdcall))*[0..31];
                 end;
+              else
+                internalerror(2019050926);
             end;
 
             if (firstfloatreg<>RS_NO) or
@@ -2214,6 +2206,8 @@ unit cgcpu;
                      if mmregs<>[] then
                        list.concat(taicpu.op_ref_regset(A_VLDM,ref,R_MMREGISTER,R_SUBFD,mmregs));
                     end;
+                  else
+                    internalerror(2019050921);
                 end;
               end;
 
@@ -2472,6 +2466,8 @@ unit cgcpu;
                     a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,tmpreg);
                 indirection_done:=true;
               end
+            else if ref.refaddr=addr_gottpoff then
+              current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
             else if (cs_create_pic in current_settings.moduleswitches) then
               if (tf_pic_uses_got in target_info.flags) then
                 current_procinfo.aktlocaldata.concat(tai_const.Create_type_sym(aitconst_got,ref.symbol))
@@ -2512,6 +2508,11 @@ unit cgcpu;
                (tf_pic_uses_got in target_info.flags) and
                assigned(ref.symbol) then
               begin
+                {$ifdef EXTDEBUG}
+                if not (pi_needs_got in current_procinfo.flags) then
+                	Comment(V_warning,'pi_needs_got not included');
+                {$endif EXTDEBUG}
+                Include(current_procinfo.flags,pi_needs_got);
                 reference_reset(tmpref,4,[]);
                 tmpref.base:=current_procinfo.got;
                 tmpref.index:=tmpreg;
@@ -2678,6 +2679,21 @@ unit cgcpu;
           list.concat(taicpu.op_reg_reg(A_MOV,destreg,destreg));
         end;
 
+      { save estimation, if a creating a separate ref is needed or
+        if we can keep the original reference while copying }
+      function SimpleRef(const ref : treference) : boolean;
+        begin
+          result:=((ref.base=NR_PC) and (ref.addressmode=AM_OFFSET) and (ref.refaddr in [addr_full,addr_no])) or
+              ((ref.symbol=nil) and
+               (ref.addressmode=AM_OFFSET) and
+               (((ref.offset>=0) and (ref.offset+len<=31)) or
+                (not(GenerateThumbCode) and (ref.offset>=-255) and (ref.offset+len<=255)) or
+                { ldrh has a limited offset range }
+                (not(GenerateThumbCode) and ((len mod 4) in [0,1]) and (ref.offset>=-4095) and (ref.offset+len<=4095))
+               )
+              );
+        end;
+
       { will never be called with count<=4 }
       procedure genloop_thumb(count : aword;size : byte);
 
@@ -2784,17 +2800,15 @@ unit cgcpu;
           begin
             tmpregi:=0;
 
-            srcreg:=getintregister(list,OS_ADDR);
-
-            { explicit pc relative addressing, could be
-              e.g. a floating point constant }
-            if source.base=NR_PC then
+            { loading address in a separate register needed? }
+            if SimpleRef(source) then
               begin
                 { ... then we don't need a loadaddr }
                 srcref:=source;
               end
             else
               begin
+                srcreg:=getintregister(list,OS_ADDR);
                 a_loadaddr_ref_reg(list,source,srcreg);
                 reference_reset_base(srcref,srcreg,0,source.temppos,source.alignment,source.volatility);
               end;
@@ -2808,9 +2822,15 @@ unit cgcpu;
                 dec(len,4);
               end;
 
-            destreg:=getintregister(list,OS_ADDR);
-            a_loadaddr_ref_reg(list,dest,destreg);
-            reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
+            { loading address in a separate register needed? }
+            if SimpleRef(dest) then
+              dstref:=dest
+            else
+              begin
+                destreg:=getintregister(list,OS_ADDR);
+                a_loadaddr_ref_reg(list,dest,destreg);
+                reference_reset_base(dstref,destreg,0,dest.temppos,dest.alignment,dest.volatility);
+              end;
             tmpregi2:=1;
             while (tmpregi2<=tmpregi) do
               begin
@@ -2942,7 +2962,7 @@ unit cgcpu;
               if not((def.typ=pointerdef) or
                     ((def.typ=orddef) and
                      (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
-                                               pasbool8,pasbool16,pasbool32,pasbool64]))) then
+                                               pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]))) then
                  ai.SetCondition(C_VC)
               else
                 if TAiCpu(List.Last).opcode in [A_RSB,A_RSC,A_SBC,A_SUB] then
@@ -3046,6 +3066,8 @@ unit cgcpu;
         case instr.opcode of
           A_VMOV:
             add_move_instruction(instr);
+          else
+            ;
         end;
       end;
 
@@ -3075,6 +3097,10 @@ unit cgcpu;
               if (fromsize<>tosize) then
                 internalerror(2009112901);
             end;
+          OS_F32,OS_F64:
+            ;
+          else
+            internalerror(2019050920);
         end;
 
         if (fromsize<>tosize) then
@@ -3136,6 +3162,10 @@ unit cgcpu;
               if (fromsize<>tosize) then
                 internalerror(2009112901);
             end;
+          OS_F32,OS_F64:
+            ;
+          else
+            internalerror(2019050919);
         end;
 
         if (fromsize<>tosize) then
@@ -3267,6 +3297,15 @@ unit cgcpu;
       end;
 
 
+    procedure tbasecgarm.g_maybe_tls_init(list : TAsmList);
+      begin
+        list.concat(tai_regalloc.alloc(NR_R0,nil));
+        a_call_name(list,'fpc_read_tp',false);
+        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_R0,current_procinfo.tlsoffset);
+        list.concat(tai_regalloc.dealloc(NR_R0,nil));
+      end;
+
+
     procedure tcg64farm.a_op64_reg_reg(list : TAsmList;op:TOpCG;size : tcgsize;regsrc,regdst : tregister64);
       begin
         case op of
@@ -3340,6 +3379,8 @@ unit cgcpu;
           OP_NEG,
           OP_NOT :
             internalerror(2012022501);
+          else
+            ;
         end;
         if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
           begin
@@ -3404,6 +3445,8 @@ unit cgcpu;
                     ovloc.resflags:=F_CS;
                   OP_SUB:
                     ovloc.resflags:=F_CC;
+                  else
+                    internalerror(2019050918);
                 end;
               end;
           end
@@ -3477,6 +3520,8 @@ unit cgcpu;
           OP_NEG,
           OP_NOT :
             internalerror(2012022502);
+          else
+            ;
         end;
         if (setflags or tbasecgarm(cg).cgsetflags) and (op in [OP_ADD,OP_SUB]) then
           begin
@@ -3505,6 +3550,8 @@ unit cgcpu;
                     ovloc.resflags:=F_CS;
                   OP_SUB:
                     ovloc.resflags:=F_CC;
+                  else
+                    internalerror(2019050917);
                 end;
               end;
           end
@@ -4080,6 +4127,8 @@ unit cgcpu;
                 op:=OP_ADD;
                 a:=aint(dword(-a));
               end
+            else
+              ;
           end;
 
         if is_thumb_imm(a) and (op in [OP_ADD,OP_SUB]) then
@@ -4099,6 +4148,8 @@ unit cgcpu;
                   OP_SUB:
                     //!!! ovloc.resflags:=F_CC;
                     ;
+                  else
+                    ;
                 end;
               end;
           end
@@ -4428,6 +4479,11 @@ unit cgcpu;
               OS_S8: list.concat(taicpu.op_reg_reg(A_SXTB,dst,dst));
               OS_16: list.concat(taicpu.op_reg_reg(A_UXTH,dst,dst));
               OS_S16: list.concat(taicpu.op_reg_reg(A_SXTH,dst,dst));
+              OS_32,
+              OS_S32:
+                ;
+              else
+                internalerror(2019050916);
             end;
           end
         else
@@ -4443,7 +4499,7 @@ unit cgcpu;
         l1 : longint;
       begin
         ovloc.loc:=LOC_VOID;
-        if {$ifopt R+}(a<>-2147483648) and{$endif} is_shifter_const(-a,shift) then
+        if (a<>-2147483648) and is_shifter_const(-a,shift) then
           case op of
             OP_ADD:
               begin
@@ -4455,6 +4511,8 @@ unit cgcpu;
                 op:=OP_ADD;
                 a:=aint(dword(-a));
               end
+            else
+              ;
           end;
 
         if is_shifter_const(a,shift) and not(op in [OP_IMUL,OP_MUL]) then
@@ -4559,6 +4617,8 @@ unit cgcpu;
                       ovloc.resflags:=F_CS;
                     OP_SUB:
                       ovloc.resflags:=F_CC;
+                    else
+                      ;
                   end;
                 end;
           end
@@ -4616,7 +4676,7 @@ unit cgcpu;
               list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
             else if (op = OP_AND) and is_thumb32_imm(not(dword(a))) then
               list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
-            else if (op = OP_AND) and is_continuous_mask(not(a), shift, width) then
+            else if (op = OP_AND) and is_continuous_mask(aword(not(a)), shift, width) then
               begin
                 a_load_reg_reg(list,size,size,src,dst);
                 list.concat(taicpu.op_reg_const_const(A_BFC,dst,shift,width))
@@ -5022,7 +5082,10 @@ unit cgcpu;
                 cg.a_label(current_procinfo.aktlocaldata,l);
                 tmpref.symboldata:=current_procinfo.aktlocaldata.last;
 
-                current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
+                if ref.refaddr=addr_gottpoff then
+                  current_procinfo.aktlocaldata.concat(tai_const.Create_rel_sym_offset(aitconst_gottpoff,ref.symbol,ref.relsymbol,ref.offset))
+                else
+                  current_procinfo.aktlocaldata.concat(tai_const.create_sym_offset(ref.symbol,ref.offset));
 
                 { load consts entry }
                 tmpref.symbol:=l;

+ 43 - 3
compiler/arm/cpubase.pas

@@ -377,8 +377,9 @@ unit cpubase;
       doesn't handle ROR_C detection }
     function is_thumb32_imm(d : aint) : boolean;
     function split_into_shifter_const(value : aint;var imm1: dword; var imm2: dword):boolean;
-    function is_continuous_mask(d : aint;var lsb, width: byte) : boolean;
+    function is_continuous_mask(d : aword;var lsb, width: byte) : boolean;
     function dwarf_reg(r:tregister):shortint;
+    function dwarf_reg_no_error(r:tregister):shortint;
 
     function IsIT(op: TAsmOp) : boolean;
     function GetITLevels(op: TAsmOp) : longint;
@@ -387,6 +388,8 @@ unit cpubase;
     function GenerateThumbCode : boolean;
     function GenerateThumb2Code : boolean;
 
+    function IsVFPFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
+
   implementation
 
     uses
@@ -413,8 +416,11 @@ unit cpubase;
           R_MMREGISTER:
             begin
               case s of
+                { records passed in MM registers }
+                OS_32,
                 OS_F32:
                   cgsize2subreg:=R_SUBFS;
+                OS_64,
                 OS_F64:
                   cgsize2subreg:=R_SUBFD;
                 else
@@ -607,7 +613,7 @@ unit cpubase;
           end;
       end;
     
-    function is_continuous_mask(d : aint;var lsb, width: byte) : boolean;
+    function is_continuous_mask(d : aword;var lsb, width: byte) : boolean;
       var
         msb : byte;
       begin
@@ -616,7 +622,7 @@ unit cpubase;
         
         width:=msb-lsb+1;
         
-        result:=(lsb<>255) and (msb<>255) and ((((1 shl (msb-lsb+1))-1) shl lsb) = d);
+        result:=(lsb<>255) and (msb<>255) and (aword(((1 shl (msb-lsb+1))-1) shl lsb) = d);
       end;
 
 
@@ -652,6 +658,11 @@ unit cpubase;
           internalerror(200603251);
       end;
 
+    function dwarf_reg_no_error(r:tregister):shortint;
+      begin
+        result:=regdwarf_table[findreg_by_number(r)];
+      end;
+
       { Low part of 64bit return value }
     function NR_FUNCTION_RESULT64_LOW_REG: tregister; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     begin
@@ -736,5 +747,34 @@ unit cpubase;
       end;
 
 
+    function IsVFPFloatImmediate(ft : tfloattype;value : bestreal) : boolean;
+      var
+        singlerec : tcompsinglerec;
+        doublerec : tcompdoublerec;
+      begin
+        Result:=false;
+        case ft of
+          s32real:
+            begin
+              singlerec.value:=value;
+              singlerec:=tcompsinglerec(NtoLE(DWord(singlerec)));
+              Result:=(singlerec.bytes[0]=0) and (singlerec.bytes[1]=0) and ((singlerec.bytes[2] and 7)=0)  and
+                (((singlerec.bytes[3] and $7e)=$40) or ((singlerec.bytes[3] and $7e)=$3e));
+            end;
+          s64real:
+            begin
+              doublerec.value:=value;
+              doublerec:=tcompdoublerec(NtoLE(QWord(doublerec)));
+              Result:=(doublerec.bytes[0]=0) and (doublerec.bytes[1]=0) and (doublerec.bytes[2]=0) and
+                      (doublerec.bytes[3]=0) and (doublerec.bytes[4]=0) and (doublerec.bytes[5]=0) and
+                      ((((doublerec.bytes[6] and $7f)=$40) and ((doublerec.bytes[7] and $c0)=0)) or
+                       (((doublerec.bytes[6] and $7f)=$3f) and ((doublerec.bytes[7] and $c0)=$c0)));
+            end;
+          else
+            ;
+        end;
+      end;
+
+
 end.
 

+ 2 - 0
compiler/arm/cpuelf.pas

@@ -588,6 +588,8 @@ implementation
               data.Write(zero,4);
               continue;
             end;
+          else
+            ;
         end;
 
         if (objreloc.flags and rf_raw)=0 then

+ 1 - 0
compiler/arm/cpunode.pas

@@ -38,6 +38,7 @@ unit cpunode;
        narmcal,
        narmmat,
        narminl,
+       narmld,
        narmcnv,
        narmcon,
        narmset,

+ 174 - 58
compiler/arm/cpupara.pas

@@ -30,10 +30,10 @@ unit cpupara;
        globtype,globals,
        aasmdata,
        cpuinfo,cpubase,cgbase,cgutils,
-       symconst,symtype,symdef,parabase,paramgr;
+       symconst,symtype,symdef,parabase,paramgr,armpara;
 
     type
-       tcpuparamanager = class(tparamanager)
+       tcpuparamanager = class(tarmgenparamanager)
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
@@ -42,14 +42,17 @@ unit cpupara;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
           procedure getintparaloc(list: TAsmList; pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
           function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
          private
+          function usemmpararegs(calloption: tproccalloption; variadic: boolean): boolean;
+          function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
           procedure init_values(p: tabstractprocdef; side: tcallercallee; var curintreg,
             curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword;
             var sparesinglereg: tregister);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
             var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
+          procedure paradeftointparaloc(paradef: tdef; paracgsize: tcgsize; out paralocdef: tdef; out paralocsize: tcgsize);
        end;
 
   implementation
@@ -84,7 +87,7 @@ unit cpupara;
 
     function tcpuparamanager.get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;
       const
-        saved_regs : array[0..6] of tsuperregister =
+        saved_regs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..6] of tsuperregister{$endif} =
           (RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,RS_R10);
       begin
         result:=saved_regs;
@@ -130,7 +133,9 @@ unit cpupara;
       end;
 
 
-    function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+    function tcpuparamanager.getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+      var
+        basedef: tdef;
       begin
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
            if push_addr_param for the def is true
@@ -160,7 +165,11 @@ unit cpupara;
             classrefdef:
               getparaloc:=LOC_REGISTER;
             recorddef:
-              getparaloc:=LOC_REGISTER;
+              if usemmpararegs(calloption,isvariadic) and
+                 is_hfa(p,basedef) then
+                getparaloc:=LOC_MMREGISTER
+              else
+                getparaloc:=LOC_REGISTER;
             objectdef:
               getparaloc:=LOC_REGISTER;
             stringdef:
@@ -175,6 +184,9 @@ unit cpupara;
             arraydef:
               if is_dynamic_array(p) then
                 getparaloc:=LOC_REGISTER
+              else if usemmpararegs(calloption,isvariadic) and
+                 is_hfa(p,basedef) then
+                getparaloc:=LOC_MMREGISTER
               else
                 getparaloc:=LOC_REFERENCE;
             setdef:
@@ -220,6 +232,8 @@ unit cpupara;
             result:=not is_smallset(def);
           stringdef :
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
+          else
+            ;
         end;
       end;
 
@@ -228,12 +242,19 @@ unit cpupara;
       var
         i: longint;
         sym: tsym;
+        basedef: tdef;
       begin
         if handle_common_ret_in_param(def,pd,result) then
           exit;
         case def.typ of
           recorddef:
             begin
+              if usemmpararegs(pd.proccalloption,is_c_variadic(pd)) and
+                 is_hfa(def,basedef) then
+                begin
+                  result:=false;
+                  exit;
+                end;
               result:=def.size>4;
               if not result and
                  (target_info.abi in [abi_default,abi_armeb]) then
@@ -326,11 +347,13 @@ unit cpupara;
 
       var
         nextintreg,nextfloatreg,nextmmreg : tsuperregister;
-        paradef : tdef;
+        paradef,
+        hfabasedef : tdef;
         paraloc : pcgparalocation;
         stack_offset : aword;
         hp : tparavarsym;
         loc : tcgloc;
+        hfabasesize  : tcgsize;
         paracgsize   : tcgsize;
         paralen : longint;
         i : integer;
@@ -358,6 +381,31 @@ unit cpupara;
         end;
 
 
+      procedure updatemmregs(paradef, basedef: tdef);
+        var
+          regsavailable,
+          regsneeded: longint;
+          basesize: asizeint;
+        begin
+          basesize:=basedef.size;
+          regsneeded:=paradef.size div basesize;
+          regsavailable:=ord(RS_D7)-ord(nextmmreg)+1;
+          case basesize of
+            4:
+              regsavailable:=regsavailable*2+ord(sparesinglereg<>NR_NO);
+            8:
+              ;
+            else
+              internalerror(2019022301);
+          end;
+          if regsavailable<regsneeded then
+            begin
+              nextmmreg:=succ(RS_D7);
+              sparesinglereg:=NR_NO;
+            end;
+        end;
+
+
       begin
         result:=0;
         nextintreg:=curintreg;
@@ -377,6 +425,11 @@ unit cpupara;
             if (p.proccalloption in cstylearrayofconst) and
                is_array_of_const(paradef) then
               begin
+                hp.paraloc[side].def:=paradef;
+                hp.paraloc[side].size:=OS_NO;
+                hp.paraloc[side].alignment:=std_param_align;
+                hp.paraloc[side].intsize:=0;
+
                 paraloc:=hp.paraloc[side].add_location;
                 { hack: the paraloc must be valid, but is not actually used }
                 paraloc^.loc:=LOC_REGISTER;
@@ -423,6 +476,18 @@ unit cpupara;
              hp.paraloc[side].def:=paradef;
              firstparaloc:=true;
 
+             if (loc=LOC_MMREGISTER) and
+                is_hfa(paradef,hfabasedef) then
+               begin
+                 updatemmregs(paradef,hfabasedef);
+                 hfabasesize:=def_cgsize(hfabasedef);
+               end
+             else
+               begin
+                 hfabasedef:=nil;
+                 hfabasesize:=OS_NO;
+               end;
+
 {$ifdef EXTDEBUG}
              if paralen=0 then
                internalerror(200410311);
@@ -430,59 +495,44 @@ unit cpupara;
              while paralen>0 do
                begin
                  paraloc:=hp.paraloc[side].add_location;
-
-                 if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
-                   case paracgsize of
-                     OS_F32:
-                       begin
-                         paraloc^.size:=OS_32;
-                         paraloc^.def:=u32inttype;
-                       end;
-                     OS_F64:
-                       begin
-                         paraloc^.size:=OS_32;
-                         paraloc^.def:=u32inttype;
-                       end;
-                     else
-                       internalerror(2005082901);
-                   end
-                 else if (paracgsize in [OS_NO,OS_64,OS_S64]) then
-                   begin
-                     paraloc^.size:=OS_32;
-                     paraloc^.def:=u32inttype;
-                   end
-                 else
-                   begin
-                     paraloc^.size:=paracgsize;
-                     paraloc^.def:=get_paraloc_def(paradef,paralen,firstparaloc);
-                   end;
                  case loc of
                     LOC_REGISTER:
                       begin
+                        if paracgsize in [OS_F32,OS_F64,OS_F80] then
+                          case paracgsize of
+                            OS_F32,
+                            OS_F64:
+                              begin
+                                paraloc^.size:=OS_32;
+                                paraloc^.def:=u32inttype;
+                              end;
+                            else
+                              internalerror(2005082901);
+                          end;
                         { align registers for eabi }
                         if (target_info.abi in [abi_eabi,abi_eabihf]) and
                            firstparaloc and
                            (paradef.alignment=8) then
                           begin
+                            hp.paraloc[side].Alignment:=8;
                             if (nextintreg in [RS_R1,RS_R3]) then
                               inc(nextintreg)
                             else if nextintreg>RS_R3 then
                               stack_offset:=align(stack_offset,8);
                           end;
-                        { this is not abi compliant
-                          why? (FK) }
                         if nextintreg<=RS_R3 then
                           begin
+                            paradeftointparaloc(paradef,paracgsize,paraloc^.def,paraloc^.size);
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
                             inc(nextintreg);
                           end
                         else
                           begin
-                            { LOC_REFERENCE always contains everything that's left }
+                            { LOC_REFERENCE always contains everything that's left as a multiple of 4 bytes}
                             paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            paraloc^.def:=carraydef.getreusable_no_free(u8inttype,paralen);
+                            paraloc^.def:=get_paraloc_def(paradef,paralen,firstparaloc);
+                            paraloc^.size:=def_cgsize(paraloc^.def);
                             if (side=callerside) then
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                             paraloc^.reference.offset:=stack_offset;
@@ -492,6 +542,8 @@ unit cpupara;
                       end;
                     LOC_FPUREGISTER:
                       begin
+                        paraloc^.size:=paracgsize;
+                        paraloc^.def:=paradef;
                         if nextfloatreg<=RS_F3 then
                           begin
                             paraloc^.loc:=LOC_FPUREGISTER;
@@ -519,8 +571,18 @@ unit cpupara;
                       end;
                     LOC_MMREGISTER:
                       begin
+                        if assigned(hfabasedef) then
+                          begin
+                            paraloc^.def:=hfabasedef;
+                            paraloc^.size:=hfabasesize;
+                          end
+                        else
+                          begin
+                            paraloc^.size:=paracgsize;
+                            paraloc^.def:=paradef;
+                          end;
                         if (nextmmreg<=RS_D7) or
-                           ((paraloc^.size = OS_F32) and
+                           ((paraloc^.size=OS_F32) and
                             (sparesinglereg<>NR_NO)) then
                           begin
                             paraloc^.loc:=LOC_MMREGISTER;
@@ -556,7 +618,6 @@ unit cpupara;
                             { LOC_REFERENCE always contains everything that's left }
                             paraloc^.loc:=LOC_REFERENCE;
                             paraloc^.size:=int_cgsize(paralen);
-                            paraloc^.def:=carraydef.getreusable_no_free(u8inttype,paralen);
                             if (side=callerside) then
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                             paraloc^.reference.offset:=stack_offset;
@@ -566,6 +627,8 @@ unit cpupara;
                       end;
                     LOC_REFERENCE:
                       begin
+                        paraloc^.size:=paracgsize;
+                        paraloc^.def:=paradef;
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                           begin
                             paraloc^.size:=OS_ADDR;
@@ -578,10 +641,11 @@ unit cpupara;
                             if (target_info.abi in [abi_eabi,abi_eabihf]) and
                                firstparaloc and
                                (paradef.alignment=8) then
-                              stack_offset:=align(stack_offset,8);
+                              begin
+                                stack_offset:=align(stack_offset,8);
+                                hp.paraloc[side].Alignment:=8;
+                              end;
 
-                             paraloc^.size:=paracgsize;
-                             paraloc^.def:=paradef;
                              paraloc^.loc:=LOC_REFERENCE;
                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
                              paraloc^.reference.offset:=stack_offset;
@@ -624,37 +688,72 @@ unit cpupara;
       end;
 
 
+    procedure tcpuparamanager.paradeftointparaloc(paradef: tdef; paracgsize: tcgsize; out paralocdef: tdef; out paralocsize: tcgsize);
+      begin
+        if not(paracgsize in [OS_32,OS_S32]) or
+           (paradef.typ in [arraydef,recorddef]) or
+           is_object(paradef) then
+          begin
+            paralocsize:=OS_32;
+            paralocdef:=u32inttype;
+          end
+        else
+          begin
+            paralocsize:=paracgsize;
+            paralocdef:=paradef;
+          end;
+      end;
+
+
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       var
-        paraloc : pcgparalocation;
+        paraloc: pcgparalocation;
         retcgsize  : tcgsize;
+        basedef: tdef;
+        i: longint;
+        mmreg: tregister;
       begin
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
            exit;
 
         paraloc:=result.add_location;
         { Return in FPU register? }
-        if result.def.typ=floatdef then
+        basedef:=nil;
+        if (result.def.typ=floatdef) or
+           is_hfa(result.def,basedef) then
           begin
-            if (target_info.abi=abi_eabihf) or (p.proccalloption=pocall_hardfloat) then
+            if usemmpararegs(p.proccalloption,is_c_variadic(p)) then
               begin
-                paraloc^.loc:=LOC_MMREGISTER;
+                if assigned(basedef) then
+                  begin
+                    for i:=2 to result.def.size div basedef.size do
+                      result.add_location;
+                    retcgsize:=def_cgsize(basedef);
+                  end
+                else
+                  basedef:=result.def;
                 case retcgsize of
                   OS_64,
                   OS_F64:
                     begin
-                      paraloc^.register:=NR_MM_RESULT_REG;
+                      mmreg:=NR_MM_RESULT_REG
                     end;
                   OS_32,
                   OS_F32:
                     begin
-                      paraloc^.register:=NR_S0;
+                      mmreg:=NR_S0;
                     end;
                   else
                     internalerror(2012032501);
                 end;
-                paraloc^.size:=retcgsize;
-                paraloc^.def:=result.def;
+                repeat
+                  paraloc^.loc:=LOC_MMREGISTER;
+                  paraloc^.register:=mmreg;
+                  inc(mmreg);
+                  paraloc^.size:=retcgsize;
+                  paraloc^.def:=basedef;
+                  paraloc:=paraloc^.next;
+                until not assigned(paraloc);
               end
             else if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
@@ -740,8 +839,7 @@ unit cpupara;
                     end;
                   else
                     begin
-                      paraloc^.size:=retcgsize;
-                      paraloc^.def:=result.def;
+                      paradeftointparaloc(result.def,result.size,paraloc^.def,paraloc^.size);
                     end;
                 end;
               end;
@@ -749,6 +847,14 @@ unit cpupara;
       end;
 
 
+    function tcpuparamanager.usemmpararegs(calloption: tproccalloption; variadic: boolean): boolean;
+      begin
+        result:=
+         ((target_info.abi=abi_eabihf) or (calloption=pocall_hardfloat)) and
+          (not variadic);
+      end;
+
+
     function tcpuparamanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
       var
         cur_stack_offset: aword;
@@ -763,20 +869,30 @@ unit cpupara;
      end;
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
       var
         cur_stack_offset: aword;
         curintreg, curfloatreg, curmmreg: tsuperregister;
         sparesinglereg:tregister;
       begin
-        init_values(p,callerside,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_values(p,side,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
+        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
         if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+          begin
+            { just continue loading the parameters in the registers }
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  result:=create_paraloc_info_intern(p,side,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+                else
+                  internalerror(2019021915);
+              end;
+          end
         else
           internalerror(200410231);
+
+        create_funcretloc_info(p,side);
       end;
 
 begin

+ 13 - 1
compiler/arm/cpupi.pas

@@ -49,13 +49,15 @@ unit cpupi;
           procedure generate_parameter_info;override;
           procedure allocate_got_register(list : TAsmList);override;
           procedure postprocess_code;override;
+
+          procedure allocate_tls_register(list : TAsmList);override;
        end;
 
 
   implementation
 
     uses
-       globals,systems,
+       globals,systems,verbose,
        cpubase,
        tgobj,
        symconst,symtype,symsym,symcpu,paramgr,
@@ -154,6 +156,10 @@ unit cpupi;
             maxpushedparasize:=align(maxpushedparasize,max(current_settings.alignment.localalignmin,4));
             floatsavesize:=0;
             case current_settings.fputype of
+              fpu_none,
+              fpu_soft,
+              fpu_libgcc:
+                ;
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa11:
@@ -276,6 +282,12 @@ unit cpupi;
         finalizearmcode(aktproccode,aktlocaldata);
       end;
 
+
+    procedure tcpuprocinfo.allocate_tls_register(list: TAsmList);
+      begin
+        current_procinfo.tlsoffset:=cg.getaddressregister(list);
+      end;
+
 begin
    cprocinfo:=tcpuprocinfo;
 end.

+ 6 - 2
compiler/arm/narmadd.pas

@@ -344,7 +344,7 @@ interface
               cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
               current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg(A_VMRS, NR_APSR_nzcv, NR_FPSCR));
             end;
-          fpu_soft:
+          else
             { this case should be handled already by pass1 }
             internalerror(2009112404);
         end;
@@ -517,6 +517,8 @@ interface
                         cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                         nodetype:=oldnodetype;
                      end;
+                   else
+                     ;
                 end;
                 cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register64.reglo,right.location.register64.reglo));
@@ -634,7 +636,7 @@ interface
                   end;
 
                   if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
-                    resultdef:=pasbool8type;
+                    resultdef:=pasbool1type;
                   result:=ctypeconvnode.create_internal(ccallnode.createintern(procname,ccallparanode.create(
                       ctypeconvnode.create_internal(right,fdef),
                       ccallparanode.create(
@@ -647,6 +649,8 @@ interface
                   if notnode then
                     result:=cnotnode.create(result);
                 end;
+              else
+                internalerror(2019050933);
             end;
           end
         else

+ 3 - 0
compiler/arm/narmcnv.pas

@@ -278,6 +278,9 @@ implementation
               else
                 current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_VCVT,location.register,left.location.register), PF_F32U32));
             end;
+          else
+            { should be handled in pass 1 }
+            internalerror(2019050934);
         end;
       end;
 

+ 93 - 64
compiler/arm/narmcon.pas

@@ -26,10 +26,11 @@ unit narmcon;
 interface
 
     uses
-      ncgcon,cpubase;
+      node,ncgcon,cpubase;
 
     type
       tarmrealconstnode = class(tcgrealconstnode)
+        function pass_1 : tnode;override;
         procedure pass_generate_code;override;
       end;
 
@@ -39,9 +40,10 @@ interface
       verbose,
       globtype,globals,
       cpuinfo,
-      aasmbase,aasmtai,aasmdata,symdef,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      symdef,
       defutil,
-      cgbase,cgutils,
+      cgbase,cgutils,cgobj,
       procinfo,
       ncon;
 
@@ -49,6 +51,17 @@ interface
                            TARMREALCONSTNODE
 *****************************************************************************}
 
+    function tarmrealconstnode.pass_1 : tnode;
+      begin
+        result:=nil;
+        if (current_settings.fputype in [fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16]) and
+           IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
+           expectloc:=LOC_MMREGISTER
+         else
+           expectloc:=LOC_CREFERENCE;
+      end;
+
+
     procedure tarmrealconstnode.pass_generate_code;
       { I suppose the parser/pass_1 must make sure the generated real  }
       { constants are actually supported by the target processor? (JM) }
@@ -59,75 +72,91 @@ interface
          lastlabel : tasmlabel;
          realait : tairealconsttype;
          hiloswapped : boolean;
+         pf : TOpPostfix;
 
       begin
-        location_reset_ref(location,LOC_CREFERENCE,def_cgsize(resultdef),4,[]);
-        lastlabel:=nil;
-        realait:=floattype2ait[tfloatdef(resultdef).floattype];
-        hiloswapped:=is_double_hilo_swapped;
-        { const already used ? }
-        if not assigned(lab_real) then
+        if (current_settings.fputype in [fpu_vfpv3,fpu_vfpv4,fpu_vfpv3_d16,fpu_fpv4_s16]) and
+          IsVFPFloatImmediate(tfloatdef(resultdef).floattype,value_real) then
           begin
-            current_asmdata.getjumplabel(lastlabel);
-            lab_real:=lastlabel;
-            current_procinfo.aktlocaldata.concat(Tai_label.Create(lastlabel));
-            location.reference.symboldata:=current_procinfo.aktlocaldata.last;
-            case realait of
-              aitrealconst_s32bit :
-                begin
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s32real(ts32real(value_real)));
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s32val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-                end;
-
-              aitrealconst_s64bit :
-                begin
-                  if hiloswapped then
-                    current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real_hiloswapped(ts64real(value_real)))
-                  else
-                    current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real(ts64real(value_real)));
-
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s64val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-               end;
-
-              aitrealconst_s80bit :
-                begin
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s80real(value_real,tfloatdef(resultdef).size));
-
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s80val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-                end;
+            location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+            location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+            if tfloatdef(resultdef).floattype=s32real then
+              pf:=PF_F32
+            else
+              pf:=PF_F64;
+            current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_realconst(A_VMOV,
+               location.register,value_real),pf));
+          end
+        else
+          begin
+            location_reset_ref(location,LOC_CREFERENCE,def_cgsize(resultdef),4,[]);
+            lastlabel:=nil;
+            realait:=floattype2ait[tfloatdef(resultdef).floattype];
+            hiloswapped:=is_double_hilo_swapped;
+            { const already used ? }
+            if not assigned(lab_real) then
+              begin
+                current_asmdata.getjumplabel(lastlabel);
+                lab_real:=lastlabel;
+                current_procinfo.aktlocaldata.concat(Tai_label.Create(lastlabel));
+                location.reference.symboldata:=current_procinfo.aktlocaldata.last;
+                case realait of
+                  aitrealconst_s32bit :
+                    begin
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s32real(ts32real(value_real)));
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s32val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                    end;
+
+                  aitrealconst_s64bit :
+                    begin
+                      if hiloswapped then
+                        current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real_hiloswapped(ts64real(value_real)))
+                      else
+                        current_procinfo.aktlocaldata.concat(tai_realconst.create_s64real(ts64real(value_real)));
+
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s64val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                   end;
+
+                  aitrealconst_s80bit :
+                    begin
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s80real(value_real,tfloatdef(resultdef).size));
+
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s80val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                    end;
 {$ifdef cpufloat128}
-              aitrealconst_s128bit :
-                begin
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s128real(value_real));
-
-                  { range checking? }
-                  if floating_point_range_check_error and
-                    (tai_realconst(current_procinfo.aktlocaldata.last).value.s128val=MathInf.Value) then
-                    Message(parser_e_range_check_error);
-                end;
+                  aitrealconst_s128bit :
+                    begin
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s128real(value_real));
+
+                      { range checking? }
+                      if floating_point_range_check_error and
+                        (tai_realconst(current_procinfo.aktlocaldata.last).value.s128val=MathInf.Value) then
+                        Message(parser_e_range_check_error);
+                    end;
 {$endif cpufloat128}
 
-              { the round is necessary for native compilers where comp isn't a float }
-              aitrealconst_s64comp :
-                if (value_real>9223372036854775807.0) or (value_real<-9223372036854775808.0) then
-                  message(parser_e_range_check_error)
+                  { the round is necessary for native compilers where comp isn't a float }
+                  aitrealconst_s64comp :
+                    if (value_real>9223372036854775807.0) or (value_real<-9223372036854775808.0) then
+                      message(parser_e_range_check_error)
+                    else
+                      current_procinfo.aktlocaldata.concat(tai_realconst.create_s64compreal(round(value_real)));
                 else
-                  current_procinfo.aktlocaldata.concat(tai_realconst.create_s64compreal(round(value_real)));
-            else
-              internalerror(2005092401);
-            end;
+                  internalerror(2005092401);
+                end;
+              end;
+            location.reference.symbol:=lab_real;
+            location.reference.base:=NR_R15;
           end;
-        location.reference.symbol:=lab_real;
-        location.reference.base:=NR_R15;
       end;
 
 begin

+ 95 - 0
compiler/arm/narmld.pas

@@ -0,0 +1,95 @@
+{
+    Copyright (c) 1998-2018 by Florian Klaempfl
+
+    Generate arm assembler for load nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit narmld;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      globtype,
+      symsym,
+      node,ncgld,pass_1,aasmbase;
+
+    type
+      tarmloadnode = class(tcgloadnode)
+         procedure generate_threadvar_access(gvs : tstaticvarsym); override;
+      end;
+
+
+implementation
+
+    uses
+      globals,verbose,
+      cgbase,cgobj,cgutils,
+      aasmdata,
+      systems,
+      symcpu,symdef,
+      nld,
+      cpubase,
+      parabase,
+      procinfo;
+
+{*****************************************************************************
+                            TARMLOADNODE
+*****************************************************************************}
+
+    procedure tarmloadnode.generate_threadvar_access(gvs: tstaticvarsym);
+      var
+        href: treference;
+        hregister : tregister;
+        handled: boolean;
+        l : TAsmLabel;
+      begin
+        handled:=false;
+        if tf_section_threadvars in target_info.flags then
+          begin
+            if target_info.system in [system_arm_linux] then
+              begin
+                if not(pi_uses_threadvar in current_procinfo.flags) then
+                  internalerror(2012012101);
+                current_asmdata.getjumplabel(l);
+                reference_reset_symbol(href,current_asmdata.RefAsmSymbol(gvs.mangledname,AT_DATA),-8,sizeof(AInt),[]);
+                href.refaddr:=addr_gottpoff;
+                href.relsymbol:=l;
+                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,hregister);
+                cg.a_label(current_asmdata.CurrAsmList,l);
+                reference_reset(href,0,[]);
+                href.base:=NR_PC;
+                href.index:=hregister;
+                hregister:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,href,hregister);
+                location.reference.base:=current_procinfo.tlsoffset;
+                location.reference.index:=hregister;
+                handled:=true;
+              end;
+          end;
+
+        if not handled then
+          inherited;
+      end;
+
+
+begin
+   cloadnode:=tarmloadnode;
+end.

+ 7 - 5
compiler/arm/narmmat.pas

@@ -164,7 +164,7 @@ implementation
                       cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,helper1);
                     if GenerateThumbCode then
                       begin
-                        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,32-power,helper1);
+                        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,32-power,helper1);
                         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_ADD,helper2,numerator,helper1));
                       end
                     else
@@ -179,9 +179,12 @@ implementation
                else
                  cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_INT,power,numerator,resultreg)
              end
-           else {Everything else is handled the generic code}
+           else if CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype] then
+             {Everything else is handled the generic code}
              cg.g_div_const_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),
-               tordconstnode(right).value.svalue,numerator,resultreg);
+               tordconstnode(right).value.svalue,numerator,resultreg)
+           else
+             internalerror(2019012601);
          end;
 
 {
@@ -286,8 +289,7 @@ implementation
                 resultreg:=cg.getintregister(current_asmdata.CurrAsmList,size);
               end;
 
-            if (right.nodetype=ordconstn) and
-               (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
+            if (right.nodetype=ordconstn) then
               begin
                 if nodetype=divn then
                   genOrdConstNodeDiv

+ 24 - 16
compiler/arm/narmset.pas

@@ -41,9 +41,9 @@ interface
        end;
 
       tarmcasenode = class(tcgcasenode)
-         procedure optimizevalues(var max_linear_list:aint;var max_dist:aword);override;
+         procedure optimizevalues(var max_linear_list:int64;var max_dist:qword);override;
          function  has_jumptable : boolean;override;
-         procedure genjumptable(hp : pcaselabel;min_,max_ : aint);override;
+         procedure genjumptable(hp : pcaselabel;min_,max_ : int64);override;
          procedure genlinearlist(hp : pcaselabel);override;
          procedure genjmptreeentry(p : pcaselabel;parentvalue : TConstExprInt);override;
       end;
@@ -136,7 +136,7 @@ implementation
                             TARMCASENODE
 *****************************************************************************}
 
-    procedure tarmcasenode.optimizevalues(var max_linear_list:aint;var max_dist:aword);
+    procedure tarmcasenode.optimizevalues(var max_linear_list:int64;var max_dist:qword);
       begin
         inc(max_linear_list,2)
       end;
@@ -148,7 +148,7 @@ implementation
       end;
 
 
-    procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : aint);
+    procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : int64);
       var
         last : TConstExprInt;
         tmpreg,
@@ -161,22 +161,30 @@ implementation
 
         procedure genitem(list:TAsmList;t : pcaselabel);
           var
-            i : aint;
+            i : int64;
           begin
             if assigned(t^.less) then
               genitem(list,t^.less);
             { fill possible hole }
-            for i:=last.svalue+1 to t^._low.svalue-1 do
-              if cs_create_pic in current_settings.moduleswitches then
-                list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,elselabel,picoffset))
-              else
-                list.concat(Tai_const.Create_sym(elselabel));
-            for i:=t^._low.svalue to t^._high.svalue do
-              if cs_create_pic in current_settings.moduleswitches then
-                list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,blocklabel(t^.blockid),picoffset))
-              else
-                list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
-            last:=t^._high.svalue;
+            i:=last+1;
+            while i<=t^._low-1 do
+              begin
+                if cs_create_pic in current_settings.moduleswitches then
+                  list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,elselabel,picoffset))
+                else
+                  list.concat(Tai_const.Create_sym(elselabel));
+                i:=i+1;
+              end;
+            i:=t^._low;
+            while i<=t^._high do
+              begin
+                if cs_create_pic in current_settings.moduleswitches then
+                  list.concat(Tai_const.Create_rel_sym_offset(aitconst_ptr,piclabel,blocklabel(t^.blockid),picoffset))
+                else
+                  list.concat(Tai_const.Create_sym(blocklabel(t^.blockid)));
+                i:=i+1;
+              end;
+            last:=t^._high;
             if assigned(t^.greater) then
               genitem(list,t^.greater);
           end;

+ 7 - 0
compiler/arm/raarmgas.pas

@@ -147,6 +147,7 @@ Unit raarmgas;
           end;
       end;
 
+
     function tarmattreader.is_targetdirective(const s: string): boolean;
       begin
         case s of
@@ -723,6 +724,8 @@ Unit raarmgas;
                         end;
                     end;
                 end;
+              else
+               ;
             end;
           end;
 
@@ -816,6 +819,8 @@ Unit raarmgas;
                   oper.opr.ref.base:=NR_PC;
                   oper.opr.ref.symbol:=GetConstLabel(sym,val);
                 end;
+              else
+                ;
             end;
           end;
 
@@ -1142,6 +1147,8 @@ Unit raarmgas;
               else
                 Message(asmr_e_invalid_operand_type); // Otherwise it would have been seen as a AS_REGISTER
             end;
+          else
+            Message(asmr_e_invalid_operand_type);
         end;
       end;
 

+ 8 - 0
compiler/arm/rgcpu.pas

@@ -166,6 +166,8 @@ unit rgcpu;
                     if current_procinfo.framepointer<>r then
                       add_edge(getsupreg(taicpu(p).oper[1]^.ref^.base),getsupreg(r));
                   end;
+              else
+                ;
             end;
           end;
       end;
@@ -353,6 +355,8 @@ unit rgcpu;
                 RS_S21,RS_S23,RS_S25,RS_S27,RS_S29,RS_S31] do
                 add_edge(supreg,i);
             end;
+          else
+            ;
         end;
       end;
 
@@ -606,6 +610,8 @@ unit rgcpu;
                     if current_procinfo.framepointer<>r then
                       add_edge(getsupreg(taicpu(p).oper[1]^.ref^.base),getsupreg(r));
                   end;
+              else
+                ;
             end;
           end;
       end;
@@ -658,6 +664,8 @@ unit rgcpu;
                        add_edge(getsupreg(taicpu(p).oper[0]^.reg),i);
                      end;
                  end;
+              else
+                ;
             end;
           end;
       end;

+ 2 - 2
compiler/arm/symcpu.pas

@@ -101,7 +101,7 @@ type
     { library symbol for AROS }
     libsym : tsym;
     libsymderef : tderef;
-    function getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp): tstoreddef; override;
+    function getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp; const paraprefix: string): tstoreddef; override;
     procedure buildderef; override;
     procedure deref; override;
   end;
@@ -208,7 +208,7 @@ implementation
     end;
 
 
-  function tcpuprocdef.getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp): tstoreddef;
+  function tcpuprocdef.getcopyas(newtyp: tdeftyp; copytyp: tproccopytyp; const paraprefix: string): tstoreddef;
     begin
       result:=inherited;
       if newtyp=procdef then

+ 121 - 0
compiler/armgen/armpara.pas

@@ -0,0 +1,121 @@
+{
+    Copyright (c) 2019 by Jonas Maebe
+
+    ARM and AArch64 common parameter helpers
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ****************************************************************************
+}
+unit armpara;
+
+{$mode objfpc}
+
+interface
+
+uses
+  symtype,
+  paramgr;
+
+type
+  tarmgenparamanager = class(tparamanager)
+   protected
+    { Returns whether a def is a "homogeneous float array" at the machine level.
+      This means that in the memory layout, the def only consists of maximally
+      4 floating point values that appear consecutively in memory }
+    function is_hfa(p: tdef; out basedef: tdef) : boolean;
+   private
+    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
+  end;
+
+
+implementation
+
+  uses
+    symconst,symdef,symsym,defutil;
+
+
+  function tarmgenparamanager.is_hfa(p: tdef; out basedef: tdef): boolean;
+    var
+      elecount: longint;
+    begin
+      result:=false;
+      basedef:=nil;
+      elecount:=0;
+      result:=is_hfa_internal(p,basedef,elecount);
+      result:=
+        result and
+        (elecount>0) and
+        (elecount<=4) and
+        (p.size=basedef.size*elecount)
+      end;
+
+
+  function tarmgenparamanager.is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
+    var
+      i: longint;
+      sym: tsym;
+      tmpelecount: longint;
+    begin
+      result:=false;
+      case p.typ of
+        arraydef:
+          begin
+            if is_special_array(p) then
+              exit;
+            { an array of empty records has no influence }
+            if tarraydef(p).elementdef.size=0 then
+              begin
+                result:=true;
+                exit
+              end;
+            tmpelecount:=0;
+            if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
+              exit;
+            { tmpelecount now contains the number of hfa elements in a
+              single array element (e.g. 2 if it's an array of a record
+              containing two singles) -> multiply by number of elements
+              in the array }
+            inc(elecount,tarraydef(p).elecount*tmpelecount);
+            if elecount>4 then
+              exit;
+            result:=true;
+          end;
+        floatdef:
+          begin
+            if not assigned(basedef) then
+              basedef:=p
+            else if basedef<>p then
+              exit;
+            inc(elecount);
+            result:=true;
+          end;
+        recorddef:
+          begin
+            for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
+              begin
+                sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
+                if sym.typ<>fieldvarsym then
+                  continue;
+                if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
+                  exit
+              end;
+            result:=true;
+          end;
+        else
+          exit
+      end;
+    end;
+
+end.

+ 23 - 0
compiler/assemble.pas

@@ -418,6 +418,7 @@ Implementation
         s: ansistring;
       begin
         MaybeAddLinePrefix;
+        s:='';
         setlength(s,len);
         move(p^,s[1],len);
         AsmWriteAnsiStringUnfiltered(decorator.LineFilter(s));
@@ -1646,6 +1647,10 @@ Implementation
                      { ai_directive(hp).name can be only 16 or 32, this is checked by the reader }
                      ObjData.ThumbFunc:=tai_directive(hp).name='16';
 {$endif ARM}
+{$ifdef RISCV}
+                   asd_option:
+                     internalerror(2019031701);
+{$endif RISCV}
                    else
                      internalerror(2010011101);
                  end;
@@ -1676,6 +1681,8 @@ Implementation
              ait_cutobject :
                if SmartAsm then
                 break;
+             else
+               ;
            end;
            hp:=Tai(hp.next);
          end;
@@ -1699,6 +1706,13 @@ Implementation
                      { here we must determine the fillsize which is used in pass2 }
                      Tai_align_abstract(hp).fillsize:=align(ObjData.CurrObjSec.Size,Tai_align_abstract(hp).aligntype)-
                        ObjData.CurrObjSec.Size;
+
+                     { maximum number of bytes for alignment exeeded? }
+                     if (Tai_align_abstract(hp).aligntype<>Tai_align_abstract(hp).maxbytes) and
+                       (Tai_align_abstract(hp).fillsize>Tai_align_abstract(hp).maxbytes) then
+                       Tai_align_abstract(hp).fillsize:=align(ObjData.CurrObjSec.Size,Byte(Tai_align_abstract(hp).aligntype div 2))-
+                         ObjData.CurrObjSec.Size;
+
                      ObjData.alloc(Tai_align_abstract(hp).fillsize);
                    end;
                end;
@@ -1792,6 +1806,9 @@ Implementation
                    asd_code:
                      { ignore for now, but should be added}
                      ;
+                   asd_option:
+                     { ignore for now, but should be added}
+                     ;
 {$ifdef OMFOBJSUPPORT}
                    asd_omf_linnum_line:
                      { ignore for now, but should be added}
@@ -1811,6 +1828,8 @@ Implementation
                      internalerror(2010011102);
                  end;
                end;
+             else
+               ;
            end;
            hp:=Tai(hp.next);
          end;
@@ -2077,6 +2096,8 @@ Implementation
                          ));
                      end;
 {$endif OMFOBJSUPPORT}
+                   else
+                     ;
                  end
                end;
              ait_symbolpair:
@@ -2097,6 +2118,8 @@ Implementation
              ait_seh_directive :
                tai_seh_directive(hp).generate_code(objdata);
 {$endif DISABLE_WIN64_SEH}
+             else
+               ;
            end;
            hp:=Tai(hp.next);
          end;

+ 132 - 54
compiler/avr/aoptcpu.pas

@@ -42,6 +42,8 @@ Type
     function RegLoadedWithNewValue(reg : tregister; hp : tai) : boolean; override;
     function InstructionLoadsFromReg(const reg : TRegister; const hp : tai) : boolean; override;
 
+    function InvertSkipInstruction(var p: tai): boolean;
+
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
@@ -75,7 +77,9 @@ Implementation
         (r1.index = r2.index) and (r1.scalefactor = r2.scalefactor) and
         (r1.symbol=r2.symbol) and (r1.refaddr = r2.refaddr) and
         (r1.relsymbol = r2.relsymbol) and
-        (r1.addressmode = r2.addressmode);
+        (r1.addressmode = r2.addressmode) and
+        (r1.volatility=[]) and
+        (r2.volatility=[]);
     end;
 
 
@@ -223,13 +227,77 @@ Implementation
         end;
     end;
 
+
+  {
+    Turns
+      sbis ?
+      jmp .Lx
+      op
+    .Lx:
+
+    Into
+      sbic ?
+      op
+
+    For all types of skip instructions
+  }
+  function TCpuAsmOptimizer.InvertSkipInstruction(var p: tai): boolean;
+
+    function GetNextInstructionWithoutLabel(p: tai; var next: tai): boolean;
+      begin
+        repeat
+          result:=GetNextInstruction(p,next);
+          p:=next;
+        until
+          (not result) or
+          (not assigned(next)) or
+          (next.typ in [ait_instruction]);
+
+        result:=assigned(next) and (next.typ in [ait_instruction]);
+      end;
+
+    var
+      hp1, hp2, hp3: tai;
+      s: string;
+    begin
+      result:=false;
+
+      if GetNextInstruction(taicpu(p),hp1) and
+        (hp1.typ=ait_instruction) and
+        (taicpu(hp1).opcode in [A_RJMP,A_JMP]) and
+        (taicpu(hp1).ops=1) and
+        (taicpu(hp1).oper[0]^.typ=top_ref) and
+        (taicpu(hp1).oper[0]^.ref^.offset=0) and
+        (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
+        GetNextInstructionWithoutLabel(hp1,hp2) and
+        (hp2.typ=ait_instruction) and
+        (not taicpu(hp2).is_jmp) and
+        GetNextInstruction(hp2,hp3) and
+        FindLabel(TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol),hp3) then
+        begin
+          DebugMsg('SkipJump2InvertedSkip', p);
+
+          case taicpu(p).opcode of
+            A_SBIS: taicpu(p).opcode:=A_SBIC;
+            A_SBIC: taicpu(p).opcode:=A_SBIS;
+            A_SBRS: taicpu(p).opcode:=A_SBRC;
+            A_SBRC: taicpu(p).opcode:=A_SBRS;
+          end;
+
+          TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
+
+          asml.remove(hp1);
+          hp1.free;
+        end;
+    end;
+
+
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
       hp1,hp2,hp3,hp4,hp5: tai;
       alloc, dealloc: tai_regalloc;
       i: integer;
       l: TAsmLabel;
-      TmpUsedRegs : TAllUsedRegs;
     begin
       result := false;
       case p.typ of
@@ -323,7 +391,7 @@ Implementation
                        (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
                        not(MatchOperand(taicpu(hp1).oper[0]^,taicpu(hp1).oper[1]^)) then
                       begin
-                        CopyUsedRegs(TmpUsedRegs);
+                        TransferUsedRegs(TmpUsedRegs);
                         if not(RegUsedAfterInstruction(taicpu(hp1).oper[1]^.reg, hp1, TmpUsedRegs)) then
                           begin
                             case taicpu(hp1).opcode of
@@ -349,9 +417,8 @@ Implementation
 
                             DebugMsg('Peephole LdiMov/Cp2Ldi/Cpi performed', p);
 
-                            RemoveCurrentP(taicpu(p));
+                            RemoveCurrentP(p);
                           end;
-                        ReleaseUsedRegs(TmpUsedRegs);
                       end;
                   end;
                 A_STS:
@@ -484,6 +551,46 @@ Implementation
                             result:=true;
                           end;
                       end;
+                A_SBRS,
+                A_SBRC:
+                  begin
+                    {
+                      Turn
+                        in rx, y
+                        sbr* rx, z
+                      Into
+                        sbi* y, z
+                    }
+                    if (taicpu(p).ops=2) and
+                       (taicpu(p).oper[0]^.typ=top_reg) and
+                       assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(p.next))) and
+                       GetLastInstruction(p,hp1) and
+                       (hp1.typ=ait_instruction) and
+                       (taicpu(hp1).opcode=A_IN) and
+                       (taicpu(hp1).ops=2) and
+                       (taicpu(hp1).oper[1]^.typ=top_const) and
+                       (taicpu(hp1).oper[1]^.val in [0..31]) and
+                       MatchOperand(taicpu(hp1).oper[0]^,taicpu(p).oper[0]^.reg) and
+                       (not RegModifiedBetween(taicpu(p).oper[0]^.reg, hp1, p)) then
+                      begin
+                        if taicpu(p).opcode=A_SBRS then
+                          taicpu(p).opcode:=A_SBIS
+                        else
+                          taicpu(p).opcode:=A_SBIC;
+
+                        taicpu(p).loadconst(0, taicpu(hp1).oper[1]^.val);
+
+                        DebugMsg('Peephole InSbrx2Sbix performed', p);
+
+                        asml.Remove(hp1);
+                        hp1.free;
+
+                        result:=true;
+                      end;
+
+                    if InvertSkipInstruction(p) then
+                      result:=true;
+                  end;
                 A_ANDI:
                   begin
                     {
@@ -541,7 +648,7 @@ Implementation
                       begin
                         DebugMsg('Redundant Andi removed', p);
 
-                        result:=RemoveCurrentP(taicpu(p));
+                        result:=RemoveCurrentP(p);
                       end;
                   end;
                 A_ADD:
@@ -552,7 +659,7 @@ Implementation
                     begin
                       DebugMsg('Peephole AddAdc2Add performed', p);
 
-                      result:=RemoveCurrentP(taicpu(p));
+                      result:=RemoveCurrentP(p);
                     end;
                   end;
                 A_SUB:
@@ -565,7 +672,7 @@ Implementation
 
                       taicpu(hp1).opcode:=A_SUB;
 
-                      result:=RemoveCurrentP(taicpu(p));
+                      result:=RemoveCurrentP(p);
                     end;
                   end;
                 A_CLR:
@@ -588,7 +695,7 @@ Implementation
                       begin
                         DebugMsg('Peephole ClrMov2Mov performed', p);
 
-                        result:=RemoveCurrentP(taicpu(p));
+                        result:=RemoveCurrentP(p);
                       end
                     { turn
                       clr rX
@@ -625,7 +732,7 @@ Implementation
                             dealloc.Free;
                           end;
 
-                        result:=RemoveCurrentP(taicpu(p));
+                        result:=RemoveCurrentP(p);
                       end;
                   end;
                 A_PUSH:
@@ -667,9 +774,9 @@ Implementation
 
                            taicpu(hp3).loadreg(1, taicpu(p).oper[0]^.reg);
 
-                           RemoveCurrentP(taicpu(p));
-                           RemoveCurrentP(taicpu(p));
-                           result:=RemoveCurrentP(taicpu(p));
+                           RemoveCurrentP(p);
+                           RemoveCurrentP(p);
+                           result:=RemoveCurrentP(p);
                          end
                        else
                          begin
@@ -688,14 +795,12 @@ Implementation
                            taicpu(hp1).loadreg(0, taicpu(hp2).oper[0]^.reg);
 
                            { life range of reg2 and reg3 is increased, fix register allocation entries }
-                           CopyUsedRegs(TmpUsedRegs);
+                           TransferUsedRegs(TmpUsedRegs);
                            UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
                            AllocRegBetween(taicpu(hp2).oper[0]^.reg,hp1,hp2,TmpUsedRegs);
-                           ReleaseUsedRegs(TmpUsedRegs);
 
-                           CopyUsedRegs(TmpUsedRegs);
+                           TransferUsedRegs(TmpUsedRegs);
                            AllocRegBetween(taicpu(hp3).oper[0]^.reg,p,hp3,TmpUsedRegs);
-                           ReleaseUsedRegs(TmpUsedRegs);
 
                            IncludeRegInUsedRegs(taicpu(hp3).oper[0]^.reg,UsedRegs);
                            UpdateUsedRegs(tai(p.Next));
@@ -748,7 +853,7 @@ Implementation
                     }
                     if MatchOpType(taicpu(p),top_reg,top_reg) then
                       begin
-                        CopyUsedRegs(TmpUsedRegs);
+                        TransferUsedRegs(TmpUsedRegs);
                         UpdateUsedRegs(TmpUsedRegs,tai(p.Next));
                         if not(RegInUsedRegs(taicpu(p).oper[0]^.reg,TmpUsedRegs)) and
                           { reg. allocation information before calls is not perfect, so don't do this before
@@ -757,11 +862,9 @@ Implementation
                           not(MatchInstruction(hp1,[A_CALL,A_RCALL])) then
                           begin
                             DebugMsg('Peephole Mov2Nop performed', p);
-                            result:=RemoveCurrentP(taicpu(p));
-                            ReleaseUsedRegs(TmpUsedRegs);
+                            result:=RemoveCurrentP(p);
                             exit;
                           end;
-                        ReleaseUsedRegs(TmpUsedRegs);
                       end;
 
                     { turn
@@ -777,7 +880,7 @@ Implementation
                        (MatchInstruction(hp1,[A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_ADC,A_SBC,A_EOR,A_AND,A_OR,
                                                A_OUT,A_IN]) or
                        { the reference register of ST/STD cannot be replaced }
-                       (MatchInstruction(hp1,[A_STD,A_ST]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
+                       (MatchInstruction(hp1,[A_STD,A_ST,A_STS]) and (MatchOperand(taicpu(p).oper[0]^,taicpu(hp1).oper[1]^)))) and
                        (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
                        {(taicpu(hp1).ops=1) and
                        (taicpu(hp1).oper[0]^.typ = top_reg) and
@@ -807,7 +910,7 @@ Implementation
                         { p will be removed, update used register as we continue
                           with the next instruction after p }
 
-                        result:=RemoveCurrentP(taicpu(p));
+                        result:=RemoveCurrentP(p);
                       end
                     { remove
                       mov reg0,reg0
@@ -819,7 +922,7 @@ Implementation
                       begin
                         DebugMsg('Peephole RedundantMov performed', p);
 
-                        result:=RemoveCurrentP(taicpu(p));
+                        result:=RemoveCurrentP(p);
                       end
                     {
                       Turn
@@ -870,7 +973,7 @@ Implementation
                         asml.remove(hp2);
                         hp2.free;
 
-                        result:=RemoveCurrentP(taicpu(p));
+                        result:=RemoveCurrentP(p);
                       end
                     {
                       Turn
@@ -913,7 +1016,7 @@ Implementation
                             dealloc.Free;
                           end;
 
-                        result:=RemoveCurrentP(taicpu(p));
+                        result:=RemoveCurrentP(p);
 
                         asml.remove(hp2);
                         hp2.free;
@@ -968,7 +1071,7 @@ Implementation
                         begin
                           DebugMsg('Peephole MovMov2Mov performed', p);
 
-                          result:=RemoveCurrentP(taicpu(p));
+                          result:=RemoveCurrentP(p);
 
                           GetNextInstruction(hp1,hp1);
                           if not assigned(hp1) then
@@ -990,33 +1093,8 @@ Implementation
                           op
                         .L1:
                     }
-                    if GetNextInstruction(p, hp1) and
-                       (hp1.typ=ait_instruction) and
-                       (taicpu(hp1).opcode in [A_JMP,A_RJMP]) and
-                       (taicpu(hp1).ops>0) and
-                       (taicpu(hp1).oper[0]^.typ = top_ref) and
-                       (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
-                       GetNextInstruction(hp1, hp2) and
-                       (hp2.typ=ait_instruction) and
-                       (not taicpu(hp2).is_jmp) and
-                       GetNextInstruction(hp2, hp3) and
-                       (hp3.typ=ait_label) and
-                       (taicpu(hp1).oper[0]^.ref^.symbol=tai_label(hp3).labsym) then
-                      begin
-                        DebugMsg('Peephole SbiJmp2Sbi performed',p);
-
-                        if taicpu(p).opcode=A_SBIC then
-                          taicpu(p).opcode:=A_SBIS
-                        else
-                          taicpu(p).opcode:=A_SBIC;
-
-                        tai_label(hp3).labsym.decrefs;
-
-                        AsmL.remove(hp1);
-                        taicpu(hp1).Free;
-
-                        result:=true;
-                      end
+                    if InvertSkipInstruction(p) then
+                      result:=true
                     {
                       Turn
                           sbiX X, y

+ 0 - 4
compiler/avr/aoptcpub.pas

@@ -75,10 +75,6 @@ Const
 
   MaxCh = 2;
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 2;
-
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 

+ 3 - 1
compiler/avr/ccpuinnr.inc

@@ -16,4 +16,6 @@
   in_avr_sei = fpc_in_cpu_first+1,
   in_avr_wdr = fpc_in_cpu_first+2,
   in_avr_sleep = fpc_in_cpu_first+3,
-  in_avr_nop = fpc_in_cpu_first+4
+  in_avr_nop = fpc_in_cpu_first+4,
+  in_avr_save = fpc_in_cpu_first+5,
+  in_avr_restore = fpc_in_cpu_first+6

+ 137 - 68
compiler/avr/cgcpu.pas

@@ -274,8 +274,9 @@ unit cgcpu;
                   begin
                     load_para_loc(r,hp);
 
-                    for i2:=1 to tcgsize2size[hp^.Size] do
-                      r:=GetNextReg(r);
+                    if i<tcgsize2size[cgpara.Size] then
+                      for i2:=1 to tcgsize2size[hp^.Size] do
+                        r:=GetNextReg(r);
 
                     hp:=hp^.Next;
                   end;
@@ -302,30 +303,30 @@ unit cgcpu;
           begin
             if not(assigned(hp)) then
               internalerror(2014011105);
-             //paramanager.allocparaloc(list,hp);
-             case hp^.loc of
-               LOC_REGISTER,LOC_CREGISTER:
-                 begin
-                   if (tcgsize2size[hp^.size]<>1) or
-                     (hp^.shiftval<>0) then
-                     internalerror(2015041101);
-                   a_load_const_reg(list,hp^.size,(a shr (8*(i-1))) and $ff,hp^.register);
+            paramanager.allocparaloc(list,hp);
+            case hp^.loc of
+              LOC_REGISTER,LOC_CREGISTER:
+                begin
+                  if (tcgsize2size[hp^.size]<>1) or
+                    (hp^.shiftval<>0) then
+                    internalerror(2015041101);
+                  a_load_const_reg(list,hp^.size,(a shr (8*(i-1))) and $ff,hp^.register);
 
-                   inc(i,tcgsize2size[hp^.size]);
-                   hp:=hp^.Next;
-                 end;
-               LOC_REFERENCE,LOC_CREFERENCE:
-                 begin
-                   reference_reset(ref,paraloc.alignment,[]);
-                   ref.base:=hp^.reference.index;
-                   ref.offset:=hp^.reference.offset;
-                   a_load_const_ref(list,hp^.size,a shr (8*(i-1)),ref);
+                  inc(i,tcgsize2size[hp^.size]);
+                  hp:=hp^.Next;
+                end;
+              LOC_REFERENCE,LOC_CREFERENCE:
+                begin
+                  reference_reset(ref,paraloc.alignment,[]);
+                  ref.base:=hp^.reference.index;
+                  ref.offset:=hp^.reference.offset;
+                  a_load_const_ref(list,hp^.size,a shr (8*(i-1)),ref);
 
-                   inc(i,tcgsize2size[hp^.size]);
-                   hp:=hp^.Next;
-                 end;
-               else
-                 internalerror(2002071004);
+                  inc(i,tcgsize2size[hp^.size]);
+                  hp:=hp^.Next;
+                end;
+              else
+                internalerror(2002071004);
             end;
           end;
       end;
@@ -1345,21 +1346,38 @@ unit cgcpu;
            end;
          if not conv_done then
            begin
-             for i:=1 to tcgsize2size[fromsize] do
+             // CC
+             // Write to 16 bit ioreg, first high byte then low byte
+             // sequence required for 16 bit timer registers
+             // See e.g. atmega328p manual para 15.3 Accessing 16 bit registers
+             if (fromsize in [OS_16, OS_S16]) and QuickRef and (href.offset > 31)
+               and (href.offset < cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then
                begin
-                   if not(QuickRef) and (i<tcgsize2size[fromsize]) then
-                     href.addressmode:=AM_POSTINCREMENT
-                   else
-                     href.addressmode:=AM_UNCHANGED;
-
+                 tmpreg:=GetNextReg(reg);
+                 href.addressmode:=AM_UNCHANGED;
+                 inc(href.offset);
+                 list.concat(taicpu.op_ref_reg(GetStore(href),href,tmpreg));
+                 dec(href.offset);
                  list.concat(taicpu.op_ref_reg(GetStore(href),href,reg));
+               end
+             else
+               begin
+                 for i:=1 to tcgsize2size[fromsize] do
+                   begin
+                       if not(QuickRef) and (i<tcgsize2size[fromsize]) then
+                         href.addressmode:=AM_POSTINCREMENT
+                       else
+                         href.addressmode:=AM_UNCHANGED;
 
-                 if QuickRef then
-                   inc(href.offset);
+                     list.concat(taicpu.op_ref_reg(GetStore(href),href,reg));
 
-                 { check if we are not in the last iteration to avoid an internalerror in GetNextReg }
-                 if i<tcgsize2size[fromsize] then
-                   reg:=GetNextReg(reg);
+                     if QuickRef then
+                       inc(href.offset);
+
+                     { check if we are not in the last iteration to avoid an internalerror in GetNextReg }
+                     if i<tcgsize2size[fromsize] then
+                       reg:=GetNextReg(reg);
+                   end;
                end;
            end;
 
@@ -2124,7 +2142,7 @@ unit cgcpu;
 
     procedure tcgavr.g_concatcopy(list : TAsmList;const source,dest : treference;len : tcgint);
       var
-        countreg,tmpreg : tregister;
+        countreg,tmpreg,tmpreg2: tregister;
         srcref,dstref : treference;
         copysize,countregsize : tcgsize;
         l : TAsmLabel;
@@ -2269,40 +2287,91 @@ unit cgcpu;
                 dstref:=dest;
               end;
 
-            for i:=1 to len do
-              begin
-                if not(SrcQuickRef) and (i<len) then
-                  srcref.addressmode:=AM_POSTINCREMENT
-                else
-                  srcref.addressmode:=AM_UNCHANGED;
+              // CC
+              // If dest is an ioreg (31 < offset < srambase) and size = 16 bit then
+              // load high byte first, then low byte
+              if (len = 2) and DestQuickRef
+                and (dest.offset > 31)
+                and (dest.offset < cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then
+                begin
+                  // If src is also a 16 bit ioreg then read low byte then high byte
+                  if SrcQuickRef and (srcref.offset > 31)
+                    and (srcref.offset < cpuinfo.embedded_controllers[current_settings.controllertype].srambase) then
+                    begin
+                      // First read source into temp registers
+                      tmpreg:=getintregister(list, OS_16);
+                      list.concat(taicpu.op_reg_ref(GetLoad(srcref),tmpreg,srcref));
+                      inc(srcref.offset);
+                      tmpreg2:=GetNextReg(tmpreg);
+                      list.concat(taicpu.op_reg_ref(GetLoad(srcref),tmpreg2,srcref));
+
+                      // then move temp registers to dest in reverse order
+                      inc(dstref.offset);
+                      list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,tmpreg2));
+                      dec(dstref.offset);
+                      list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,tmpreg));
+                    end
+                  else
+                    begin
+                      srcref.addressmode:=AM_UNCHANGED;
+                      inc(srcref.offset);
+                      dstref.addressmode:=AM_UNCHANGED;
+                      inc(dstref.offset);
+
+                      cg.getcpuregister(list,NR_R0);
+                      list.concat(taicpu.op_reg_ref(GetLoad(srcref),NR_R0,srcref));
+                      list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,NR_R0));
+                      cg.ungetcpuregister(list,NR_R0);
+
+                      if not(SrcQuickRef) then
+                        srcref.addressmode:=AM_POSTINCREMENT
+                      else
+                        srcref.addressmode:=AM_UNCHANGED;
+
+                      dec(srcref.offset);
+                      dec(dstref.offset);
+
+                      cg.getcpuregister(list,NR_R0);
+                      list.concat(taicpu.op_reg_ref(GetLoad(srcref),NR_R0,srcref));
+                      list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,NR_R0));
+                      cg.ungetcpuregister(list,NR_R0);
+                    end;
+                end
+              else
+              for i:=1 to len do
+                begin
+                  if not(SrcQuickRef) and (i<len) then
+                    srcref.addressmode:=AM_POSTINCREMENT
+                  else
+                    srcref.addressmode:=AM_UNCHANGED;
 
-                if not(DestQuickRef) and (i<len) then
-                  dstref.addressmode:=AM_POSTINCREMENT
-                else
-                  dstref.addressmode:=AM_UNCHANGED;
+                  if not(DestQuickRef) and (i<len) then
+                    dstref.addressmode:=AM_POSTINCREMENT
+                  else
+                    dstref.addressmode:=AM_UNCHANGED;
 
-                cg.getcpuregister(list,NR_R0);
-                list.concat(taicpu.op_reg_ref(GetLoad(srcref),NR_R0,srcref));
-                list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,NR_R0));
-                cg.ungetcpuregister(list,NR_R0);
+                  cg.getcpuregister(list,NR_R0);
+                  list.concat(taicpu.op_reg_ref(GetLoad(srcref),NR_R0,srcref));
+                  list.concat(taicpu.op_ref_reg(GetStore(dstref),dstref,NR_R0));
+                  cg.ungetcpuregister(list,NR_R0);
 
-                if SrcQuickRef then
-                  inc(srcref.offset);
-                if DestQuickRef then
-                  inc(dstref.offset);
-              end;
-            if not(SrcQuickRef) then
-              begin
-                ungetcpuregister(list,srcref.base);
-                ungetcpuregister(list,TRegister(ord(srcref.base)+1));
-              end;
-            if not(DestQuickRef) then
-              begin
-                ungetcpuregister(list,dstref.base);
-                ungetcpuregister(list,TRegister(ord(dstref.base)+1));
-              end;
-          end;
-      end;
+                  if SrcQuickRef then
+                    inc(srcref.offset);
+                  if DestQuickRef then
+                    inc(dstref.offset);
+                end;
+              if not(SrcQuickRef) then
+                begin
+                  ungetcpuregister(list,srcref.base);
+                  ungetcpuregister(list,TRegister(ord(srcref.base)+1));
+                end;
+              if not(DestQuickRef) then
+                begin
+                  ungetcpuregister(list,dstref.base);
+                  ungetcpuregister(list,TRegister(ord(dstref.base)+1));
+                end;
+            end;
+        end;
 
 
     procedure tcgavr.g_overflowCheck(list : TAsmList;const l : tlocation;def : tdef);
@@ -2317,7 +2386,7 @@ unit cgcpu;
         if not ((def.typ=pointerdef) or
                ((def.typ=orddef) and
                 (torddef(def).ordtype in [u64bit,u16bit,u32bit,u8bit,uchar,
-                                          pasbool8,pasbool16,pasbool32,pasbool64]))) then
+                                          pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]))) then
           cond:=C_VC
         else
           cond:=C_CC;

+ 6 - 1
compiler/avr/cpubase.pas

@@ -171,7 +171,7 @@ unit cpubase;
 *****************************************************************************}
 
     const
-      max_operands = 4;
+      max_operands = 2;
 
       maxintregs = 15;
       maxfpuregs = 0;
@@ -304,6 +304,7 @@ unit cpubase;
     function conditions_equal(const c1, c2: TAsmCond): boolean; {$ifdef USEINLINE}inline;{$endif USEINLINE}
 
     function dwarf_reg(r:tregister):byte;
+    function dwarf_reg_no_error(r:tregister):shortint;
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
 
@@ -426,6 +427,10 @@ unit cpubase;
         result:=reg;
       end;
 
+    function dwarf_reg_no_error(r:tregister):shortint;
+      begin
+        result:=regdwarf_table[findreg_by_number(r)];
+      end;
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
       begin

+ 1 - 1
compiler/avr/cpuinfo.pas

@@ -50,7 +50,7 @@ Type
    tfputype =
      (fpu_none,
       fpu_soft,
-      fp_libgcc
+      fpu_libgcc
      );
 
    tcontrollertype =

+ 16 - 5
compiler/avr/cpupara.pas

@@ -39,7 +39,7 @@ unit cpupara;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
           function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
          private
           procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword);
@@ -220,7 +220,10 @@ unit cpupara;
                paraloc^.loc:=LOC_REFERENCE;
                paraloc^.reference.index:=NR_STACK_POINTER_REG;
                paraloc^.reference.offset:=stack_offset;
+{$push}
+{$R-}
                dec(stack_offset,2);
+{$pop}
             end;
         end;
 
@@ -526,17 +529,25 @@ unit cpupara;
       end;
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
       var
         cur_stack_offset: aword;
         curintreg, curfloatreg, curmmreg: tsuperregister;
       begin
         init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset);
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset);
+        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset);
         if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset)
+          begin
+            { just continue loading the parameters in the registers }
+            if assigned(varargspara) then
+              begin
+                if side=callerside then
+                  result:=create_paraloc_info_intern(p,side,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset)
+                else
+                  internalerror(2019021914);
+              end;
+          end
         else
           internalerror(200410231);
       end;

+ 34 - 2
compiler/avr/navrinl.pas

@@ -42,7 +42,9 @@ unit navrinl;
       aasmdata,
       aasmcpu,
       symdef,
-      cgbase,
+      hlcgobj,
+      pass_2,
+      cgbase, cgobj, cgutils,
       cpubase;
 
     function tavrinlinenode.pass_typecheck_cpu : tnode;
@@ -58,6 +60,16 @@ unit navrinl;
               CheckParameters(0);
               resultdef:=voidtype;
             end;
+          in_avr_save:
+            begin
+              CheckParameters(0);
+              resultdef:=u8inttype;
+            end;
+          in_avr_restore:
+            begin
+              CheckParameters(1);
+              resultdef:=voidtype;
+            end;
           else
             Result:=inherited pass_typecheck_cpu;
         end;
@@ -72,11 +84,17 @@ unit navrinl;
           in_avr_sleep,
           in_avr_sei,
           in_avr_wdr,
-          in_avr_cli:
+          in_avr_cli,
+          in_avr_restore:
             begin
               expectloc:=LOC_VOID;
               resultdef:=voidtype;
             end;
+          in_avr_save:
+            begin
+              expectloc:=LOC_REGISTER;
+              resultdef:=u8inttype;
+            end;
           else
             Result:=inherited first_cpu;
         end;
@@ -96,6 +114,20 @@ unit navrinl;
             current_asmdata.CurrAsmList.concat(taicpu.op_none(A_WDR));
           in_avr_cli:
             current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
+          in_avr_save:
+            begin
+              location_reset(location,LOC_CREGISTER,OS_8);
+              location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_8);
+
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_IN, location.register, NIO_SREG));
+              current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
+            end;
+          in_avr_restore:
+            begin
+              secondpass(left);
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
+              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_OUT, NIO_SREG, left.location.register));
+            end;
           else
             inherited pass_generate_code_cpu;
         end;

+ 101 - 97
compiler/avr/raavr.pas

@@ -58,204 +58,208 @@ unit raavr;
         Operands: array[0..max_operands-1] of TAVROpConstraint;
       end;
 
+{$PUSH}
+{$WARN 3177 off : Some fields coming after "$1" were not initialized}
   const
     AVRInstrConstraint: array[TAsmOp] of TAVRInstrConstraint =
       // A_NONE
-      ((numOperands: 0; Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+      ((numOperands: 0; Operands: ((typ: top_none), (typ: top_none))),
       // A_ADD
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_ADC
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_ADIW
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_even_24_30), (typ: top_const; max: 63; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_even_24_30), (typ: top_const; max: 63; min: 0))),
        // A_SUB
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_SUBI
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128))),
        // A_SBC
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_SBCI
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128))),
        // A_SBRC
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 7; min: 0))),
        // A_SBRS
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 7; min: 0))),
        // A_SBIW
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_even_24_30), (typ: top_const; max: 63; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_even_24_30), (typ: top_const; max: 63; min: 0))),
        // A_AND
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_ANDI
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128))),
        // A_OR
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_ORI
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128))),
        // A_EOR
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_COM
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_NEG
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_SBR
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128))),
        // A_CBR
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128))),
        // A_INC
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_DEC
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_TST
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_MUL
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_MULS
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_reg; rt: rt_16_31), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_reg; rt: rt_16_31))),
        // A_MULSU
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_23), (typ: top_reg; rt: rt_16_23), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_23), (typ: top_reg; rt: rt_16_23))),
        // A_FMUL
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_23), (typ: top_reg; rt: rt_16_23), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_23), (typ: top_reg; rt: rt_16_23))),
        // A_FMULS
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_23), (typ: top_reg; rt: rt_16_23), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_23), (typ: top_reg; rt: rt_16_23))),
        // A_FMULSU
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_23), (typ: top_reg; rt: rt_16_23), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_23), (typ: top_reg; rt: rt_16_23))),
        // A_RJMP
-       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 2047; min: -2048), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 2047; min: -2048), (typ: top_reg; rt: rt_all))),
        // A_IJMP
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_EIJMP
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_JMP, max size depends on size op PC
-       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: (1 shl 22 - 1); min: 0), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: (1 shl 22 - 1); min: 0), (typ: top_none))),
        // A_RCALL
-       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 2047; min: -2048), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 2047; min: -2048), (typ: top_none))),
        // A_ICALL
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_EICALL
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CALL, max size depends on size op PC
-       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: (1 shl 22 - 1); min: 0), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: (1 shl 22 - 1); min: 0), (typ: top_none))),
        // A_RET
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_IRET
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CPSE
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_CP
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_CPC
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_CPI
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128))),
        // A_SBIC
-       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 31; min: 0), (typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 31; min: 0), (typ: top_const; max: 7; min: 0))),
        // A_SBIS
-       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 31; min: 0), (typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 31; min: 0), (typ: top_const; max: 7; min: 0))),
        // A_BRxx
-       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 63; min: -64), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 63; min: -64), (typ: top_none))),
        // A_MOV
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_all))),
        // A_MOVW
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_even), (typ: top_reg; rt: rt_even), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_even), (typ: top_reg; rt: rt_even))),
        // A_LDI
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_const; max: 255; min: -128))),
        // A_LDS TODO: There are 2 versions with different machine codes and constant ranges. Could possibly distinguish based on size of PC?
        // Perhaps handle separately with a check on sub-architecture? Range check only important if smaller instruction code selected on larger arch
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 65535; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 65535; min: 0))),
        // A_LD
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_XYZ; am: [AM_UNCHANGED, AM_POSTINCREMENT, AM_PREDRECEMENT]), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_XYZ; am: [AM_UNCHANGED, AM_POSTINCREMENT, AM_PREDRECEMENT]))),
        // A_LDD
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_YZ; am: [AM_UNCHANGED]; minconst: 0; maxconst: 63), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_YZ; am: [AM_UNCHANGED]; minconst: 0; maxconst: 63))),
        // A_STS TODO: See LDS above
-       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 65535; min: 0), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 65535; min: 0), (typ: top_reg; rt: rt_all))),
        // A_ST
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_XYZ; am: [AM_UNCHANGED, AM_POSTINCREMENT, AM_PREDRECEMENT]), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_XYZ; am: [AM_UNCHANGED, AM_POSTINCREMENT, AM_PREDRECEMENT]), (typ: top_reg; rt: rt_all))),
        // A_STD
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_YZ; am: [AM_UNCHANGED]; minconst: 0; maxconst: 63), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_YZ; am: [AM_UNCHANGED]; minconst: 0; maxconst: 63), (typ: top_reg; rt: rt_all))),
        // A_LPM
-       (numOperands: (1 shl 0 + 1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_Z; am: [AM_UNCHANGED, AM_POSTINCREMENT]), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0 + 1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_Z; am: [AM_UNCHANGED, AM_POSTINCREMENT]))),
        // A_ELPM
-       (numOperands: (1 shl 0 + 1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_Z; am: [AM_UNCHANGED, AM_POSTINCREMENT]), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0 + 1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_reg; rt: rt_Z; am: [AM_UNCHANGED, AM_POSTINCREMENT]))),
        // A_SPM
-       (numOperands: (1 shl 0 + 1 shl 1); Operands: ((typ: top_reg; rt: rt_Z; am: [AM_UNCHANGED, AM_POSTINCREMENT]), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0 + 1 shl 1); Operands: ((typ: top_reg; rt: rt_Z; am: [AM_UNCHANGED, AM_POSTINCREMENT]), (typ: top_none))),
        // A_IN
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 63; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 63; min: 0))),
        // A_OUT
-       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 63; min: 0), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 63; min: 0), (typ: top_reg; rt: rt_all))),
        // A_PUSH
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_POP
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_LSL
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_LSR
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_ROL
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_ROR
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_ASR
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_SWAP
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_BSET
-       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 7; min: 0), (typ: top_none))),
        // A_BCLR
-       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_const; max: 7; min: 0), (typ: top_none))),
        // A_SBI
-       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 32; min: 0), (typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 32; min: 0), (typ: top_const; max: 7; min: 0))),
        // A_CBI
-       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 32; min: 0), (typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_const; max: 32; min: 0), (typ: top_const; max: 7; min: 0))),
        // A_SEC
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_SEH
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_SEI
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_SEN
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_SER
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_16_31), (typ: top_none))),
        // A_SES
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_SET
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_SEV
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_SEZ
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CLC
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CLH
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CLI
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CLN
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CLR
-       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 1); Operands: ((typ: top_reg; rt: rt_all), (typ: top_none))),
        // A_CLS
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CLT
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CLV
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_CLZ
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_BST
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 7; min: 0))),
        // A_BLD
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 7; min: 0), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_all), (typ: top_const; max: 7; min: 0))),
        // A_BREAK
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_NOP
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_SLEEP
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_WDR
-       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none), (typ: top_none), (typ: top_none))),
+       (numOperands: (1 shl 0); Operands: ((typ: top_none), (typ: top_none))),
        // A_XCH
-       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_Z), (typ: top_reg; rt: rt_all), (typ: top_none), (typ: top_none)))
+       (numOperands: (1 shl 2); Operands: ((typ: top_reg; rt: rt_Z), (typ: top_reg; rt: rt_all)))
        );
+{$POP}
+
 
   implementation
     uses

+ 20 - 7
compiler/avr/rgcpu.pas

@@ -155,6 +155,13 @@ unit rgcpu;
               A_LDI:
                 for r:=RS_R0 to RS_R15 do
                   add_edge(r,GetSupReg(taicpu(p).oper[0]^.reg));
+              A_STS:
+                for r:=RS_R0 to RS_R15 do
+                  add_edge(r,GetSupReg(taicpu(p).oper[1]^.reg));
+              A_ADIW:
+                for r:=RS_R0 to RS_R31 do
+                  if not (r in [RS_R24,RS_R26,RS_R28,RS_R30]) then
+                    add_edge(r,GetSupReg(taicpu(p).oper[0]^.reg));
               A_MULS:
                 begin
                   for r:=RS_R0 to RS_R15 do
@@ -162,6 +169,14 @@ unit rgcpu;
                   for r:=RS_R0 to RS_R15 do
                     add_edge(r,GetSupReg(taicpu(p).oper[1]^.reg));
                 end;
+              A_LDD:
+                for r:=RS_R0 to RS_R31 do
+                  if not (r in [RS_R28,RS_R30]) then
+                    add_edge(r,GetSupReg(taicpu(p).oper[1]^.ref^.base));
+              A_STD:
+                for r:=RS_R0 to RS_R31 do
+                  if not (r in [RS_R28,RS_R30]) then
+                    add_edge(r,GetSupReg(taicpu(p).oper[0]^.ref^.base));
             end;
           end;
       end;
@@ -175,8 +190,8 @@ unit rgcpu;
         if not(spilltemp.offset in [0..63]) then
           exit;
 
-        { Replace 'mov  dst,orgreg' with 'ld  dst,spilltemp'
-          and     'mov  orgreg,src' with 'st  dst,spilltemp' }
+        { Replace 'mov  dst,orgreg' with 'ldd  dst,spilltemp'
+          and     'mov  orgreg,src' with 'std  spilltemp,src' }
         with instr do
           begin
             if (opcode=A_MOV) and (ops=2) and (oper[1]^.typ=top_reg) and (oper[0]^.typ=top_reg) then
@@ -185,10 +200,8 @@ unit rgcpu;
                    (get_alias(getsupreg(oper[0]^.reg))=orgreg) and
                    (get_alias(getsupreg(oper[1]^.reg))<>orgreg) then
                   begin
-                    { str expects the register in oper[0] }
-                    instr.loadreg(0,oper[1]^.reg);
-                    instr.loadref(1,spilltemp);
-                    opcode:=A_ST;
+                    instr.loadref(0,spilltemp);
+                    opcode:=A_STD;
                     result:=true;
                   end
                 else if (getregtype(oper[1]^.reg)=regtype) and
@@ -196,7 +209,7 @@ unit rgcpu;
                    (get_alias(getsupreg(oper[0]^.reg))<>orgreg) then
                   begin
                     instr.loadref(1,spilltemp);
-                    opcode:=A_LD;
+                    opcode:=A_LDD;
                     result:=true;
                   end;
               end;

+ 1 - 1
compiler/blockutl.pas

@@ -207,7 +207,7 @@ implementation
           exit;
         end;
       { bare copy, so that self etc are not inserted }
-      result:=tprocdef(orgpd.getcopyas(procdef,pc_bareproc));
+      result:=tprocdef(orgpd.getcopyas(procdef,pc_bareproc,''));
       { will be called accoding to the ABI conventions }
       result.proccalloption:=pocall_cdecl;
       { add po_is_block so that a block "self" pointer gets added (of the type

+ 9 - 4
compiler/browcol.pas

@@ -23,12 +23,17 @@
 {$ifdef TP}
   {$N+,E+}
 {$endif}
+
 unit browcol;
 
 {$i fpcdefs.inc}
 { $define use_refs}
 {$H-}
 
+{$ifdef cpullvm}
+{$modeswitch nestedprocvars}
+{$endif}
+
 interface
 
 uses
@@ -1745,7 +1750,7 @@ var P: PModuleSymbol;
 begin
   P:=nil;
   if Assigned(Modules) then
-    P:=Modules^.FirstThat(@Match);
+    P:=Modules^.FirstThat(TCallbackFunBoolParam(@Match));
   SearchModule:=P;
 end;
 
@@ -2198,7 +2203,7 @@ begin
        FixupSymbol(At(I));
 end;
 begin
-  Modules^.ForEach(@FixupSymbol);
+  Modules^.ForEach(TCallbackProcParam(@FixupSymbol));
 end;
 procedure ReadSymbolPointers(P: PSymbol);
 var I: sw_integer;
@@ -2222,7 +2227,7 @@ begin
   ReadPointers(S,ModuleNames,PD);
   ReadPointers(S,TypeNames,PD);
   ReadPointers(S,Modules,PD);
-  Modules^.ForEach(@ReadSymbolPointers);
+  Modules^.ForEach(TCallbackProcParam(@ReadSymbolPointers));
   FixupPointers;
   Dispose(PD, Done);
 
@@ -2261,7 +2266,7 @@ begin
   StorePointers(S,ModuleNames);
   StorePointers(S,TypeNames);
   StorePointers(S,Modules);
-  Modules^.ForEach(@WriteSymbolPointers);
+  Modules^.ForEach(TCallbackProcParam(@WriteSymbolPointers));
   StoreBrowserCol:=(S^.Status=stOK);
 end;
 

+ 87 - 93
compiler/cclasses.pas

@@ -365,21 +365,21 @@ type
           { Gets last Item }
           function  GetLast:TLinkedListItem;
           { inserts another List at the begin and make this List empty }
-          procedure insertList(p : TLinkedList);
+          procedure insertList(p : TLinkedList); virtual;
           { inserts another List before the provided item and make this List empty }
-          procedure insertListBefore(Item:TLinkedListItem;p : TLinkedList);
+          procedure insertListBefore(Item:TLinkedListItem;p : TLinkedList); virtual;
           { inserts another List after the provided item and make this List empty }
-          procedure insertListAfter(Item:TLinkedListItem;p : TLinkedList);
+          procedure insertListAfter(Item:TLinkedListItem;p : TLinkedList); virtual;
           { concats another List at the end and make this List empty }
-          procedure concatList(p : TLinkedList);
+          procedure concatList(p : TLinkedList); virtual;
           { concats another List at the start and makes a copy
             the list is ordered in reverse.
           }
-          procedure insertListcopy(p : TLinkedList);
+          procedure insertListcopy(p : TLinkedList); virtual;
           { concats another List at the end and makes a copy }
-          procedure concatListcopy(p : TLinkedList);
+          procedure concatListcopy(p : TLinkedList); virtual;
           { removes all items from the list, the items are not freed }
-          procedure RemoveAll;
+          procedure RemoveAll; virtual;
           property First:TLinkedListItem read FFirst;
           property Last:TLinkedListItem read FLast;
           property Count:Integer read FCount;
@@ -397,7 +397,7 @@ type
           constructor Create(const s:TCmdStr);
           destructor  Destroy;override;
           function GetCopy:TLinkedListItem;override;
-          function Str:TCmdStr; {$ifdef CCLASSESINLINE}inline;{$endif}
+          property Str: TCmdStr read FPStr;
        end;
 
        { string container }
@@ -422,9 +422,9 @@ type
           { true if string is in the container }
           function Find(const s:TCmdStr):TCmdStrListItem;
           { inserts an item }
-          procedure InsertItem(item:TCmdStrListItem); {$ifdef CCLASSESINLINE}inline;{$endif}
+          procedure InsertItem(item:TCmdStrListItem);
           { concats an item }
-          procedure ConcatItem(item:TCmdStrListItem); {$ifdef CCLASSESINLINE}inline;{$endif}
+          procedure ConcatItem(item:TCmdStrListItem);
           property Doubles:boolean read FDoubles write FDoubles;
        end;
 
@@ -615,6 +615,7 @@ implementation
           s : string;
         begin
           l := c-b;
+          s:='';
           if (l > 0) or AddEmptyStrings then
             begin
               setlength(s, l);
@@ -1052,6 +1053,11 @@ begin
   FFreeObjects := True;
 end;
 
+function TFPObjectList.IndexOf(AObject: TObject): Integer;
+begin
+  Result := FList.IndexOf(Pointer(AObject));
+end;
+
 function TFPObjectList.GetCount: integer;
 begin
   Result := FList.Count;
@@ -1124,11 +1130,6 @@ begin
   end;
 end;
 
-function TFPObjectList.IndexOf(AObject: TObject): Integer;
-begin
-  Result := FList.IndexOf(Pointer(AObject));
-end;
-
 function TFPObjectList.FindInstanceOf(AClass: TClass; AExact: Boolean; AStartAt : Integer): Integer;
 var
   I : Integer;
@@ -1763,72 +1764,6 @@ begin
 end;
 
 
-{*****************************************************************************
-                               TFPHashObject
-*****************************************************************************}
-
-procedure TFPHashObject.InternalChangeOwner(HashObjectList:TFPHashObjectList;const s:TSymStr);
-var
-  Index : integer;
-begin
-  FOwner:=HashObjectList;
-  Index:=HashObjectList.Add(s,Self);
-  FStrIndex:=HashObjectList.List.List^[Index].StrIndex;
-end;
-
-
-constructor TFPHashObject.CreateNotOwned;
-begin
-  FStrIndex:=-1;
-end;
-
-
-constructor TFPHashObject.Create(HashObjectList:TFPHashObjectList;const s:TSymStr);
-begin
-  InternalChangeOwner(HashObjectList,s);
-end;
-
-
-procedure TFPHashObject.ChangeOwner(HashObjectList:TFPHashObjectList);
-begin
-  InternalChangeOwner(HashObjectList,PSymStr(@FOwner.List.Strs[FStrIndex])^);
-end;
-
-
-procedure TFPHashObject.ChangeOwnerAndName(HashObjectList:TFPHashObjectList;const s:TSymStr);
-begin
-  InternalChangeOwner(HashObjectList,s);
-end;
-
-
-procedure TFPHashObject.Rename(const ANewName:TSymStr);
-var
-  Index : integer;
-begin
-  Index:=FOwner.Rename(PSymStr(@FOwner.List.Strs[FStrIndex])^,ANewName);
-  if Index<>-1 then
-    FStrIndex:=FOwner.List.List^[Index].StrIndex;
-end;
-
-
-function TFPHashObject.GetName:TSymStr;
-begin
-  if FOwner<>nil then
-    Result:=PSymStr(@FOwner.List.Strs[FStrIndex])^
-  else
-    Result:='';
-end;
-
-
-function TFPHashObject.GetHash:Longword;
-begin
-  if FOwner<>nil then
-    Result:=FPHash(PSymStr(@FOwner.List.Strs[FStrIndex])^)
-  else
-    Result:=$ffffffff;
-end;
-
-
 {*****************************************************************************
             TFPHashObjectList (Copied from rtl/objpas/classes/lists.inc)
 *****************************************************************************}
@@ -1861,6 +1796,11 @@ begin
   FHashList.Clear;
 end;
 
+function TFPHashObjectList.IndexOf(AObject: TObject): Integer;
+begin
+  Result := FHashList.IndexOf(Pointer(AObject));
+end;
+
 function TFPHashObjectList.GetCount: integer;
 begin
   Result := FHashList.Count;
@@ -1943,12 +1883,6 @@ begin
     end;
 end;
 
-function TFPHashObjectList.IndexOf(AObject: TObject): Integer;
-begin
-  Result := FHashList.IndexOf(Pointer(AObject));
-end;
-
-
 function TFPHashObjectList.Find(const s:TSymStr): TObject;
 begin
   result:=TObject(FHashList.Find(s));
@@ -2030,6 +1964,72 @@ begin
 end;
 
 
+{*****************************************************************************
+                               TFPHashObject
+*****************************************************************************}
+
+procedure TFPHashObject.InternalChangeOwner(HashObjectList:TFPHashObjectList;const s:TSymStr);
+var
+  Index : integer;
+begin
+  FOwner:=HashObjectList;
+  Index:=HashObjectList.Add(s,Self);
+  FStrIndex:=HashObjectList.List.List^[Index].StrIndex;
+end;
+
+
+constructor TFPHashObject.CreateNotOwned;
+begin
+  FStrIndex:=-1;
+end;
+
+
+constructor TFPHashObject.Create(HashObjectList:TFPHashObjectList;const s:TSymStr);
+begin
+  InternalChangeOwner(HashObjectList,s);
+end;
+
+
+procedure TFPHashObject.ChangeOwner(HashObjectList:TFPHashObjectList);
+begin
+  InternalChangeOwner(HashObjectList,PSymStr(@FOwner.List.Strs[FStrIndex])^);
+end;
+
+
+procedure TFPHashObject.ChangeOwnerAndName(HashObjectList:TFPHashObjectList;const s:TSymStr);
+begin
+  InternalChangeOwner(HashObjectList,s);
+end;
+
+
+procedure TFPHashObject.Rename(const ANewName:TSymStr);
+var
+  Index : integer;
+begin
+  Index:=FOwner.Rename(PSymStr(@FOwner.List.Strs[FStrIndex])^,ANewName);
+  if Index<>-1 then
+    FStrIndex:=FOwner.List.List^[Index].StrIndex;
+end;
+
+
+function TFPHashObject.GetName:TSymStr;
+begin
+  if FOwner<>nil then
+    Result:=PSymStr(@FOwner.List.Strs[FStrIndex])^
+  else
+    Result:='';
+end;
+
+
+function TFPHashObject.GetHash:Longword;
+begin
+  if FOwner<>nil then
+    Result:=FPHash(PSymStr(@FOwner.List.Strs[FStrIndex])^)
+  else
+    Result:=$ffffffff;
+end;
+
+
 {****************************************************************************
                              TLinkedListItem
  ****************************************************************************}
@@ -2383,12 +2383,6 @@ end;
       end;
 
 
-    function TCmdStrListItem.Str:TCmdStr;
-      begin
-        Str:=FPStr;
-      end;
-
-
     function TCmdStrListItem.GetCopy:TLinkedListItem;
       begin
         Result:=(inherited GetCopy);

+ 0 - 2
compiler/cfidwarf.pas

@@ -213,8 +213,6 @@ implementation
                 list.concat(tai_const.create_rel_sym(enc2ait_const[oper[i].enc],oper[i].beginsym,oper[i].endsym));
               dop_reg :
                 list.concat(tai_const.create(enc2ait_const[oper[i].enc],dwarf_reg(oper[i].register)));
-              else
-                internalerror(200404128);
             end;
           end;
       end;

+ 16 - 3
compiler/cfileutl.pas

@@ -37,6 +37,9 @@ interface
 {$if defined(go32v2) or defined(watcom)}
       Dos,
 {$endif}
+{$ifdef macos}
+      macutils,
+{$endif macos}
 {$IFNDEF USE_FAKE_SYSUTILS}
       SysUtils,
 {$ELSE}
@@ -146,7 +149,7 @@ interface
 {$NOTE TODO Amiga: implement PathConv() in System unit, which works with AnsiString}
 function Unix2AmigaPath(path: ShortString): ShortString; external name 'PATHCONV';
 {$ELSE}
-function Unix2AmigaPath(path: String): String;{$IFDEF USEINLINE}inline;{$ENDIF}
+function Unix2AmigaPath(path: String): String;
 {$ENDIF}
 
 {$if FPC_FULLVERSION < 20701}
@@ -191,7 +194,7 @@ implementation
 {$IFNDEF HASAMIGA}
 { Stub function for Unix2Amiga Path conversion functionality, only available in
   Amiga/MorphOS RTL. I'm open for better solutions. (KB) }
-function Unix2AmigaPath(path: String): String;{$IFDEF USEINLINE}inline;{$ENDIF}
+function Unix2AmigaPath(path: String): String;
 begin
   Unix2AmigaPath:=path;
 end;
@@ -495,6 +498,7 @@ end;
       var
          i : longint;
       begin
+        Result:='';
         setlength(bstoslash,length(s));
         for i:=1to length(s) do
          if s[i]='\' then
@@ -1280,8 +1284,16 @@ end;
 
 
    function  FindExe(const bin:TCmdStr;allowcache:boolean;var foundfile:TCmdStr):boolean;
+     var
+       b : TCmdStr;
      begin
-       FindExe:=FindFileInExeLocations(ChangeFileExt(bin,source_info.exeext),allowcache,foundfile);
+       { change extension only on platforms that use an exe extension, otherwise on OpenBSD
+         'ld.bfd' gets converted to 'ld' }
+       if source_info.exeext<>'' then
+         b:=ChangeFileExt(bin,source_info.exeext)
+       else
+         b:=bin;
+       FindExe:=FindFileInExeLocations(b,allowcache,foundfile);
      end;
 
 
@@ -1299,6 +1311,7 @@ end;
         GetShortName:=n;
 {$ifdef win32}
         hs:=n+#0;
+        hs2:='';
         { may become longer in case of e.g. ".a" -> "a~1" or so }
         setlength(hs2,length(hs)*2);
         i:=Windows.GetShortPathName(@hs[1],@hs2[1],length(hs)*2);

+ 22 - 2
compiler/cgbase.pas

@@ -63,8 +63,6 @@ interface
        TCGNonRefLoc=low(TCGLoc)..pred(LOC_CREFERENCE);
        TCGRefLoc=LOC_CREFERENCE..LOC_REFERENCE;
 
-       { since we have only 16bit offsets, we need to be able to specify the high
-         and lower 16 bits of the address of a symbol of up to 64 bit }
        trefaddr = (
          addr_no,
          addr_full,
@@ -72,6 +70,8 @@ interface
          addr_pic_no_got
          {$IF defined(POWERPC) or defined(POWERPC64) or defined(SPARC) or defined(MIPS) or defined(SPARC64)}
          ,
+         { since we have only 16bit offsets, we need to be able to specify the high
+           and lower 16 bits of the address of a symbol of up to 64 bit }
          addr_low,         // bits 48-63
          addr_high,        // bits 32-47
          {$IF defined(POWERPC64)}
@@ -93,6 +93,14 @@ interface
          addr_low_call,    // counterpart of two above, generate call_hi16 and call_lo16 relocs
          addr_high_call
          {$ENDIF}
+         {$if defined(RISCV32) or defined(RISCV64)}
+         ,
+         addr_hi20,
+         addr_lo12,
+         addr_pcrel_hi20,
+         addr_pcrel_lo12,
+         addr_pcrel
+         {$endif RISCV}
          {$IFDEF AVR}
          ,addr_lo8
          ,addr_lo8_gs
@@ -114,6 +122,18 @@ interface
          ,addr_gdop_hix22
          ,addr_gdop_lox22
          {$endif SPARC64}
+         {$IFDEF ARM}
+         ,addr_gottpoff
+         ,addr_tpoff
+         {$ENDIF}
+         {$IFDEF i386}
+         ,addr_ntpoff
+         ,addr_tlsgd
+         {$ENDIF}
+{$ifdef x86_64}
+          ,addr_tpoff
+          ,addr_tlsgd
+{$endif x86_64}
          );
 
 

+ 74 - 14
compiler/cgobj.pas

@@ -170,6 +170,9 @@ unit cgobj;
              @param(cgpara where the parameter will be stored)
           }
           procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const cgpara : TCGPara);virtual;
+         protected
+          procedure a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation); virtual;
+         public
           {# Pass the value of a parameter, which can be located either in a register or memory location,
              to a routine.
 
@@ -437,6 +440,8 @@ unit cgobj;
 
           { initialize the pic/got register }
           procedure g_maybe_got_init(list: TAsmList); virtual;
+          { initialize the tls register if needed }
+          procedure g_maybe_tls_init(list : TAsmList); virtual;
           { allocallcpuregisters, a_call_name, deallocallcpuregisters sequence }
           procedure g_call(list: TAsmList; const s: string);
           { Generate code to exit an unwind-protected region. The default implementation
@@ -446,6 +451,10 @@ unit cgobj;
             generic version is suitable for 3-address CPUs }
           procedure g_div_const_reg_reg(list:tasmlist; size: TCgSize; a: tcgint; src,dst: tregister); virtual;
 
+          { some CPUs do not support hardware fpu exceptions, this procedure is called after instructions which
+            might set FPU exception related flags, so it has to check these flags if needed and throw an exeception }
+          procedure g_check_for_fpu_exception(list: TAsmList); virtual;
+
          protected
           function g_indirect_sym_load(list:TAsmList;const symname: string; const flags: tindsymflags): tregister;virtual;
        end;
@@ -1123,16 +1132,8 @@ implementation
                 end;
               LOC_REFERENCE,LOC_CREFERENCE:
                 begin
-                   if assigned(location^.next) then
-                     internalerror(2010052906);
-                   reference_reset_base(ref,location^.reference.index,location^.reference.offset,ctempposinvalid,newalignment(cgpara.alignment,cgpara.intsize-sizeleft),[]);
-                   if (size <> OS_NO) and
-                      (tcgsize2size[size] <= sizeof(aint)) then
-                     a_load_ref_ref(list,size,location^.size,tmpref,ref)
-                   else
-                     { use concatcopy, because the parameter can be larger than }
-                     { what the OS_* constants can handle                       }
-                     g_concatcopy(list,tmpref,ref,sizeleft);
+                  reference_reset_base(ref,location^.reference.index,location^.reference.offset,ctempposinvalid,newalignment(cgpara.alignment,cgpara.intsize-sizeleft),[]);
+                  a_load_ref_cgparalocref(list,size,sizeleft,tmpref,ref,cgpara,location);
                 end;
               LOC_MMREGISTER,LOC_CMMREGISTER:
                 begin
@@ -1147,6 +1148,10 @@ implementation
                      else
                        internalerror(2010053101);
                    end;
+                end;
+              LOC_FPUREGISTER,LOC_CFPUREGISTER:
+                begin
+                  a_loadfpu_ref_reg(list,size,location^.size,tmpref,location^.register);
                 end
               else
                 internalerror(2010053111);
@@ -1157,6 +1162,19 @@ implementation
           end;
       end;
 
+    procedure tcg.a_load_ref_cgparalocref(list: TAsmList; sourcesize: tcgsize; sizeleft: tcgint; const ref, paralocref: treference; const cgpara: tcgpara; const location: PCGParaLocation);
+      begin
+        if assigned(location^.next) then
+          internalerror(2010052906);
+        if (sourcesize<>OS_NO) and
+           (tcgsize2size[sourcesize]<=sizeof(aint)) then
+           a_load_ref_ref(list,sourcesize,location^.size,ref,paralocref)
+        else
+          { use concatcopy, because the parameter can be larger than }
+          { what the OS_* constants can handle                       }
+          g_concatcopy(list,ref,paralocref,sizeleft);
+       end;
+
 
     procedure tcg.a_load_loc_cgpara(list : TAsmList;const l:tlocation;const cgpara : TCGPara);
       begin
@@ -1775,10 +1793,14 @@ implementation
                 a:=a and 15;
               OS_8,OS_S8:
                 a:=a and 7;
+              else
+                internalerror(2019050521);
             end;
             if a = 0 then
               op:=OP_NONE;
           end;
+        else
+          ;
         end;
       end;
 
@@ -1921,11 +1943,20 @@ implementation
     procedure tcg.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
       var
         tmpreg : tregister;
+        tmpref : treference;
       begin
+        if assigned(ref.symbol) then
+          begin
+            tmpreg:=getaddressregister(list);
+            a_loadaddr_ref_reg(list,ref,tmpreg);
+            reference_reset_base(tmpref,tmpreg,0,ref.temppos,ref.alignment,[]);
+          end
+        else
+          tmpref:=ref;
         tmpreg:=getintregister(list,size);
-        a_load_ref_reg(list,size,size,ref,tmpreg);
+        a_load_ref_reg(list,size,size,tmpref,tmpreg);
         a_op_const_reg(list,op,size,a,tmpreg);
-        a_load_reg_ref(list,size,size,tmpreg,ref);
+        a_load_reg_ref(list,size,size,tmpreg,tmpref);
       end;
 
 
@@ -1945,9 +1976,18 @@ implementation
     procedure tcg.a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize;reg: TRegister;  const ref: TReference);
       var
         tmpreg : tregister;
+        tmpref : treference;
       begin
+        if assigned(ref.symbol) then
+          begin
+            tmpreg:=getaddressregister(list);
+            a_loadaddr_ref_reg(list,ref,tmpreg);
+            reference_reset_base(tmpref,tmpreg,0,ref.temppos,ref.alignment,[]);
+          end
+        else
+          tmpref:=ref;
         tmpreg:=getintregister(list,size);
-        a_load_ref_reg(list,size,size,ref,tmpreg);
+        a_load_ref_reg(list,size,size,tmpref,tmpreg);
         if op in [OP_NEG,OP_NOT] then
           begin
             if reg<>NR_NO then
@@ -1956,7 +1996,7 @@ implementation
           end
         else
           a_op_reg_reg(list,op,size,reg,tmpreg);
-        a_load_reg_ref(list,size,size,tmpreg,ref);
+        a_load_reg_ref(list,size,size,tmpreg,tmpref);
       end;
 
 
@@ -2087,6 +2127,8 @@ implementation
                     a_load_const_reg(list,OS_16,0,dst);
                     exit;
                   end;
+                else
+                  ;
               end;
           end;
         OP_SHR:
@@ -2099,9 +2141,13 @@ implementation
                     a_load_const_reg(list,OS_16,0,GetNextReg(dst));
                     exit;
                   end;
+                else
+                  ;
               end;
           end;
 {$endif cpu16bitalu}
+        else
+          ;
       end;
       a_load_reg_reg(list,size,size,src,dst);
       a_op_const_reg(list,op,size,a,dst);
@@ -2751,6 +2797,8 @@ implementation
               { a_load_ref_reg will turn this into a pic-load if needed }
               a_load_ref_reg(list,OS_ADDR,OS_ADDR,ref,result);
             end;
+          else
+            ;
         end;
       end;
 
@@ -2759,6 +2807,12 @@ implementation
       begin
       end;
 
+
+    procedure tcg.g_maybe_tls_init(list: TAsmList);
+      begin
+      end;
+
+
     procedure tcg.g_call(list: TAsmList;const s: string);
       begin
         allocallcpuregisters(list);
@@ -2876,6 +2930,12 @@ implementation
       end;
 
 
+    procedure tcg.g_check_for_fpu_exception(list: TAsmList);
+      begin
+        { empty by default }
+      end;
+
+
 {*****************************************************************************
                                     TCG64
 *****************************************************************************}

+ 9 - 6
compiler/cgutils.pas

@@ -74,7 +74,10 @@ unit cgutils;
          base,
          index       : tregister;
          refaddr     : trefaddr;
-         scalefactor : byte;
+         scalefactor : byte;     
+{$if defined(riscv32) or defined(riscv64)}
+         symboldata  : tlinkedlistitem;
+{$endif riscv32/64}
 {$ifdef arm}
          symboldata  : tlinkedlistitem;
          signindex   : shortint;
@@ -132,15 +135,15 @@ unit cgutils;
 {$endif cpuflags}
             LOC_CONSTANT : (
               case longint of
-{$ifdef cpu64bitalu}
+{$if defined(cpu64bitalu) or defined(cpuhighleveltarget)}
                 1 : (value : Int64);
-{$else cpu64bitalu}
+{$else cpu64bitalu or cpuhighleveltarget}
     {$ifdef FPC_BIG_ENDIAN}
                 1 : (_valuedummy,value : longint);
     {$else FPC_BIG_ENDIAN}
                 1 : (value : longint);
     {$endif FPC_BIG_ENDIAN}
-{$endif cpu64bitalu}
+{$endif cpu64bitalu or cpuhighleveltarget}
                 2 : (value64 : Int64);
               );
             LOC_CREFERENCE,
@@ -161,10 +164,10 @@ unit cgutils;
 {$ifdef cpu64bitalu}
                 { overlay a 128 Bit register type }
                 2 : (register128 : tregister128);
-{$else cpu64bitalu}
+{$else if not defined(cpuhighleveltarget}
                 { overlay a 64 Bit register type }
                 2 : (register64 : tregister64);
-{$endif cpu64bitalu}
+{$endif cpu64bitalu and not cpuhighleveltarget}
 {$ifdef cpu8bitalu}
                 3 : (registers : array[0..3] of tregister);
 {$endif cpu8bitalu}

+ 5 - 0
compiler/compinnr.pas

@@ -117,6 +117,7 @@ type
      in_not_assign_x      = 95,
      in_gettypekind_x     = 96,
      in_faraddr_x         = 97,
+     in_volatile_x        = 98,
 
 { Internal constant functions }
      in_const_sqr        = 100,
@@ -164,6 +165,10 @@ type
 
      { SSE }
 
+{$if defined(X86)}
+     ,
+     {$i x86/cx86innr.inc}
+{$endif }
 {$if defined(AVR)}
      ,
      {$i ccpuinnr.inc}

+ 2 - 2
compiler/cresstr.pas

@@ -119,7 +119,7 @@ uses
 
     Constructor Tresourcestrings.Create;
       begin
-        List:=TLinkedList.Create;
+        List:=TAsmList.Create;
       end;
 
 
@@ -308,7 +308,7 @@ uses
         resstrs.RegisterResourceStrings;
         if not resstrs.List.Empty then
           begin
-            current_module.flags:=current_module.flags or uf_has_resourcestrings;
+            include(current_module.moduleflags,mf_has_resourcestrings);
             resstrs.CreateResourceStringData;
             resstrs.WriteRSJFile;
           end;

+ 1 - 0
compiler/cstreams.pas

@@ -333,6 +333,7 @@ implementation
     TheSize : Longint;
     P : PByte ;
   begin
+    Result:='';
     ReadBuffer (TheSize,SizeOf(TheSize));
     SetLength(Result,TheSize);
     // Illegal typecast if no AnsiStrings defined.

+ 151 - 8
compiler/cutils.pas

@@ -42,9 +42,11 @@ interface
     {# Returns the minimal value between @var(a) and @var(b) }
     function min(a,b : longint) : longint;{$ifdef USEINLINE}inline;{$endif}
     function min(a,b : int64) : int64;{$ifdef USEINLINE}inline;{$endif}
+    function min(a,b : qword) : qword;{$ifdef USEINLINE}inline;{$endif}
     {# Returns the maximum value between @var(a) and @var(b) }
     function max(a,b : longint) : longint;{$ifdef USEINLINE}inline;{$endif}
     function max(a,b : int64) : int64;{$ifdef USEINLINE}inline;{$endif}
+    function max(a,b : qword) : qword;{$ifdef USEINLINE}inline;{$endif}
 
     { These functions are intenionally put here and not in the constexp unit.
       Since Tconstexprint may be automatically converted to int, which causes
@@ -69,6 +71,8 @@ interface
     function reverse_byte(b: byte): byte;
     {# Return @var(w) with the bit order reversed }
     function reverse_word(w: word): word;
+    {# Return @var(l) with the bit order reversed }
+    function reverse_longword(l: longword): longword;
 
     function next_prime(l: longint): longint;
 
@@ -140,7 +144,8 @@ interface
 
     { allocates mem for a copy of s, copies s to this mem and returns }
     { a pointer to this mem                                           }
-    function stringdup(const s : string) : pshortstring;{$ifdef USEINLINE}inline;{$endif}
+    function stringdup(const s : shortstring) : pshortstring;{$ifdef USEINLINE}inline;{$endif}
+    function stringdup(const s : ansistring) : pshortstring;{$ifdef USEINLINE}inline;{$endif}
 
     {# Allocates memory for the string @var(s) and copies s as zero
        terminated string to that allocated memory and returns a pointer
@@ -175,6 +180,11 @@ interface
 
     Function nextafter(x,y:double):double;
 
+    function LengthUleb128(a: qword) : byte;
+    function LengthSleb128(a: int64) : byte;
+    function EncodeUleb128(a: qword;out buf) : byte;
+    function EncodeSleb128(a: int64;out buf) : byte;
+
   { hide Sysutils.ExecuteProcess in units using this one after SysUtils}
   const
     ExecuteProcess = 'Do not use' deprecated 'Use cfileutil.RequotedExecuteProcess instead, ExecuteProcess cannot deal with single quotes as used by Unix command lines';
@@ -225,6 +235,18 @@ implementation
       end;
 
 
+    function min(a,b : qword) : qword;
+    {
+      return the minimal of a and b
+    }
+      begin
+         if a<=b then
+           min:=a
+         else
+           min:=b;
+      end;
+
+
     function max(a,b : longint) : longint;{$ifdef USEINLINE}inline;{$endif}
     {
       return the maximum of a and b
@@ -249,6 +271,18 @@ implementation
       end;
 
 
+    function max(a,b : qword) : qword;{$ifdef USEINLINE}inline;{$endif}
+    {
+      return the maximum of a and b
+    }
+      begin
+         if a>=b then
+           max:=a
+         else
+           max:=b;
+      end;
+
+
     function max(const a,b : Tconstexprint) : Tconstexprint;{$ifdef USEINLINE}inline;{$endif}
     {
       return the maximum of a and b
@@ -292,6 +326,21 @@ implementation
         TWordRec(reverse_word).lo := reverse_byte(TWordRec(w).hi);
       end;
 
+
+    function reverse_longword(l: longword): longword;
+      type
+        TLongWordRec = packed record
+          b: array[0..3] of Byte;
+        end;
+
+      begin
+        TLongWordRec(reverse_longword).b[0] := reverse_byte(TLongWordRec(l).b[3]);
+        TLongWordRec(reverse_longword).b[1] := reverse_byte(TLongWordRec(l).b[2]);
+        TLongWordRec(reverse_longword).b[2] := reverse_byte(TLongWordRec(l).b[1]);
+        TLongWordRec(reverse_longword).b[3] := reverse_byte(TLongWordRec(l).b[0]);
+      end;
+
+
     function align(i,a:longint):longint;{$ifdef USEINLINE}inline;{$endif}
     {
       return value <i> aligned <a> boundary
@@ -303,9 +352,9 @@ implementation
         else
           begin
             if i<0 then
-              result:=((i-a+1) div a) * a
+              result:=((i+1-a) div a) * a
             else
-              result:=((i+a-1) div a) * a;
+              result:=((i-1+a) div a) * a;
           end;
       end;
 
@@ -321,9 +370,9 @@ implementation
         else
           begin
             if i<0 then
-              result:=((i-a+1) div a) * a
+              result:=((i+1-a) div a) * a
             else
-              result:=((i+a-1) div a) * a;
+              result:=((i-1+a) div a) * a;
           end;
       end;
 
@@ -334,10 +383,10 @@ implementation
     }
       begin
         { for 0 and 1 no aligning is needed }
-        if a<=1 then
+        if (a<=1) or (i=0) then
           result:=i
         else
-          result:=((i+a-1) div a) * a;
+          result:=((i-1+a) div a) * a;
       end;
 
 
@@ -620,6 +669,7 @@ implementation
       var
         i  : longint;
       begin
+        Result:='';
         setlength(upper,length(s));
         for i:=1 to length(s) do
           upper[i]:=uppertbl[s[i]];
@@ -655,6 +705,7 @@ implementation
       var
         i : longint;
       begin
+        Result:='';
         setlength(lower,length(s));
         for i:=1 to length(s) do
           lower[i]:=lowertbl[s[i]];
@@ -1060,6 +1111,7 @@ implementation
         t: string;
         ch: Char;
     begin
+      t:='';
       DePascalQuote:= false;
       len:= length(s);
       if (len >= 1) and (s[1] = '''') then
@@ -1175,13 +1227,19 @@ implementation
       end;
 
 
-    function stringdup(const s : string) : pshortstring;{$ifdef USEINLINE}inline;{$endif}
+    function stringdup(const s : shortstring) : pshortstring;{$ifdef USEINLINE}inline;{$endif}
       begin
          getmem(result,length(s)+1);
          result^:=s;
       end;
 
 
+    function stringdup(const s : ansistring) : pshortstring;{$ifdef USEINLINE}inline;{$endif}
+      begin
+         getmem(result,length(s)+1);
+         result^:=s;
+      end;
+
     function CompareStr(const S1, S2: string): Integer;
       var
         count, count1, count2: integer;
@@ -1578,6 +1636,91 @@ implementation
     end;
 
 
+    function LengthUleb128(a: qword) : byte;
+      begin
+        result:=0;
+        repeat
+          a := a shr 7;
+          inc(result);
+          if a=0 then
+            break;
+        until false;
+      end;
+
+
+    function LengthSleb128(a: int64) : byte;
+      var
+        b, size: byte;
+        more: boolean;
+      begin
+        more := true;
+        size := sizeof(a)*8;
+        result:=0;
+        repeat
+          b := a and $7f;
+          a := SarInt64(a, 7);
+
+          if (
+            ((a = 0) and (b and $40 = 0)) or
+            ((a = -1) and (b and $40 <> 0))
+          ) then
+            more := false;
+          inc(result);
+          if not(more) then
+            break;
+        until false;
+      end;
+
+
+    function EncodeUleb128(a: qword;out buf) : byte;
+      var
+        b: byte;
+        pbuf : pbyte;
+      begin
+        result:=0;
+        pbuf:=@buf;
+        repeat
+          b := a and $7f;
+          a := a shr 7;
+          if a<>0 then
+            b := b or $80;
+          pbuf^:=b;
+          inc(pbuf);
+          inc(result);
+          if a=0 then
+            break;
+        until false;
+      end;
+
+
+    function EncodeSleb128(a: int64;out buf) : byte;
+      var
+        b, size: byte;
+        more: boolean;
+        pbuf : pbyte;
+      begin
+        more := true;
+        size := sizeof(a)*8;
+        result:=0;
+        pbuf:=@buf;
+        repeat
+          b := a and $7f;
+          a := SarInt64(a, 7);
+
+          if (
+            ((a = 0) and (b and $40 = 0)) or
+            ((a = -1) and (b and $40 <> 0))
+          ) then
+            more := false
+          else
+            b := b or $80;
+          pbuf^:=b;
+          inc(pbuf);
+          inc(result);
+        until not more;
+      end;
+
+
 initialization
   internalerrorproc:=@defaulterror;
   initupperlower;

+ 10 - 2
compiler/dbgbase.pas

@@ -339,8 +339,6 @@ implementation
                       else
                         internalerror(2012072402);
                     end;
-                  else
-                    internalerror(200610054);
                 end;
               end;
             looplist.clear;
@@ -476,6 +474,8 @@ implementation
             list.concat(tai_comment.Create(strpnew('Defs - Begin Staticsymtable')));
           globalsymtable :
             list.concat(tai_comment.Create(strpnew('Defs - Begin unit '+st.name^+' has index '+tostr(st.moduleid))));
+          else
+            ;
         end;
         repeat
           nonewadded:=true;
@@ -494,6 +494,8 @@ implementation
             list.concat(tai_comment.Create(strpnew('Defs - End Staticsymtable')));
           globalsymtable :
             list.concat(tai_comment.Create(strpnew('Defs - End unit '+st.name^+' has index '+tostr(st.moduleid))));
+          else
+            ;
         end;
       end;
 
@@ -528,6 +530,8 @@ implementation
             list.concat(tai_comment.Create(strpnew('Syms - Begin Staticsymtable')));
           globalsymtable :
             list.concat(tai_comment.Create(strpnew('Syms - Begin unit '+st.name^+' has index '+tostr(st.moduleid))));
+          else
+            ;
         end;
         for i:=0 to st.SymList.Count-1 do
           begin
@@ -545,6 +549,8 @@ implementation
             list.concat(tai_comment.Create(strpnew('Syms - End Staticsymtable')));
           globalsymtable :
             list.concat(tai_comment.Create(strpnew('Syms - End unit '+st.name^+' has index '+tostr(st.moduleid))));
+          else
+            ;
         end;
       end;
 
@@ -568,6 +574,8 @@ implementation
                 begin
                   write_symtable_procdefs(list,tabstractrecorddef(def).symtable);
                 end;
+              else
+                ;
             end;
           end;
       end;

+ 4 - 0
compiler/dbgcodeview.pas

@@ -229,8 +229,12 @@ implementation
                       inc(nolineinfolevel);
                     mark_NoLineInfoEnd:
                       dec(nolineinfolevel);
+                    else
+                      ;
                   end;
                 end;
+              else
+                ;
             end;
 
             { OMF LINNUM records do not support multiple source files }

+ 328 - 53
compiler/dbgdwarf.pas

@@ -41,6 +41,7 @@ interface
 
     uses
       cclasses,globtype,
+      cgbase,
       aasmbase,aasmtai,aasmdata,
       symbase,symconst,symtype,symdef,symsym,
       finput,
@@ -196,6 +197,11 @@ interface
         DW_AT_HP_all_variables_modifiable := $2019,
         DW_AT_HP_linkage_name := $201a,DW_AT_HP_prof_flags := $201b,
 
+        { WATCOM extensions. }
+        DW_AT_WATCOM_memory_model := $2082,
+        DW_AT_WATCOM_references_start := $2083,
+        DW_AT_WATCOM_parm_entry := $2084,
+
         { GNU extensions.   }
         DW_AT_sf_names := $2101,DW_AT_src_info := $2102,
         DW_AT_mac_info := $2103,DW_AT_src_coords := $2104,
@@ -250,6 +256,17 @@ interface
         DW_ADDR_far32 := 5
       );
 
+      { values of DW_AT_WATCOM_memory_model }
+      Tdwarf_watcom_memory_model = (
+        DW_WATCOM_MEMORY_MODEL_none := 0,
+        DW_WATCOM_MEMORY_MODEL_flat := 1,
+        DW_WATCOM_MEMORY_MODEL_small := 2,
+        DW_WATCOM_MEMORY_MODEL_medium := 3,
+        DW_WATCOM_MEMORY_MODEL_compact := 4,
+        DW_WATCOM_MEMORY_MODEL_large := 5,
+        DW_WATCOM_MEMORY_MODEL_huge := 6
+      );
+
       TDwarfFile = record
         Index: integer;
         Name: PChar;
@@ -345,6 +362,8 @@ interface
 
         function get_def_dwarf_labs(def:tdef): PDwarfHashSetItem;
 
+        function is_fbreg(reg:tregister):boolean;
+
         { Convenience version of the method below, so the compiler creates the
           tvarrec for us (must only pass one element in the last parameter).  }
         procedure append_attribute(attr: tdwarf_attribute; form: tdwarf_form; const values: array of const);
@@ -357,6 +376,12 @@ interface
         procedure append_labelentry_dataptr_abs(attr : tdwarf_attribute;sym : tasmsymbol);
         procedure append_labelentry_dataptr_rel(attr : tdwarf_attribute;sym,endsym : tasmsymbol);
         procedure append_labelentry_dataptr_common(attr : tdwarf_attribute);
+        procedure append_pointerclass(list:TAsmList;def:tpointerdef);
+        procedure append_proc_frame_base(list:TAsmList;def:tprocdef);
+{$ifdef i8086}
+        procedure append_seg_name(const name:string);
+        procedure append_seg_reg(const segment_register:tregister);
+{$endif i8086}
 
         procedure beforeappenddef(list:TAsmList;def:tdef);override;
         procedure afterappenddef(list:TAsmList;def:tdef);override;
@@ -458,9 +483,9 @@ implementation
     uses
       sysutils,cutils,cfileutl,constexp,
       version,globals,verbose,systems,
-      cpubase,cpuinfo,cgbase,paramgr,
+      cpubase,cpuinfo,paramgr,
       fmodule,
-      defutil,symtable,ppu
+      defutil,symtable,symcpu,ppu
 {$ifdef OMFOBJSUPPORT}
       ,dbgcodeview
 {$endif OMFOBJSUPPORT}
@@ -679,6 +704,9 @@ implementation
       DW_LNE_end_sequence = $01;
       DW_LNE_set_address  = $02;
       DW_LNE_define_file  = $03;
+      { DW_LNE_set_segment is a non-standard Open Watcom extension. It might
+        create conflicts with future versions of the DWARF standard. }
+      DW_LNE_set_segment  = $04;
       DW_LNE_lo_user      = $80;
       DW_LNE_hi_user      = $ff;
 
@@ -1014,6 +1042,16 @@ implementation
           end;
       end;
 
+    function TDebugInfoDwarf.is_fbreg(reg: tregister): boolean;
+      begin
+{$ifdef i8086}
+        result:=reg=NR_BP;
+{$else i8086}
+        { always return false, because we don't emit DW_AT_frame_base attributes yet }
+        result:=false;
+{$endif i8086}
+      end;
+
     function TDebugInfoDwarf.def_dwarf_lab(def: tdef): tasmsymbol;
       begin
         result:=get_def_dwarf_labs(def)^.lab;
@@ -1072,6 +1110,8 @@ implementation
             appendsym_property(TAsmList(arg),tpropertysym(p));
           constsym:
             appendsym_const_member(TAsmList(arg),tconstsym(p),true);
+          else
+            ;
         end;
       end;
 
@@ -1283,7 +1323,12 @@ implementation
     procedure TDebugInfoDwarf.append_labelentry_addr_ref(attr : tdwarf_attribute;sym : tasmsymbol);
       begin
         AddConstToAbbrev(ord(DW_FORM_ref_addr));
-        current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_type_sym(aitconst_ptr_unaligned,sym))
+{$ifdef i8086}
+        { DW_FORM_ref_addr is treated as 32-bit by Open Watcom on i8086 }
+        current_asmdata.asmlists[al_dwarf_info].concat(tai_const.Create_type_sym(aitconst_32bit_unaligned,sym));
+{$else i8086}
+        current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_type_sym(aitconst_ptr_unaligned,sym));
+{$endif i8086}
       end;
 
     procedure TDebugInfoDwarf.append_labelentry_ref(attr : tdwarf_attribute;sym : tasmsymbol);
@@ -1308,6 +1353,95 @@ implementation
           AddConstToAbbrev(ord(DW_FORM_data4));
       end;
 
+    procedure TDebugInfoDwarf.append_pointerclass(list: TAsmList;
+      def: tpointerdef);
+      begin
+{$ifdef i8086}
+        case tcpupointerdef(def).x86pointertyp of
+          x86pt_near,
+          { todo: is there a way to specify these somehow? }
+          x86pt_near_cs,x86pt_near_ds,x86pt_near_ss,
+          x86pt_near_es,x86pt_near_fs,x86pt_near_gs:
+            append_attribute(DW_AT_address_class,DW_FORM_data1,[DW_ADDR_near16]);
+          x86pt_far:
+            append_attribute(DW_AT_address_class,DW_FORM_data1,[DW_ADDR_far16]);
+          x86pt_huge:
+            append_attribute(DW_AT_address_class,DW_FORM_data1,[DW_ADDR_huge16]);
+        end;
+{$else i8086}
+        { Theoretically, we could do this, but it might upset some debuggers, }
+        { even though it's part of the DWARF standard. }
+        { append_attribute(DW_AT_address_class,DW_FORM_data1,[DW_ADDR_none]); }
+{$endif i8086}
+      end;
+
+    procedure TDebugInfoDwarf.append_proc_frame_base(list: TAsmList;
+      def: tprocdef);
+{$ifdef i8086}
+      var
+        dreg: longint;
+        blocksize: longint;
+        templist: TAsmList;
+      begin
+        dreg:=dwarf_reg(NR_BP);
+        templist:=TAsmList.create;
+        if dreg<=31 then
+          begin
+            templist.concat(tai_const.create_8bit(ord(DW_OP_reg0)+dreg));
+            blocksize:=1;
+          end
+        else
+          begin
+            templist.concat(tai_const.create_8bit(ord(DW_OP_regx)));
+            templist.concat(tai_const.create_uleb128bit(dreg));
+            blocksize:=1+Lengthuleb128(dreg);
+          end;
+        append_block1(DW_AT_frame_base,blocksize);
+        current_asmdata.asmlists[al_dwarf_info].concatlist(templist);
+        templist.free;
+      end;
+{$else i8086}
+      begin
+        { problem: base reg isn't known here
+          DW_AT_frame_base,DW_FORM_block1,1
+        }
+      end;
+{$endif i8086}
+
+
+{$ifdef i8086}
+    procedure TDebugInfoDwarf.append_seg_name(const name:string);
+      begin
+        append_block1(DW_AT_segment,3);
+        current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_const2u)));
+        current_asmdata.asmlists[al_dwarf_info].concat(tai_const.Create_seg_name(name));
+      end;
+
+    procedure TDebugInfoDwarf.append_seg_reg(const segment_register: tregister);
+      var
+        dreg: longint;
+        blocksize: longint;
+        templist: TAsmList;
+      begin
+        dreg:=dwarf_reg(segment_register);
+        templist:=TAsmList.create;
+        if dreg<=31 then
+          begin
+            templist.concat(tai_const.create_8bit(ord(DW_OP_reg0)+dreg));
+            blocksize:=1;
+          end
+        else
+          begin
+            templist.concat(tai_const.create_8bit(ord(DW_OP_regx)));
+            templist.concat(tai_const.create_uleb128bit(dreg));
+            blocksize:=1+Lengthuleb128(dreg);
+          end;
+        append_block1(DW_AT_segment,blocksize);
+        current_asmdata.asmlists[al_dwarf_info].concatlist(templist);
+        templist.free;
+      end;
+{$endif i8086}
+
 
     procedure TDebugInfoDwarf.append_labelentry_dataptr_abs(attr : tdwarf_attribute;sym : tasmsymbol);
       begin
@@ -1355,8 +1489,12 @@ implementation
         sign         : tdwarf_type;
         signform     : tdwarf_form;
         fullbytesize : byte;
+        ordtype      : tordtype;
       begin
-        case def.ordtype of
+        ordtype:=def.ordtype;
+        if ordtype=customint then
+          ordtype:=range_to_basetype(def.low,def.high);
+        case ordtype of
           s8bit,
           s16bit,
           s32bit,
@@ -1390,7 +1528,7 @@ implementation
                     basedef:=s16inttype
                   else
                     basedef:=u16inttype;
-                4:
+                3,4:
                   if (sign=DW_ATE_signed) then
                     basedef:=s32inttype
                   else
@@ -1462,7 +1600,7 @@ implementation
                 ]);
               finish_entry;
             end;
-          pasbool8 :
+          pasbool1 :
             begin
               append_entry(DW_TAG_base_type,false,[
                 DW_AT_name,DW_FORM_string,'Boolean'#0,
@@ -1471,6 +1609,15 @@ implementation
                 ]);
               finish_entry;
             end;
+          pasbool8 :
+            begin
+              append_entry(DW_TAG_base_type,false,[
+                DW_AT_name,DW_FORM_string,'Boolean8'#0,
+                DW_AT_encoding,DW_FORM_data1,DW_ATE_boolean,
+                DW_AT_byte_size,DW_FORM_data1,1
+                ]);
+              finish_entry;
+            end;
           bool8bit :
             begin
               append_entry(DW_TAG_base_type,false,[
@@ -1792,6 +1939,7 @@ implementation
     procedure TDebugInfoDwarf.appenddef_pointer(list:TAsmList;def:tpointerdef);
       begin
         append_entry(DW_TAG_pointer_type,false,[]);
+        append_pointerclass(list,def);
         if not(is_voidpointer(def)) then
           append_labelentry_ref(DW_AT_type,def_dwarf_lab(def.pointeddef));
         finish_entry;
@@ -2117,7 +2265,7 @@ implementation
 
       var
         procendlabel   : tasmlabel;
-        procentry      : string;
+        procentry,s    : string;
         cc             : Tdwarf_calling_convention;
         st             : tsymtable;
         vmtoffset      : pint;
@@ -2165,20 +2313,25 @@ implementation
         current_asmdata.asmlists[al_dwarf_info].concat(tai_comment.Create(strpnew('Procdef '+def.fullprocname(true))));
         if not is_objc_class_or_protocol(def.struct) then
           append_entry(DW_TAG_subprogram,true,
-            [DW_AT_name,DW_FORM_string,symname(def.procsym, false)+#0
-            { data continues below }
-            { problem: base reg isn't known here
-              DW_AT_frame_base,DW_FORM_block1,1
-            }
-            ])
+            [DW_AT_name,DW_FORM_string,symname(def.procsym, false)+#0])
         else
           append_entry(DW_TAG_subprogram,true,
-            [DW_AT_name,DW_FORM_string,def.mangledname+#0
-            { data continues below }
-            { problem: base reg isn't known here
-              DW_AT_frame_base,DW_FORM_block1,1
-            }
-            ]);
+            [DW_AT_name,DW_FORM_string,def.mangledname+#0]);
+
+        if (ds_dwarf_cpp in current_settings.debugswitches) and (def.owner.symtabletype in [objectsymtable,recordsymtable]) then
+          begin
+            { If C++ emulation is enabled, add DW_AT_linkage_name attribute for methods.
+              LLDB uses it to display fully qualified method names.
+              Add a simple C++ mangled name without params to achieve at least "Class::Method()"
+              instead of just "Method" in LLDB. }
+            s:=tabstractrecorddef(def.owner.defowner).objrealname^;
+            procentry:=Format('_ZN%d%s', [Length(s), s]);
+            s:=symname(def.procsym, false);
+            procentry:=Format('%s%d%sEv'#0, [procentry, Length(s), s]);
+            append_attribute(DW_AT_linkage_name,DW_FORM_string, [procentry]);
+          end;
+
+        append_proc_frame_base(list,def);
 
         { Append optional flags. }
 
@@ -2188,6 +2341,13 @@ implementation
         cc:=dwarf_calling_convention(def);
         if (cc<>DW_CC_normal) then
           append_attribute(DW_AT_calling_convention,DW_FORM_data1,[ord(cc)]);
+{$ifdef i8086}
+        { Call model (near or far). Open Watcom compatible. }
+        if tcpuprocdef(def).is_far then
+          append_attribute(DW_AT_address_class,DW_FORM_data1,[DW_ADDR_far16])
+        else
+          append_attribute(DW_AT_address_class,DW_FORM_data1,[DW_ADDR_none]);
+{$endif i8086}
         { Externally visible.  }
         if (po_global in def.procoptions) and
            (def.parast.symtablelevel<=normal_function_level) then
@@ -2234,6 +2394,9 @@ implementation
             else
               procentry := def.mangledname;
 
+{$ifdef i8086}
+            append_seg_name(procentry);
+{$endif i8086}
             append_labelentry(DW_AT_low_pc,current_asmdata.RefAsmSymbol(procentry,AT_FUNCTION));
             append_labelentry(DW_AT_high_pc,procendlabel);
 
@@ -2241,8 +2404,18 @@ implementation
               begin
                 current_asmdata.asmlists[al_dwarf_aranges].Concat(
                   tai_const.create_type_sym(aitconst_ptr_unaligned,current_asmdata.RefAsmSymbol(procentry,AT_FUNCTION)));
+{$ifdef i8086}
+                { bits 16..31 of the offset }
+                current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.Create_16bit_unaligned(0));
+                { segment }
+                current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.Create_seg_name(procentry));
+{$endif i8086}
                 current_asmdata.asmlists[al_dwarf_aranges].Concat(
                   tai_const.Create_rel_sym(aitconst_ptr_unaligned,current_asmdata.RefAsmSymbol(procentry,AT_FUNCTION),procendlabel));
+{$ifdef i8086}
+                { bits 16..31 of length }
+                current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.Create_16bit_unaligned(0));
+{$endif i8086}
               end;
           end;
 
@@ -2340,7 +2513,7 @@ implementation
             sl_absolutetype,
             sl_typeconv:
               begin
-                currdef:=tfieldvarsym(symlist^.sym).vardef;
+                currdef:=symlist^.def;
                 { ignore, these don't change the address }
               end;
             sl_vec:
@@ -2387,7 +2560,12 @@ implementation
         blocksize,size_of_int : longint;
         tag : tdwarf_tag;
         has_high_reg : boolean;
-        dreg,dreghigh : byte;
+        dreg,dreghigh : shortint;
+{$ifdef i8086}
+        has_segment_sym_name : boolean=false;
+        segment_sym_name : TSymStr='';
+        segment_reg: TRegister=NR_NO;
+{$endif i8086}
       begin
         blocksize:=0;
         dreghigh:=0;
@@ -2413,15 +2591,19 @@ implementation
           LOC_FPUREGISTER,
           LOC_CFPUREGISTER :
             begin
-              dreg:=dwarf_reg(sym.localloc.register);
+              { dwarf_reg_no_error might return -1
+                in case the register variable has been optimized out }
+              dreg:=dwarf_reg_no_error(sym.localloc.register);
               has_high_reg:=(sym.localloc.loc in [LOC_REGISTER,LOC_CREGISTER]) and (sym.localloc.registerhi<>NR_NO);
               if has_high_reg then
-                dreghigh:=dwarf_reg(sym.localloc.registerhi);
+                dreghigh:=dwarf_reg_no_error(sym.localloc.registerhi);
+              if dreghigh=-1 then
+                has_high_reg:=false;
               if (sym.localloc.loc in [LOC_REGISTER,LOC_CREGISTER]) and
                  (sym.typ=paravarsym) and
                   paramanager.push_addr_param(sym.varspez,sym.vardef,tprocdef(sym.owner.defowner).proccalloption) and
                   not(vo_has_local_copy in sym.varoptions) and
-                  not is_open_string(sym.vardef) then
+                  not is_open_string(sym.vardef) and (dreg>=0) then
                 begin
                   templist.concat(tai_const.create_8bit(ord(DW_OP_bregx)));
                   templist.concat(tai_const.create_uleb128bit(dreg));
@@ -2447,7 +2629,7 @@ implementation
                       templist.concat(tai_const.create_uleb128bit(size_of_int));
                       blocksize:=blocksize+1+Lengthuleb128(size_of_int);
                     end
-                  else
+                  else if (dreg>=0) then
                     begin
                       templist.concat(tai_const.create_8bit(ord(DW_OP_regx)));
                       templist.concat(tai_const.create_uleb128bit(dreg));
@@ -2475,6 +2657,10 @@ implementation
                         templist.concat(tai_const.create_8bit(ord(DW_OP_addr)));
                         templist.concat(tai_const.Create_type_name(aitconst_ptr_unaligned,sym.mangledname,offset));
                         blocksize:=1+sizeof(puint);
+{$ifdef i8086}
+                        segment_sym_name:=sym.mangledname;
+                        has_segment_sym_name:=true;
+{$endif i8086}
                       end;
                   end;
                 paravarsym,
@@ -2486,10 +2672,32 @@ implementation
                     }
                     if sym.localloc.loc<> LOC_INVALID then
                       begin
-                        dreg:=dwarf_reg(sym.localloc.reference.base);
-                        templist.concat(tai_const.create_8bit(ord(DW_OP_breg0)+dreg));
-                        templist.concat(tai_const.create_sleb128bit(sym.localloc.reference.offset+offset));
-                        blocksize:=1+Lengthsleb128(sym.localloc.reference.offset);
+                        if is_fbreg(sym.localloc.reference.base) then
+                          begin
+                            templist.concat(tai_const.create_8bit(ord(DW_OP_fbreg)));
+                            templist.concat(tai_const.create_sleb128bit(sym.localloc.reference.offset+offset));
+                            blocksize:=1+Lengthsleb128(sym.localloc.reference.offset+offset);
+                          end
+                        else
+                          begin
+                            dreg:=dwarf_reg(sym.localloc.reference.base);
+                            if dreg<=31 then
+                              begin
+                                templist.concat(tai_const.create_8bit(ord(DW_OP_breg0)+dreg));
+                                templist.concat(tai_const.create_sleb128bit(sym.localloc.reference.offset+offset));
+                                blocksize:=1+Lengthsleb128(sym.localloc.reference.offset+offset);
+                              end
+                            else
+                              begin
+                                templist.concat(tai_const.create_8bit(ord(DW_OP_bregx)));
+                                templist.concat(tai_const.create_uleb128bit(dreg));
+                                templist.concat(tai_const.create_sleb128bit(sym.localloc.reference.offset+offset));
+                                blocksize:=1+Lengthuleb128(dreg)+LengthSleb128(sym.localloc.reference.offset+offset);
+                              end;
+                          end;
+{$ifdef i8086}
+                        segment_reg:=sym.localloc.reference.segment;
+{$endif i8086}
 {$ifndef gdb_supports_DW_AT_variable_parameter}
                         { Parameters which are passed by reference. (var and the like)
                           Hide the reference-pointer and dereference the pointer
@@ -2591,6 +2799,12 @@ implementation
         if (vo_is_self in sym.varoptions) then
           append_attribute(DW_AT_artificial,DW_FORM_flag,[true]);
         append_labelentry_ref(DW_AT_type,def_dwarf_lab(def));
+{$ifdef i8086}
+        if has_segment_sym_name then
+          append_seg_name(segment_sym_name)
+        else if segment_reg<>NR_NO then
+          append_seg_reg(segment_reg);
+{$endif i8086}
 
         templist.free;
 
@@ -2952,8 +3166,6 @@ implementation
               templist.free;
               exit;
             end;
-          else
-            internalerror(2013120111);
         end;
 
         append_entry(DW_TAG_variable,false,[
@@ -3173,7 +3385,7 @@ implementation
         bind: tasmsymbind;
         lang: tdwarf_source_language;
       begin
-        current_module.flags:=current_module.flags or uf_has_dwarf_debuginfo;
+        include(current_module.moduleflags,mf_has_dwarf_debuginfo);
         storefilepos:=current_filepos;
         current_filepos:=current_module.mainfilepos;
 
@@ -3230,10 +3442,20 @@ implementation
                 current_asmdata.DefineAsmSymbol(target_asm.labelprefix+'debug_infosection0',AB_LOCAL,AT_METADATA,voidpointertype),
                 current_asmdata.DefineAsmSymbol(target_asm.labelprefix+'debug_info0',AB_LOCAL,AT_METADATA,voidpointertype)));
 
+{$ifdef i8086}
+            { address_size }
+            current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.create_8bit(4));
+            { segment_size }
+            current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.create_8bit(2));
+            { no alignment/padding bytes on i8086 for Open Watcom compatibility }
+{$else i8086}
+            { address_size }
             current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.create_8bit(sizeof(pint)));
+            { segment_size }
             current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.create_8bit(0));
             { alignment }
             current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.create_32bit_unaligned(0));
+{$endif i8086}
 
             { start ranges section }
             new_section(current_asmdata.asmlists[al_dwarf_ranges],sec_debug_ranges,'',0);
@@ -3274,6 +3496,22 @@ implementation
           DW_AT_language,DW_FORM_data1,lang,
           DW_AT_identifier_case,DW_FORM_data1,DW_ID_case_insensitive]);
 
+{$ifdef i8086}
+        case current_settings.x86memorymodel of
+          mm_tiny,
+          mm_small:
+            append_attribute(DW_AT_WATCOM_memory_model,DW_FORM_data1,[DW_WATCOM_MEMORY_MODEL_small]);
+          mm_medium:
+            append_attribute(DW_AT_WATCOM_memory_model,DW_FORM_data1,[DW_WATCOM_MEMORY_MODEL_medium]);
+          mm_compact:
+            append_attribute(DW_AT_WATCOM_memory_model,DW_FORM_data1,[DW_WATCOM_MEMORY_MODEL_compact]);
+          mm_large:
+            append_attribute(DW_AT_WATCOM_memory_model,DW_FORM_data1,[DW_WATCOM_MEMORY_MODEL_large]);
+          mm_huge:
+            append_attribute(DW_AT_WATCOM_memory_model,DW_FORM_data1,[DW_WATCOM_MEMORY_MODEL_huge]);
+        end;
+{$endif i8086}
+
         { reference to line info section }
         if not(tf_dwarf_relative_addresses in target_info.flags) then
           append_labelentry_dataptr_abs(DW_AT_stmt_list,current_asmdata.DefineAsmSymbol(target_asm.labelprefix+'debug_line0',AB_LOCAL,AT_METADATA,voidpointertype))
@@ -3340,8 +3578,19 @@ implementation
         if not(target_info.system in systems_darwin) then
           begin
             { end of aranges table }
+{$ifdef i8086}
+            { 32-bit offset }
+            current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.Create_32bit_unaligned(0));
+            { 16-bit segment }
+            current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.Create_16bit_unaligned(0));
+            { 32-bit length }
+            current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.Create_32bit_unaligned(0));
+{$else i8086}
+            { offset }
             current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.Create_aint(0));
+            { length }
             current_asmdata.asmlists[al_dwarf_aranges].concat(tai_const.Create_aint(0));
+{$endif i8086}
             current_asmdata.asmlists[al_dwarf_aranges].concat(tai_symbol.createname(target_asm.labelprefix+'earanges0',AT_METADATA,0,voidpointertype));
           end;
 
@@ -3378,7 +3627,7 @@ implementation
         hp:=tmodule(loaded_units.first);
         while assigned(hp) do
           begin
-            If ((hp.flags and uf_has_dwarf_debuginfo)=uf_has_dwarf_debuginfo) and not assigned(hp.package) then
+            If (mf_has_dwarf_debuginfo in hp.moduleflags) and not assigned(hp.package) then
               begin
                 list.concat(Tai_const.Createname(make_mangledname('DEBUGSTART',hp.localsymtable,''),0));
                 list.concat(Tai_const.Createname(make_mangledname('DEBUGEND',hp.localsymtable,''),0));
@@ -3425,17 +3674,21 @@ implementation
       end;
 
 
-    procedure tdebuginfodwarf.append_visibility(vis: tvisibility);
+        procedure TDebugInfoDwarf.append_visibility(vis: tvisibility);
       begin
         case vis of
+          vis_hidden,
           vis_private,
           vis_strictprivate:
             append_attribute(DW_AT_accessibility,DW_FORM_data1,[ord(DW_ACCESS_private)]);
           vis_protected,
           vis_strictprotected:
             append_attribute(DW_AT_accessibility,DW_FORM_data1,[ord(DW_ACCESS_protected)]);
+          vis_published,
           vis_public:
             { default };
+          vis_none:
+            internalerror(2019050720);
         end;
       end;
 
@@ -3501,8 +3754,12 @@ implementation
                       inc(nolineinfolevel);
                     mark_NoLineInfoEnd:
                       dec(nolineinfolevel);
+                    else
+                      ;
                   end;
                 end;
+              else
+                ;
             end;
 
             if (currsectype=sec_code) and
@@ -3551,16 +3808,25 @@ implementation
                     asmline.concat(tai_comment.Create(strpnew('['+tostr(currfileinfo.line)+':'+tostr(currfileinfo.column)+']')));
 
                     if (prevlabel = nil) or
-                       { darwin's assembler cannot create an uleb128 of the difference }
-                       { between to symbols                                            }
-                       { same goes for Solaris native assembler                        }
-                       (target_info.system in systems_darwin) or
+                       { darwin's assembler cannot create an uleb128 of the difference
+                         between to symbols
+                         same goes for Solaris native assembler
+                         ... and riscv }
+
+                       (target_info.system in systems_darwin+[system_riscv32_linux,system_riscv64_linux]) or
                        (target_asm.id=as_solaris_as) then
                       begin
                         asmline.concat(tai_const.create_8bit(DW_LNS_extended_op));
                         asmline.concat(tai_const.create_uleb128bit(1+sizeof(pint)));
                         asmline.concat(tai_const.create_8bit(DW_LNE_set_address));
                         asmline.concat(tai_const.create_type_sym(aitconst_ptr_unaligned,currlabel));
+{$ifdef i8086}
+                        { on i8086 we also emit an Open Watcom-specific 'set segment' op }
+                        asmline.concat(tai_const.create_8bit(DW_LNS_extended_op));
+                        asmline.concat(tai_const.create_uleb128bit(3));
+                        asmline.concat(tai_const.create_8bit(DW_LNE_set_segment));
+                        asmline.concat(tai_const.Create_seg_name(currlabel.Name));
+{$endif i8086}
                       end
                     else
                       begin
@@ -4003,17 +4269,17 @@ implementation
           { now the information about the length of the string }
           if deref then
             begin
-              if (chardef.size=1) then
+              if not (is_widestring(def) and (tf_winlikewidestring in target_info.flags)) then
                 upperopcodes:=13
               else
-                upperopcodes:=15;
+                upperopcodes:=16;
               { lower bound is always 1, upper bound (length) needs to be calculated }
               append_entry(DW_TAG_subrange_type,false,[
                 DW_AT_lower_bound,DW_FORM_udata,1,
                 DW_AT_upper_bound,DW_FORM_block1,upperopcodes
                 ]);
 
-              { high(string) is stored sizeof(ptrint) bytes before the string data }
+              { high(string) is stored sizeof(sizeint) bytes before the string data }
               current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_push_object_address)));
               current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_deref)));
               current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_dup)));
@@ -4025,12 +4291,22 @@ implementation
               current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_skip)));
               current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_16bit_unaligned(3));
               { no -> load length }
-              current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_lit0)+sizeof(ptrint)));
+              if upperopcodes=16 then
+                { for Windows WideString the size is always a DWORD }
+                current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_lit4)))
+              else
+                current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_lit0)+sizesinttype.size));
               current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_minus)));
-              current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_deref)));
+              if upperopcodes=16 then
+                begin
+                  current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_deref_size)));
+                  current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(4));
+                end
+              else
+                current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_deref)));
 
               { for widestrings, the length is specified in bytes, so divide by two }
-              if (upperopcodes=15) then
+              if (upperopcodes=16) then
                 begin
                   current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_lit1)));
                   current_asmdata.asmlists[al_dwarf_info].concat(tai_const.create_8bit(ord(DW_OP_shr)));
@@ -4054,6 +4330,13 @@ implementation
         end;
 
       begin
+        if (ds_dwarf_cpp in current_settings.debugswitches) then
+          begin
+            // At least LLDB 6.0.0 does not like this implementation of string types.
+            // Call the inherited DWARF 2 implementation, which works fine.
+            inherited;
+            exit;
+          end;
         case def.stringtype of
           st_shortstring:
             begin
@@ -4077,15 +4360,7 @@ implementation
            end;
          st_widestring:
            begin
-             if not(tf_winlikewidestring in target_info.flags) then
-               addstringdef('WideString',cwidechartype,true,-1)
-             else
-               begin
-                 { looks like a pwidechar (no idea about length location) }
-                 append_entry(DW_TAG_pointer_type,false,[]);
-                 append_labelentry_ref(DW_AT_type,def_dwarf_lab(cwidechartype));
-                 finish_entry;
-              end;
+             addstringdef('WideString',cwidechartype,true,-1)
            end;
         end;
       end;

+ 16 - 3
compiler/dbgstabs.pas

@@ -250,6 +250,7 @@ implementation
       varcounter:=0;
       varptr:=@varvaluedata[0];
       varvalues[0]:=nil;
+      result:='';
       while i<=length(s) do
         begin
           if (s[i]='$') and (i<length(s)) then
@@ -515,6 +516,10 @@ implementation
                         argnames:=argnames+'3out';
                       vs_constref :
                         argnames:=argnames+'8constref';
+                      vs_value :
+                        ;
+                      vs_final:
+                        internalerror(2019050911);
                     end;
                   end
                 else
@@ -695,6 +700,7 @@ implementation
             case def.ordtype of
               uvoid :
                 ss:=def_stab_number(def);
+              pasbool1,
               pasbool8,
               pasbool16,
               pasbool32,
@@ -723,6 +729,7 @@ implementation
                 ss:='-20;';
               uwidechar :
                 ss:='-30;';
+              pasbool1,
               pasbool8,
               bool8bit :
                 ss:='-21;';
@@ -1076,6 +1083,8 @@ implementation
                         def.dbg_state:=dbg_state_queued;
                         break;
                       end;
+                    else
+                      ;
                   end;
                 end;
               appenddef(list,vmtarraytype);
@@ -1103,6 +1112,8 @@ implementation
                       appenddef(list,TImplementedInterface(anc.ImplementedInterfaces[i]).IntfDef);
                 end;
             end;
+          else
+            ;
         end;
       end;
 
@@ -1631,7 +1642,7 @@ implementation
         ss:='';
         if not assigned(sym.typedef) then
           internalerror(200509262);
-        if sym.typedef.typ in tagtypes then
+        if use_tag_prefix(sym.typedef) then
           stabchar:=tagtypeprefix
         else
           stabchar:='t';
@@ -1676,7 +1687,7 @@ implementation
 
         { include symbol that will be referenced from the main to be sure to
           include this debuginfo .o file }
-        current_module.flags:=current_module.flags or uf_has_stabs_debuginfo;
+        include(current_module.moduleflags,mf_has_stabs_debuginfo);
         if not(target_info.system in systems_darwin) then
           begin
             new_section(current_asmdata.asmlists[al_stabs],sec_data,GetSymTableName(current_module.localsymtable),sizeof(pint));
@@ -1757,6 +1768,8 @@ implementation
                 currfuncname:=tai_function_name(hp).funcname;
               ait_force_line :
                 lastfileinfo.line:=-1;
+              else
+                ;
             end;
 
             if (currsectype=sec_code) and
@@ -1864,7 +1877,7 @@ implementation
         hp:=tmodule(loaded_units.first);
         while assigned(hp) do
           begin
-            If ((hp.flags and uf_has_stabs_debuginfo)=uf_has_stabs_debuginfo) and not assigned(hp.package) then
+            If (mf_has_stabs_debuginfo in hp.moduleflags) and not assigned(hp.package) then
               begin
                 list.concat(Tai_const.Createname(make_mangledname('DEBUGINFO',hp.localsymtable,''),0));
                 list.concat(Tai_const.Createname(make_mangledname('DEBUGSTART',hp.localsymtable,''),0));

+ 5 - 1
compiler/dbgstabx.pas

@@ -158,7 +158,7 @@ implementation
           declstabnr:=def_stab_number(def)
         end;
       if (symname='') or
-         not(def.typ in tagtypes) then
+         not(use_tag_prefix(def)) then
         begin
           st:=def_stabstr_evaluate(def,':$1$2=',[stabchar,declstabnr]);
           st:='"'+def_stabstr_evaluate(def,symname,[])+st+ss;
@@ -341,8 +341,12 @@ implementation
                     inc(nolineinfolevel);
                   mark_NoLineInfoEnd:
                     dec(nolineinfolevel);
+                  else
+                    ;
                 end;
               end;
+            else
+              ;
           end;
 
           if (currsectype=sec_code) and

+ 48 - 13
compiler/defcmp.pas

@@ -187,7 +187,7 @@ implementation
            u8bit,u16bit,u32bit,u64bit,
            s8bit,s16bit,s32bit,s64bit,
            pasbool, bool8bit,bool16bit,bool32bit,bool64bit,
-           uchar,uwidechar,scurrency }
+           uchar,uwidechar,scurrency,customint }
 
       type
         tbasedef=(bvoid,bchar,bint,bbool);
@@ -196,9 +196,9 @@ implementation
           (bvoid,
            bint,bint,bint,bint,bint,
            bint,bint,bint,bint,bint,
+           bbool,bbool,bbool,bbool,bbool,
            bbool,bbool,bbool,bbool,
-           bbool,bbool,bbool,bbool,
-           bchar,bchar,bint);
+           bchar,bchar,bint,bint);
 
         basedefconvertsimplicit : array[tbasedef,tbasedef] of tconverttype =
           { void, char, int, bool }
@@ -289,7 +289,15 @@ implementation
              if assigned(tstoreddef(def_from).genconstraintdata) or
                  assigned(tstoreddef(def_to).genconstraintdata) then
                begin
-                 if def_from.typ<>def_to.typ then
+                 { this is bascially a poor man's type checking, if there is a chance
+                   that the types are equal considering the constraints, this needs probably
+                   to be improved and maybe factored out or even result in a recursive compare_defs_ext }
+                 if (def_from.typ<>def_to.typ) and
+                   { formaldefs are compatible with everything }
+                   not(def_from.typ in [formaldef]) and
+                   not(def_to.typ in [formaldef]) and
+                   { constants could get another deftype (e.g. niln) }
+                   not(fromtreetype in nodetype_const) then
                    begin
                      { not compatible anyway }
                      doconv:=tc_not_possible;
@@ -415,7 +423,7 @@ implementation
                                 end;
                             end;
                           uvoid,
-                          pasbool8,pasbool16,pasbool32,pasbool64,
+                          pasbool1,pasbool8,pasbool16,pasbool32,pasbool64,
                           bool8bit,bool16bit,bool32bit,bool64bit:
                             eq:=te_equal;
                           else
@@ -491,15 +499,16 @@ implementation
                    end;
                  arraydef :
                    begin
-                     if (((m_mac in current_settings.modeswitches) and
-                          is_integer(def_to)) or
-                         is_widechar(def_to)) and
+                     if (m_mac in current_settings.modeswitches) and
+                        is_integer(def_to) and
                         (fromtreetype=stringconstn) then
                        begin
                          eq:=te_convert_l3;
                          doconv:=tc_cstring_2_int;
                        end;
                    end;
+                 else
+                   ;
                end;
              end;
 
@@ -608,6 +617,8 @@ implementation
                                    eq:=te_convert_l6;
                                end;
                              end;
+                           else
+                             ;
                          end;
                        end;
                    end;
@@ -787,6 +798,8 @@ implementation
                            end;
                       end;
                    end;
+                 else
+                   ;
                end;
              end;
 
@@ -844,6 +857,8 @@ implementation
                            end;
                        end;
                    end;
+                 else
+                   ;
                end;
              end;
 
@@ -936,6 +951,8 @@ implementation
                            end;
                        end;
                    end;
+                 else
+                   ;
                end;
              end;
 
@@ -1020,8 +1037,8 @@ implementation
                                { dynamic array -> dynamic array }
                                if is_dynamic_array(def_from) then
                                  eq:=te_equal
-                               { fpc modes only: array -> dyn. array }
-                               else if (current_settings.modeswitches*[m_objfpc,m_fpc]<>[]) and
+                               { regular array -> dynamic array }
+                               else if (m_array2dynarray in current_settings.modeswitches) and
                                  not(is_special_array(def_from)) and
                                  is_zero_based_array(def_from) then
                                  begin
@@ -1206,6 +1223,8 @@ implementation
                               eq:=te_convert_l1;
                            end;
                       end;
+                    else
+                      ;
                   end;
                 end;
              end;
@@ -1249,6 +1268,8 @@ implementation
                              eq:=te_convert_l1;
                            end;
                        end;
+                     else
+                       ;
                    end;
                  end;
              end;
@@ -1535,6 +1556,8 @@ implementation
                          eq:=te_convert_l2;
                        end;
                    end;
+                 else
+                   ;
                end;
              end;
 
@@ -1575,6 +1598,8 @@ implementation
                         eq:=te_convert_l1;
                       end;
                    end;
+                 else
+                   ;
                end;
              end;
 
@@ -1631,6 +1656,8 @@ implementation
                          eq:=te_convert_l1;
                        end;
                    end;
+                 else
+                   ;
                end;
              end;
 
@@ -1880,8 +1907,10 @@ implementation
                else
                 { Just about everything can be converted to a formaldef...}
                 if not (def_from.typ in [abstractdef,errordef]) then
-                  eq:=te_convert_l2;
+                  eq:=te_convert_l6;
              end;
+           else
+             ;
         end;
 
         { if we didn't find an appropriate type conversion yet
@@ -1962,13 +1991,19 @@ implementation
                 u8bit,u16bit,u32bit,u64bit,
                 s8bit,s16bit,s32bit,s64bit :
                   is_subequal:=(torddef(def2).ordtype in [s64bit,u64bit,s32bit,u32bit,u8bit,s8bit,s16bit,u16bit]);
-                pasbool8,pasbool16,pasbool32,pasbool64,
+                pasbool1,pasbool8,pasbool16,pasbool32,pasbool64,
                 bool8bit,bool16bit,bool32bit,bool64bit :
-                  is_subequal:=(torddef(def2).ordtype in [pasbool8,pasbool16,pasbool32,pasbool64,bool8bit,bool16bit,bool32bit,bool64bit]);
+                  is_subequal:=(torddef(def2).ordtype in [pasbool1,pasbool8,pasbool16,pasbool32,pasbool64,bool8bit,bool16bit,bool32bit,bool64bit]);
                 uchar :
                   is_subequal:=(torddef(def2).ordtype=uchar);
                 uwidechar :
                   is_subequal:=(torddef(def2).ordtype=uwidechar);
+                customint:
+                  is_subequal:=(torddef(def2).low=torddef(def1).low) and (torddef(def2).high=torddef(def1).high);
+                u128bit, s128bit,
+                scurrency,
+                uvoid:
+                  ;
               end;
             end
            else

+ 47 - 11
compiler/defutil.pas

@@ -156,6 +156,9 @@ interface
     }
     function is_special_array(p : tdef) : boolean;
 
+    {# Returns true, if p points to a normal array, bitpacked arrays are included }
+    function is_normal_array(p : tdef) : boolean;
+
     {# Returns true if p is a bitpacked array }
     function is_packed_array(p: tdef) : boolean;
 
@@ -325,6 +328,9 @@ interface
     { # returns true if the procdef has no parameters and no specified return type }
     function is_bareprocdef(pd : tprocdef): boolean;
 
+    { returns true if the procdef is a C-style variadic function }
+    function is_c_variadic(pd: tabstractprocdef): boolean; {$ifdef USEINLINE}inline;{$endif}
+
     { # returns the smallest base integer type whose range encompasses that of
         both ld and rd; if keep_sign_if_equal, then if ld and rd have the same
         signdness, the result will also get that signdness }
@@ -478,8 +484,8 @@ implementation
                is_ordinal:=dt in [uchar,uwidechar,
                                   u8bit,u16bit,u32bit,u64bit,
                                   s8bit,s16bit,s32bit,s64bit,
-                                  pasbool8,pasbool16,pasbool32,pasbool64,
-                                  bool8bit,bool16bit,bool32bit,bool64bit];
+                                  pasbool1,pasbool8,pasbool16,pasbool32,pasbool64,
+                                  bool8bit,bool16bit,bool32bit,bool64bit,customint];
              end;
            enumdef :
              is_ordinal:=true;
@@ -550,7 +556,8 @@ implementation
       begin
         result:=(def.typ=orddef) and
                     (torddef(def).ordtype in [u8bit,u16bit,u32bit,u64bit,
-                                          s8bit,s16bit,s32bit,s64bit]);
+                                          s8bit,s16bit,s32bit,s64bit,
+                                          customint]);
       end;
 
 
@@ -558,14 +565,14 @@ implementation
     function is_boolean(def : tdef) : boolean;
       begin
         result:=(def.typ=orddef) and
-                    (torddef(def).ordtype in [pasbool8,pasbool16,pasbool32,pasbool64,bool8bit,bool16bit,bool32bit,bool64bit]);
+                    (torddef(def).ordtype in [pasbool1,pasbool8,pasbool16,pasbool32,pasbool64,bool8bit,bool16bit,bool32bit,bool64bit]);
       end;
 
 
     function is_pasbool(def : tdef) : boolean;
       begin
         result:=(def.typ=orddef) and
-                    (torddef(def).ordtype in [pasbool8,pasbool16,pasbool32,pasbool64]);
+                    (torddef(def).ordtype in [pasbool1,pasbool8,pasbool16,pasbool32,pasbool64]);
       end;
 
     { true if def is a C-style boolean (non-zero value = true, zero = false) }
@@ -748,6 +755,14 @@ implementation
                  );
       end;
 
+    { true, if p points to a normal array, bitpacked arrays are included }
+    function is_normal_array(p : tdef) : boolean;
+      begin
+         result:=(p.typ=arraydef) and
+                 ((tarraydef(p).arrayoptions * [ado_IsVariant,ado_IsArrayOfConst,ado_IsConstructor,ado_IsDynamicArray])=[]) and
+                 not(is_open_array(p));
+      end;
+
     { true if p is an ansi string def }
     function is_ansistring(p : tdef) : boolean;
       begin
@@ -902,7 +917,7 @@ implementation
     { true, if def is a 8 bit ordinal type }
     function is_8bit(def : tdef) : boolean;
       begin
-         result:=(def.typ=orddef) and (torddef(def).ordtype in [u8bit,s8bit,pasbool8,bool8bit,uchar])
+         result:=(def.typ=orddef) and (torddef(def).ordtype in [u8bit,s8bit,pasbool1,pasbool8,bool8bit,uchar])
       end;
 
     { true, if def is a 16 bit int type }
@@ -948,8 +963,11 @@ implementation
       begin
         result:=(def1.typ=orddef) and (def2.typ=orddef) and
           (torddef(def1).ordtype in [u8bit,u16bit,u32bit,u64bit,
-                                     s8bit,s16bit,s32bit,s64bit]) and
-          (torddef(def1).ordtype=torddef(def2).ordtype);
+                                     s8bit,s16bit,s32bit,s64bit,customint]) and
+          (torddef(def1).ordtype=torddef(def2).ordtype) and
+          ((torddef(def1).ordtype<>customint) or
+           ((torddef(def1).low=torddef(def2).low) and
+            (torddef(def1).high=torddef(def2).high)));
       end;
 
 
@@ -1050,6 +1068,8 @@ implementation
                1: l := l and $ff;
                2: l := l and $ffff;
                4: l := l and $ffffffff;
+               else
+                 ;
              end;
              {reset sign, i.e. converting -1 to qword changes the value to high(qword)}
              l.signed:=false;
@@ -1060,6 +1080,8 @@ implementation
                   1: l.svalue := shortint(l.svalue);
                   2: l.svalue := smallint(l.svalue);
                   4: l.svalue := longint(l.svalue);
+                  else
+                    ;
                 end;
                 l.signed:=true;
               end;
@@ -1106,6 +1128,8 @@ implementation
                 case tfloatdef(tarraydef(p).elementdef).floattype of
                   s32real:
                     mmx_type:=mmxsingle;
+                  else
+                    ;
                 end
               else
                 case torddef(tarraydef(p).elementdef).ordtype of
@@ -1121,6 +1145,8 @@ implementation
                      mmx_type:=mmxu32bit;
                    s32bit:
                      mmx_type:=mmxs32bit;
+                   else
+                     ;
                 end;
            end;
       end;
@@ -1146,6 +1172,8 @@ implementation
                      range_to_type(torddef(def).low,torddef(def).high,result);
                  end
                else case torddef(def).ordtype of
+                 pasbool1:
+                   result:=pasbool1type;
                  pasbool8:
                    result:=pasbool8type;
                  pasbool16:
@@ -1453,7 +1481,6 @@ implementation
       As of today, both signed and unsigned types from 8 to 64 bits are supported. }
     function is_automatable(p : tdef) : boolean;
       begin
-        result:=false;
         case p.typ of
           orddef:
             result:=torddef(p).ordtype in [u8bit,s8bit,u16bit,s16bit,u32bit,s32bit,
@@ -1466,6 +1493,8 @@ implementation
             result:=true;
           objectdef:
             result:=tobjectdef(p).objecttype in [odt_interfacecom,odt_dispinterface,odt_interfacecorba];
+          else
+            result:=false;
         end;
       end;
 
@@ -1490,6 +1519,12 @@ implementation
                  (pd.proctypeoption = potype_constructor));
       end;
 
+    function is_c_variadic(pd: tabstractprocdef): boolean;
+      begin
+        result:=
+          (po_varargs in pd.procoptions) or
+          (po_variadic in pd.procoptions);
+      end;
 
     function get_common_intdef(ld, rd: torddef; keep_sign_if_equal: boolean): torddef;
       var
@@ -1539,6 +1574,8 @@ implementation
               result:=torddef(s64inttype);
             s64bit:
               result:=torddef(u64inttype);
+            else
+              ;
           end;
       end;
 
@@ -1601,6 +1638,7 @@ implementation
                 result:=tkQWord;
               s64bit:
                 result:=tkInt64;
+              pasbool1,
               pasbool8,
               pasbool16,
               pasbool32,
@@ -1631,8 +1669,6 @@ implementation
                 result:=tkWString;
               st_unicodestring:
                 result:=tkUString;
-              else
-                result:=tkUnknown;
             end;
           enumdef:
             result:=tkEnumeration;

+ 261 - 0
compiler/elfbase.pas

@@ -418,7 +418,268 @@ interface
     VER_FLG_WEAK = 2;
     VER_FLG_INFO = 4;
 
+    procedure MayBeSwapHeader(var h : telf32header);
+    procedure MayBeSwapHeader(var h : telf64header);
+    procedure MayBeSwapHeader(var h : telf32proghdr);
+    procedure MayBeSwapHeader(var h : telf64proghdr);
+    procedure MaybeSwapSecHeader(var h : telf32sechdr);
+    procedure MaybeSwapSecHeader(var h : telf64sechdr);
+    procedure MaybeSwapElfSymbol(var h : telf32symbol);
+    procedure MaybeSwapElfSymbol(var h : telf64symbol);
+    procedure MaybeSwapElfReloc(var h : telf32reloc);
+    procedure MaybeSwapElfReloc(var h : telf64reloc);
+    procedure MaybeSwapElfDyn(var h : telf32dyn);
+    procedure MaybeSwapElfDyn(var h : telf64dyn);
+    procedure MaybeSwapElfverdef(var h: TElfverdef);
+    procedure MaybeSwapElfverdaux(var h: TElfverdaux);
+    procedure MaybeSwapElfverneed(var h: TElfverneed);
+    procedure MaybeSwapElfvernaux(var h: TElfvernaux);
 
 implementation
 
+    uses
+      systems;
+
+    procedure MayBeSwapHeader(var h : telf32header);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              e_type:=swapendian(e_type);
+              e_machine:=swapendian(e_machine);
+              e_version:=swapendian(e_version);
+              e_entry:=swapendian(e_entry);
+              e_phoff:=swapendian(e_phoff);
+              e_shoff:=swapendian(e_shoff);
+              e_flags:=swapendian(e_flags);
+              e_ehsize:=swapendian(e_ehsize);
+              e_phentsize:=swapendian(e_phentsize);
+              e_phnum:=swapendian(e_phnum);
+              e_shentsize:=swapendian(e_shentsize);
+              e_shnum:=swapendian(e_shnum);
+              e_shstrndx:=swapendian(e_shstrndx);
+            end;
+      end;
+
+
+    procedure MayBeSwapHeader(var h : telf64header);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              e_type:=swapendian(e_type);
+              e_machine:=swapendian(e_machine);
+              e_version:=swapendian(e_version);
+              e_entry:=swapendian(e_entry);
+              e_phoff:=swapendian(e_phoff);
+              e_shoff:=swapendian(e_shoff);
+              e_flags:=swapendian(e_flags);
+              e_ehsize:=swapendian(e_ehsize);
+              e_phentsize:=swapendian(e_phentsize);
+              e_phnum:=swapendian(e_phnum);
+              e_shentsize:=swapendian(e_shentsize);
+              e_shnum:=swapendian(e_shnum);
+              e_shstrndx:=swapendian(e_shstrndx);
+            end;
+      end;
+
+
+    procedure MayBeSwapHeader(var h : telf32proghdr);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              p_align:=swapendian(p_align);
+              p_filesz:=swapendian(p_filesz);
+              p_flags:=swapendian(p_flags);
+              p_memsz:=swapendian(p_memsz);
+              p_offset:=swapendian(p_offset);
+              p_paddr:=swapendian(p_paddr);
+              p_type:=swapendian(p_type);
+              p_vaddr:=swapendian(p_vaddr);
+            end;
+      end;
+
+
+    procedure MayBeSwapHeader(var h : telf64proghdr);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              p_align:=swapendian(p_align);
+              p_filesz:=swapendian(p_filesz);
+              p_flags:=swapendian(p_flags);
+              p_memsz:=swapendian(p_memsz);
+              p_offset:=swapendian(p_offset);
+              p_paddr:=swapendian(p_paddr);
+              p_type:=swapendian(p_type);
+              p_vaddr:=swapendian(p_vaddr);
+            end;
+      end;
+
+
+    procedure MaybeSwapSecHeader(var h : telf32sechdr);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              sh_name:=swapendian(sh_name);
+              sh_type:=swapendian(sh_type);
+              sh_flags:=swapendian(sh_flags);
+              sh_addr:=swapendian(sh_addr);
+              sh_offset:=swapendian(sh_offset);
+              sh_size:=swapendian(sh_size);
+              sh_link:=swapendian(sh_link);
+              sh_info:=swapendian(sh_info);
+              sh_addralign:=swapendian(sh_addralign);
+              sh_entsize:=swapendian(sh_entsize);
+            end;
+      end;
+
+
+    procedure MaybeSwapSecHeader(var h : telf64sechdr);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              sh_name:=swapendian(sh_name);
+              sh_type:=swapendian(sh_type);
+              sh_flags:=swapendian(sh_flags);
+              sh_addr:=swapendian(sh_addr);
+              sh_offset:=swapendian(sh_offset);
+              sh_size:=swapendian(sh_size);
+              sh_link:=swapendian(sh_link);
+              sh_info:=swapendian(sh_info);
+              sh_addralign:=swapendian(sh_addralign);
+              sh_entsize:=swapendian(sh_entsize);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfSymbol(var h : telf32symbol);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              st_name:=swapendian(st_name);
+              st_value:=swapendian(st_value);
+              st_size:=swapendian(st_size);
+              st_shndx:=swapendian(st_shndx);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfSymbol(var h : telf64symbol);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              st_name:=swapendian(st_name);
+              st_value:=swapendian(st_value);
+              st_size:=swapendian(st_size);
+              st_shndx:=swapendian(st_shndx);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfReloc(var h : telf32reloc);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              address:=swapendian(address);
+              info:=swapendian(info);
+              addend:=swapendian(addend);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfReloc(var h : telf64reloc);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              address:=swapendian(address);
+              info:=swapendian(info);
+              addend:=swapendian(addend);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfDyn(var h : telf32dyn);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              d_tag:=swapendian(d_tag);
+              d_val:=swapendian(d_val);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfDyn(var h : telf64dyn);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              d_tag:=swapendian(d_tag);
+              d_val:=swapendian(d_val);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfverdef(var h: TElfverdef);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              vd_version:=swapendian(vd_version);
+              vd_flags:=swapendian(vd_flags);
+              vd_ndx:=swapendian(vd_ndx);
+              vd_cnt:=swapendian(vd_cnt);
+              vd_hash:=swapendian(vd_hash);
+              vd_aux:=swapendian(vd_aux);
+              vd_next:=swapendian(vd_next);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfverdaux(var h: TElfverdaux);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              vda_name:=swapendian(vda_name);
+              vda_next:=swapendian(vda_next);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfverneed(var h: TElfverneed);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              vn_version:=swapendian(vn_version);
+              vn_cnt:=swapendian(vn_cnt);
+              vn_file:=swapendian(vn_file);
+              vn_aux:=swapendian(vn_aux);
+              vn_next:=swapendian(vn_next);
+            end;
+      end;
+
+
+    procedure MaybeSwapElfvernaux(var h: TElfvernaux);
+      begin
+        if source_info.endian<>target_info.endian then
+          with h do
+            begin
+              vna_hash:=swapendian(vna_hash);
+              vna_flags:=swapendian(vna_flags);
+              vna_other:=swapendian(vna_other);
+              vna_name:=swapendian(vna_name);
+              vna_next:=swapendian(vna_next);
+            end;
+      end;
+
 end.

Файловите разлики са ограничени, защото са твърде много
+ 535 - 13
compiler/entfile.pas


+ 16 - 5
compiler/fmodule.pas

@@ -128,7 +128,9 @@ interface
         crc,
         interface_crc,
         indirect_crc  : cardinal;
-        flags         : cardinal;  { the PPU flags }
+        headerflags   : cardinal;  { the PPU header flags }
+        longversion   : cardinal;  { longer version than what fits in the ppu header }
+        moduleflags   : tmoduleflags; { ppu flags that do not need to be known by just reading the ppu header }
         islibrary     : boolean;  { if it is a library (win32 dll) }
         IsPackage     : boolean;
         moduleid      : longint;
@@ -167,7 +169,8 @@ interface
         loaded_from   : tmodule;
         _exports      : tlinkedlist;
         dllscannerinputlist : TFPHashList;
-        resourcefiles : TCmdStrList;
+        resourcefiles,
+        linkorderedsymbols : TCmdStrList;
         linkunitofiles,
         linkunitstaticlibs,
         linkunitsharedlibs,
@@ -562,6 +565,7 @@ implementation
         used_units:=TLinkedList.Create;
         dependent_units:=TLinkedList.Create;
         resourcefiles:=TCmdStrList.Create;
+        linkorderedsymbols:=TCmdStrList.Create;
         linkunitofiles:=TLinkContainer.Create;
         linkunitstaticlibs:=TLinkContainer.Create;
         linkunitsharedlibs:=TLinkContainer.Create;
@@ -574,7 +578,9 @@ implementation
         crc:=0;
         interface_crc:=0;
         indirect_crc:=0;
-        flags:=0;
+        headerflags:=0;
+        longversion:=0;
+        moduleflags:=[];
         scanner:=nil;
         unitmap:=nil;
         unitmapsize:=0;
@@ -681,6 +687,7 @@ implementation
         used_units.free;
         dependent_units.free;
         resourcefiles.Free;
+        linkorderedsymbols.Free;
         linkunitofiles.Free;
         linkunitstaticlibs.Free;
         linkunitsharedlibs.Free;
@@ -837,6 +844,8 @@ implementation
         dependent_units:=TLinkedList.Create;
         resourcefiles.Free;
         resourcefiles:=TCmdStrList.Create;
+        linkorderedsymbols.Free;
+        linkorderedsymbols:=TCmdStrList.Create;;
         pendingspecializations.free;
         pendingspecializations:=tfphashobjectlist.create(false);
         if assigned(waitingforunit) and
@@ -886,7 +895,9 @@ implementation
         crc:=0;
         interface_crc:=0;
         indirect_crc:=0;
-        flags:=0;
+        headerflags:=0;
+        longversion:=0;
+        moduleflags:=[];
         mainfilepos.line:=0;
         mainfilepos.column:=0;
         mainfilepos.fileindex:=0;
@@ -1061,7 +1072,7 @@ implementation
                   this is for units with an initialization/finalization }
                 if (unitmap[pu.u.moduleid].refs=0) and
                    pu.in_uses and
-                   ((pu.u.flags and (uf_init or uf_finalize))=0) then
+                   ((pu.u.moduleflags * [mf_init,mf_finalize])=[]) then
                   CGMessagePos2(pu.unitsym.fileinfo,sym_n_unit_not_used,pu.u.realmodulename^,realmodulename^);
               end;
             pu:=tused_unit(pu.next);

+ 38 - 6
compiler/fpcdefs.inc

@@ -34,6 +34,11 @@
 
 {$define USEEXCEPT}
 
+{$ifdef VER3_0}
+  { fix bootstrapping dfa gives warnings on 3.2+ code due to changed case behaviour }
+  {$OPTIMIZATION NODFA}
+{$endif VER3_0}
+
 { This fake CPU is used to allow incorporation of globtype unit
   into utils/ppudump without any CPU specific code PM }
 {$ifdef generic_cpu}
@@ -68,6 +73,12 @@
   {$define SUPPORT_GET_FRAME}
   {$define cpucg64shiftsupport}
   {$define OMFOBJSUPPORT}
+  {$ifdef go32v2}
+    { go32v2 uses cwsdpmi extender which is incompatible with watcom extender
+      thus we use the internal smartlink sections by default in that case. }
+    {$define I8086_SMARTLINK_SECTIONS}
+    {$define i8086_link_intern_debuginfo}
+  {$endif go32v2}
 {$endif i8086}
 
 {$ifdef i386}
@@ -265,11 +276,34 @@
   {$define cputargethasfixedstack}
   {$define cpurefshaveindexreg}
   {$define SUPPORT_GET_FRAME}
+  {$define SUPPORT_SAFECALL}
 {$endif aarch64}
 
-{$IFDEF MACOS}
-{$DEFINE USE_FAKE_SYSUTILS}
-{$ENDIF MACOS}
+{$ifdef riscv32}
+  {$define riscv}
+  {$define cpu32bit}
+  {$define cpu32bitaddr}
+  {$define cpu32bitalu}
+  {$define cpufpemu}
+  {$define cputargethasfixedstack}
+  {$define cpuneedsmulhelper}
+  {$define cpuneedsdivhelper}
+  {$define cpucapabilities}
+  {$define cpurequiresproperalignment}
+{$endif riscv32}
+
+{$ifdef riscv64}
+  {$define riscv}
+  {$define cpu64bit}
+  {$define cpu64bitaddr}
+  {$define cpu64bitalu}
+  {$define cpufpemu}
+  {$define cputargethasfixedstack}
+  {$define cpuneedsmulhelper}
+  {$define cpuneedsdivhelper}
+  {$define cpucapabilities}
+  {$define cpurequiresproperalignment}
+{$endif riscv64}
 
 { Stabs is not officially supported on 64 bit targets by gdb, except on Mac OS X
   (but there we don't support it)
@@ -291,9 +325,7 @@
 }
 {$ifdef llvm}
   {$undef SUPPORT_MMX}
-  {$undef cpu16bitalu}
-  {$undef cpu32bitalu}
-  {$define cpu64bitalu}
   {$define cpuhighleveltarget}
+  {$define cpucg64shiftsupport}
   {$define symansistr}
 {$endif}

+ 2 - 5
compiler/fpcp.pas

@@ -127,8 +127,8 @@ implementation
   {$ifdef cpufpemu}
      { check if floating point emulation is on?
        fpu emulation isn't unit levelwise because it affects calling convention }
-     if ((pcpfile.header.common.flags and uf_fpu_emulation)<>0) xor
-          (cs_fp_emulation in current_settings.moduleswitches) then
+     if ((uf_fpu_emulation and pcpfile.header.common.flags)<>0) <>
+        (cs_fp_emulation in current_settings.moduleswitches) then
        begin
          pcpfile.free;
          pcpfile:=nil;
@@ -137,9 +137,6 @@ implementation
        end;
   {$endif cpufpemu}
 
-    { Load values to be access easier }
-      //flags:=pcpfile.header.common.flags;
-      //crc:=pcpfile.header.checksum;
     { Show Debug info }
       Message1(package_u_pcp_time,filetimestring(pcpfiletime));
       Message1(package_u_pcp_flags,tostr(pcpfile.header.common.flags{flags}));

+ 187 - 119
compiler/fppu.pas

@@ -43,7 +43,6 @@ interface
       symbase,ppu,symtype;
 
     type
-
        { tppumodule }
 
        tppumodule = class(tmodule)
@@ -97,8 +96,10 @@ interface
           procedure writederefdata;
           procedure writeImportSymbols;
           procedure writeResources;
+          procedure writeOrderedSymbols;
           procedure writeunitimportsyms;
           procedure writeasmsyms(kind:tunitasmlisttype;list:tfphashobjectlist);
+          procedure writeextraheader;
           procedure readsourcefiles;
           procedure readloadunit;
           procedure readlinkcontainer(var p:tlinkcontainer);
@@ -106,9 +107,11 @@ interface
           procedure readderefdata;
           procedure readImportSymbols;
           procedure readResources;
+          procedure readOrderedSymbols;
           procedure readwpofile;
           procedure readunitimportsyms;
           procedure readasmsyms;
+          procedure readextraheader;
 {$IFDEF MACRO_DIFF_HINT}
           procedure writeusedmacro(p:TNamedIndexItem;arg:pointer);
           procedure writeusedmacros;
@@ -244,98 +247,110 @@ var
 
 
     function tppumodule.openppu(ppufiletime:longint):boolean;
-      begin
-        openppu:=false;
-      { check for a valid PPU file }
-        if not ppufile.CheckPPUId then
-         begin
-           ppufile.free;
-           ppufile:=nil;
-           Message(unit_u_ppu_invalid_header);
-           exit;
-         end;
-      { check for allowed PPU versions }
-        if not (ppufile.getversion = CurrentPPUVersion) then
-         begin
-           Message1(unit_u_ppu_invalid_version,tostr(ppufile.getversion),@queuecomment);
-           ppufile.free;
-           ppufile:=nil;
-           exit;
-         end;
-      { check the target processor }
-        if tsystemcpu(ppufile.header.common.cpu)<>target_cpu then
-         begin
-           ppufile.free;
-           ppufile:=nil;
-           Message(unit_u_ppu_invalid_processor,@queuecomment);
-           exit;
-         end;
-      { check target }
-        if tsystem(ppufile.header.common.target)<>target_info.system then
-         begin
-           ppufile.free;
-           ppufile:=nil;
-           Message(unit_u_ppu_invalid_target,@queuecomment);
-           exit;
-         end;
-{$ifdef i8086}
-      { check i8086 memory model flags }
-        if ((ppufile.header.common.flags and uf_i8086_far_code)<>0) xor
-            (current_settings.x86memorymodel in [mm_medium,mm_large,mm_huge]) then
-         begin
-           ppufile.free;
-           ppufile:=nil;
-           Message(unit_u_ppu_invalid_memory_model,@queuecomment);
-           exit;
-         end;
-        if ((ppufile.header.common.flags and uf_i8086_far_data)<>0) xor
-            (current_settings.x86memorymodel in [mm_compact,mm_large]) then
-         begin
-           ppufile.free;
-           ppufile:=nil;
-           Message(unit_u_ppu_invalid_memory_model,@queuecomment);
-           exit;
-         end;
-        if ((ppufile.header.common.flags and uf_i8086_huge_data)<>0) xor
-            (current_settings.x86memorymodel=mm_huge) then
-         begin
-           ppufile.free;
-           ppufile:=nil;
-           Message(unit_u_ppu_invalid_memory_model,@queuecomment);
-           exit;
-         end;
-        if ((ppufile.header.common.flags and uf_i8086_cs_equals_ds)<>0) xor
-            (current_settings.x86memorymodel=mm_tiny) then
-         begin
-           ppufile.free;
-           ppufile:=nil;
-           Message(unit_u_ppu_invalid_memory_model,@queuecomment);
-           exit;
-         end;
-        if ((ppufile.header.common.flags and uf_i8086_ss_equals_ds)<>0) xor
-            (current_settings.x86memorymodel in [mm_tiny,mm_small,mm_medium]) then
-         begin
-           ppufile.free;
-           ppufile:=nil;
-           Message(unit_u_ppu_invalid_memory_model,@queuecomment);
-           exit;
-         end;
-{$endif i8086}
+
+      function checkheader: boolean;
+        begin
+          result:=false;
+          { check for a valid PPU file }
+            if not ppufile.CheckPPUId then
+             begin
+               Message(unit_u_ppu_invalid_header);
+               exit;
+             end;
+          { check for allowed PPU versions }
+            if not (ppufile.getversion = CurrentPPUVersion) then
+             begin
+               Message1(unit_u_ppu_invalid_version,tostr(ppufile.getversion),@queuecomment);
+               exit;
+             end;
+          { check the target processor }
+            if tsystemcpu(ppufile.header.common.cpu)<>target_cpu then
+             begin
+               Message(unit_u_ppu_invalid_processor,@queuecomment);
+               exit;
+             end;
+          { check target }
+            if tsystem(ppufile.header.common.target)<>target_info.system then
+             begin
+               Message(unit_u_ppu_invalid_target,@queuecomment);
+               exit;
+             end;
 {$ifdef cpufpemu}
-       { check if floating point emulation is on?
-         fpu emulation isn't unit levelwise because it affects calling convention }
-       if ((ppufile.header.common.flags and uf_fpu_emulation)<>0) xor
-            (cs_fp_emulation in current_settings.moduleswitches) then
-         begin
-           ppufile.free;
-           ppufile:=nil;
-           Message(unit_u_ppu_invalid_fpumode,@queuecomment);
-           exit;
-         end;
+          { check if floating point emulation is on?
+            fpu emulation isn't unit levelwise because it affects calling convention }
+          if ((ppufile.header.common.flags and uf_fpu_emulation)<>0) <>
+             (cs_fp_emulation in current_settings.moduleswitches) then
+            begin
+              Message(unit_u_ppu_invalid_fpumode,@queuecomment);
+              exit;
+            end;
 {$endif cpufpemu}
+           result:=true;
+        end;
+
+      function checkextraheader: boolean;
+        begin
+          result:=false;
+          if ppufile.readentry<>ibextraheader then
+            begin
+              Message(unit_u_ppu_invalid_header);
+              exit;
+            end;
+          readextraheader;
+          if (longversion<>CurrentPPULongVersion) or
+             not ppufile.EndOfEntry then
+            begin
+              Message(unit_u_ppu_invalid_header);
+              exit;
+            end;
+{$ifdef i8086}
+          { check i8086 memory model flags }
+          if (mf_i8086_far_code in moduleflags) <>
+             (current_settings.x86memorymodel in [mm_medium,mm_large,mm_huge]) then
+            begin
+              Message(unit_u_ppu_invalid_memory_model,@queuecomment);
+              exit;
+            end;
+          if (mf_i8086_far_data in moduleflags) <>
+             (current_settings.x86memorymodel in [mm_compact,mm_large]) then
+            begin
+              Message(unit_u_ppu_invalid_memory_model,@queuecomment);
+              exit;
+            end;
+          if (mf_i8086_huge_data in moduleflags) <>
+             (current_settings.x86memorymodel=mm_huge) then
+            begin
+              Message(unit_u_ppu_invalid_memory_model,@queuecomment);
+              exit;
+            end;
+          if (mf_i8086_cs_equals_ds in moduleflags) <>
+             (current_settings.x86memorymodel=mm_tiny) then
+            begin
+              Message(unit_u_ppu_invalid_memory_model,@queuecomment);
+              exit;
+            end;
+          if (mf_i8086_ss_equals_ds in moduleflags) <>
+             (current_settings.x86memorymodel in [mm_tiny,mm_small,mm_medium]) then
+            begin
+              Message(unit_u_ppu_invalid_memory_model,@queuecomment);
+              exit;
+            end;
+{$endif i8086}
+          result:=true;
+        end;
+
+      begin
+        openppu:=false;
+        if not checkheader or
+           not checkextraheader then
+          begin
+            ppufile.free;
+            ppufile:=nil;
+            exit;
+          end;
 
       { Load values to be access easier }
-        flags:=ppufile.header.common.flags;
+        headerflags:=ppufile.header.common.flags;
         crc:=ppufile.header.checksum;
         interface_crc:=ppufile.header.interface_checksum;
         indirect_crc:=ppufile.header.indirect_checksum;
@@ -344,7 +359,7 @@ var
           Message1(unit_u_ppu_time,filetimestring(ppufiletime))
         else
           Message1(unit_u_ppu_time,'unknown');
-        Message1(unit_u_ppu_flags,tostr(flags));
+        Message1(unit_u_ppu_flags,tostr(headerflags));
         Message1(unit_u_ppu_crc,hexstr(ppufile.header.checksum,8));
         Message1(unit_u_ppu_crc,hexstr(ppufile.header.interface_checksum,8)+' (intfc)');
         Message1(unit_u_ppu_crc,hexstr(ppufile.header.indirect_checksum,8)+' (indc)');
@@ -933,6 +948,20 @@ var
       end;
 
 
+    procedure tppumodule.writeOrderedSymbols;
+      var
+        res : TCmdStrListItem;
+      begin
+        res:=TCmdStrListItem(linkorderedsymbols.First);
+        while res<>nil do
+          begin
+            ppufile.putstring(res.FPStr);
+            res:=TCmdStrListItem(res.Next);
+          end;
+        ppufile.writeentry(iborderedsymbols);
+      end;
+
+
     procedure tppumodule.writeunitimportsyms;
       var
         i : longint;
@@ -961,6 +990,38 @@ var
         ppufile.writeentry(ibasmsymbols);
       end;
 
+    procedure tppumodule.writeextraheader;
+      var
+        old_docrc: boolean;
+      begin
+        { create unit flags }
+        if do_release then
+          include(moduleflags,mf_release);
+        if assigned(localsymtable) then
+          include(moduleflags,mf_local_symtable);
+        if cs_checkpointer_called in current_settings.moduleswitches then
+          include(moduleflags,mf_checkpointer_called);
+{$ifdef i8086}
+        if current_settings.x86memorymodel in [mm_medium,mm_large,mm_huge] then
+          include(moduleflags,mf_i8086_far_code);
+        if current_settings.x86memorymodel in [mm_compact,mm_large] then
+          include(moduleflags,mf_i8086_far_data);
+        if current_settings.x86memorymodel=mm_huge then
+          include(moduleflags,mf_i8086_huge_data);
+        if current_settings.x86memorymodel=mm_tiny then
+          include(moduleflags,mf_i8086_cs_equals_ds);
+        if current_settings.x86memorymodel in [mm_tiny,mm_small,mm_medium] then
+          include(moduleflags,mf_i8086_ss_equals_ds);
+{$endif i8086}
+
+        old_docrc:=ppufile.do_crc;
+        ppufile.do_crc:=false;
+        ppufile.putlongint(longint(CurrentPPULongVersion));
+        ppufile.putsmallset(moduleflags);
+        ppufile.writeentry(ibextraheader);
+        ppufile.do_crc:=old_docrc;
+      end;
+
 
 {$IFDEF MACRO_DIFF_HINT}
 
@@ -1026,7 +1087,7 @@ var
         source_time   : longint;
         hp            : tinputfile;
       begin
-        sources_avail:=(flags and uf_release) = 0;
+        sources_avail:=not(mf_release in moduleflags);
         is_main:=true;
         main_dir:='';
         while not ppufile.endofentry do
@@ -1037,7 +1098,7 @@ var
            temp_dir:='';
            if sources_avail then
              begin
-               if (flags and uf_in_library)<>0 then
+               if (headerflags and uf_in_library)<>0 then
                 begin
                   sources_avail:=false;
                   temp:=' library';
@@ -1239,6 +1300,13 @@ var
       end;
 
 
+    procedure tppumodule.readOrderedSymbols;
+      begin
+        while not ppufile.endofentry do
+          linkorderedsymbols.Concat(ppufile.getstring);
+      end;
+
+
     procedure tppumodule.readwpofile;
       var
         orgwpofilename: string;
@@ -1286,8 +1354,6 @@ var
             list:=publicasmsyms;
           ualt_extern:
             list:=externasmsyms;
-          else
-            internalerror(2016060301);
         end;
         c:=ppufile.getlongint;
         for i:=0 to c-1 do
@@ -1300,6 +1366,13 @@ var
       end;
 
 
+    procedure tppumodule.readextraheader;
+      begin
+        longversion:=cardinal(ppufile.getlongint);
+        ppufile.getsmallset(moduleflags);
+      end;
+
+
     procedure tppumodule.load_interface;
       var
         b : byte;
@@ -1324,6 +1397,10 @@ var
                  modulename:=stringdup(upper(newmodulename));
                  realmodulename:=stringdup(newmodulename);
                end;
+             ibextraheader:
+               begin
+                 readextraheader;
+               end;
              ibfeatures :
                begin
                  ppufile.getsmallset(features);
@@ -1374,6 +1451,8 @@ var
                readderefdata;
              ibresources:
                readResources;
+             iborderedsymbols:
+               readOrderedSymbols;
              ibwpofile:
                readwpofile;
              ibendinterface :
@@ -1416,27 +1495,9 @@ var
          Message1(unit_u_ppu_write,realmodulename^);
 
          { create unit flags }
-         if do_release then
-          flags:=flags or uf_release;
-         if assigned(localsymtable) then
-           flags:=flags or uf_local_symtable;
-         if (cs_checkpointer_called in current_settings.moduleswitches) then
-           flags:=flags or uf_checkpointer_called;
-{$ifdef i8086}
-         if current_settings.x86memorymodel in [mm_medium,mm_large,mm_huge] then
-           flags:=flags or uf_i8086_far_code;
-         if current_settings.x86memorymodel in [mm_compact,mm_large] then
-           flags:=flags or uf_i8086_far_data;
-         if current_settings.x86memorymodel=mm_huge then
-           flags:=flags or uf_i8086_huge_data;
-         if current_settings.x86memorymodel=mm_tiny then
-           flags:=flags or uf_i8086_cs_equals_ds;
-         if current_settings.x86memorymodel in [mm_tiny,mm_small,mm_medium] then
-           flags:=flags or uf_i8086_ss_equals_ds;
-{$endif i8086}
 {$ifdef cpufpemu}
          if (cs_fp_emulation in current_settings.moduleswitches) then
-           flags:=flags or uf_fpu_emulation;
+           headerflags:=headerflags or uf_fpu_emulation;
 {$endif cpufpemu}
 {$ifdef Test_Double_checksum_write}
          Assign(CRCFile,s+'.IMP');
@@ -1448,6 +1509,9 @@ var
          if not ppufile.createfile then
           Message(unit_f_ppu_cannot_write);
 
+         { extra header (sub version, module flags) }
+         writeextraheader;
+
          { first the (JVM) namespace }
          if assigned(namespace) then
            begin
@@ -1509,6 +1573,7 @@ var
          writelinkcontainer(linkotherframeworks,iblinkotherframeworks,true);
          writeImportSymbols;
          writeResources;
+         writeOrderedSymbols;
          ppufile.do_crc:=true;
 
          { generate implementation deref data, the interface deref data is
@@ -1532,7 +1597,7 @@ var
               tstoredsymtable(globalmacrosymtable).buildderefimpl;
             end;
 
-         if (flags and uf_local_symtable)<>0 then
+         if mf_local_symtable in moduleflags then
            tstoredsymtable(localsymtable).buildderef_registered;
          buildderefunitimportsyms;
          writederefmap;
@@ -1575,7 +1640,7 @@ var
 
          { write static symtable
            needed for local debugging of unit functions }
-         if (flags and uf_local_symtable)<>0 then
+         if mf_local_symtable in moduleflags then
            tstoredsymtable(localsymtable).ppuwrite(ppufile);
 
          { write whole program optimisation-related information }
@@ -1593,7 +1658,7 @@ var
          ppufile.header.common.compiler:=wordversion;
          ppufile.header.common.cpu:=word(target_cpu);
          ppufile.header.common.target:=word(target_info.system);
-         ppufile.header.common.flags:=flags;
+         ppufile.header.common.flags:=headerflags;
          ppufile.header.deflistsize:=current_module.deflist.count;
          ppufile.header.symlistsize:=current_module.symlist.count;
          ppufile.writeheader;
@@ -1636,6 +1701,9 @@ var
          ppufile.putstring(realmodulename^);
          ppufile.writeentry(ibmodulename);
 
+         { extra header (sub version, module flags) }
+         writeextraheader;
+
          ppufile.putsmallset(moduleoptions);
          if mo_has_deprecated_msg in moduleoptions then
            ppufile.putstring(deprecatedmsg^);
@@ -1699,7 +1767,7 @@ var
          ppufile.header.common.compiler:=wordversion;
          ppufile.header.common.cpu:=word(target_cpu);
          ppufile.header.common.target:=word(target_info.system);
-         ppufile.header.common.flags:=flags;
+         ppufile.header.common.flags:=headerflags;
          ppufile.writeheader;
 
          ppufile.closefile;
@@ -1734,7 +1802,7 @@ var
               if (pu.u.interface_crc<>pu.interface_checksum) or
                  (pu.u.indirect_crc<>pu.indirect_checksum) or
                  (
-                  ((ppufile.header.common.flags and uf_release)=0) and
+                  (not(mf_release in moduleflags)) and
                   (pu.u.crc<>pu.checksum)
                  ) then
                begin
@@ -1810,7 +1878,7 @@ var
          end;
 
         { load implementation symtable }
-        if (flags and uf_local_symtable)<>0 then
+        if mf_local_symtable in moduleflags then
           begin
             localsymtable:=tstaticsymtable.create(modulename^,moduleid);
             tstaticsymtable(localsymtable).ppuload(ppufile);

+ 2 - 0
compiler/gendef.pas

@@ -136,6 +136,8 @@ begin
         if dllversion<>'' then
           writeln(t,'VERSION '+dllversion);
       end;
+    else
+      ;
   end;
 
 {write imports}

+ 63 - 5
compiler/globals.pas

@@ -54,7 +54,8 @@ interface
          [m_delphi,m_class,m_objpas,m_result,m_string_pchar,
           m_pointer_2_procedure,m_autoderef,m_tp_procvar,m_initfinal,m_default_ansistring,
           m_out,m_default_para,m_duplicate_names,m_hintdirective,
-          m_property,m_default_inline,m_except,m_advanced_records];
+          m_property,m_default_inline,m_except,m_advanced_records,
+          m_array_operators];
        delphiunicodemodeswitches = delphimodeswitches + [m_systemcodepage,m_default_unicodestring];
        fpcmodeswitches =
          [m_fpc,m_string_pchar,m_nested_comment,m_repeat_forward,
@@ -163,6 +164,8 @@ interface
 
          disabledircache : boolean;
 
+         tlsmodel : ttlsmodel;
+
 {$if defined(i8086)}
          x86memorymodel  : tx86memorymodel;
 {$endif defined(i8086)}
@@ -213,6 +216,15 @@ interface
         property items[I:longint]:TLinkRec read getlinkrec; default;
       end;
 
+      tpendingstateflag = (
+        psf_alignment_changed,
+        psf_verbosity_full_switched,
+        psf_local_switches_changed,
+        psf_packenum_changed,
+        psf_packrecords_changed,
+        psf_setalloc_changed
+      );
+      tpendingstateflags = set of tpendingstateflag;
 
       tpendingstate = record
         nextverbositystr : shortstring;
@@ -221,9 +233,10 @@ interface
         nextcallingstr : shortstring;
         nextmessagerecord : pmessagestaterecord;
         nextalignment : talignmentinfo;
-        alignmentchanged,
-        verbosityfullswitched,
-        localswitcheschanged : boolean;
+        nextpackenum : shortint;
+        nextpackrecords : shortint;
+        nextsetalloc : shortint;
+        flags : tpendingstateflags;
       end;
 
 
@@ -393,6 +406,9 @@ interface
           procalign : 0;
           loopalign : 0;
           jumpalign : 0;
+          jumpalignskipmax    : 0;
+          coalescealign   : 0;
+          coalescealignskipmax: 0;
           constalignmin : 0;
           constalignmax : 0;
           varalignmin : 0;
@@ -518,6 +534,18 @@ interface
         asmcputype : cpu_none;
         fputype : fpu_x87;
   {$endif i8086}
+  {$ifdef riscv32}
+        cputype : cpu_rv32imafd;
+        optimizecputype : cpu_rv32imafd;
+        asmcputype : cpu_none;
+        fputype : fpu_fd;
+  {$endif riscv32}
+  {$ifdef riscv64}
+        cputype : cpu_rv64imafdc;
+        optimizecputype : cpu_rv64imafdc;
+        asmcputype : cpu_none;
+        fputype : fpu_fd;
+  {$endif riscv64}
 {$endif not GENERIC_CPU}
         asmmode : asmmode_standard;
 {$ifndef jvm}
@@ -530,6 +558,8 @@ interface
         minfpconstprec : s32real;
 
         disabledircache : false;
+
+        tlsmodel : tlsm_none;
 {$if defined(i8086)}
         x86memorymodel : mm_small;
 {$endif defined(i8086)}
@@ -862,6 +892,30 @@ implementation
          end;
 
 {$endif mswindows}
+{$ifdef openbsd}
+       function GetOpenBSDLocalBase: ansistring;
+         var
+           envvalue: pchar;
+         begin
+           envvalue := GetEnvPChar('LOCALBASE');
+           if assigned(envvalue) then
+             Result:=envvalue
+           else
+             Result:='/usr/local';
+           FreeEnvPChar(envvalue);
+         end;
+       function GetOpenBSDX11Base: ansistring;
+         var
+           envvalue: pchar;
+         begin
+           envvalue := GetEnvPChar('X11BASE');
+           if assigned(envvalue) then
+             Result:=envvalue
+           else
+             Result:='/usr/X11R6';
+           FreeEnvPChar(envvalue);
+         end;
+{$endif openbsd}
        var
          envstr: string;
          envvalue: pchar;
@@ -894,6 +948,10 @@ implementation
          ReplaceSpecialFolder('$PROGRAM_FILES_COMMON',CSIDL_PROGRAM_FILES_COMMON);
          ReplaceSpecialFolder('$PROFILE',CSIDL_PROFILE);
 {$endif mswindows}
+{$ifdef openbsd}
+         Replace(s,'$OPENBSD_LOCALBASE',GetOpenBSDLocalBase);
+         Replace(s,'$OPENBSD_X11BASE',GetOpenBSDX11Base);
+{$endif openbsd}
          { Replace environment variables between dollar signs }
          i := pos('$',s);
          while i>0 do
@@ -1394,7 +1452,7 @@ implementation
        if localexepath='' then
         begin
           hs1 := ExtractFileName(exeName);
-          ChangeFileExt(hs1,source_info.exeext);
+	  hs1 := ChangeFileExt(hs1,source_info.exeext);
 {$ifdef macos}
           FindFile(hs1,GetEnvironmentVariable('Commands'),false,localExepath);
 {$else macos}

+ 4 - 1
compiler/globstat.pas

@@ -60,6 +60,7 @@ type
     old_settings : tsettings;
     old_switchesstatestack : tswitchesstatestack;
     old_switchesstatestackpos : Integer;
+    old_verbosity : longint;
 
   { only saved/restored if "full" is true }
     old_asmdata : tasmdata;
@@ -74,7 +75,7 @@ procedure restore_global_state(const state:tglobalstate;full:boolean);
 implementation
 
 uses
-  pbase;
+  pbase,comphook;
 
   procedure save_global_state(out state:tglobalstate;full:boolean);
     begin
@@ -106,6 +107,7 @@ uses
           //flushpendingswitchesstate;
           oldcurrent_filepos:=current_filepos;
           old_settings:=current_settings;
+          old_verbosity:=status.verbosity;
 
           if full then
             begin
@@ -142,6 +144,7 @@ uses
           current_procinfo:=oldcurrent_procinfo;
           current_filepos:=oldcurrent_filepos;
           current_settings:=old_settings;
+          status.verbosity:=old_verbosity;
 
           if full then
             begin

+ 63 - 5
compiler/globtype.pas

@@ -87,6 +87,14 @@ interface
        AIntBits = 8;
 {$endif cpu8bitalu}
 
+     { Maximum possible size of locals space (stack frame) }
+     Const
+{$if defined(cpu16bitaddr)}
+       MaxLocalsSize = High(PUint);
+{$else}
+       MaxLocalsSize = High(longint) - 15;
+{$endif}
+
      Type
        PAWord = ^AWord;
        PAInt = ^AInt;
@@ -123,6 +131,12 @@ interface
            0 : (bytes:array[0..7] of byte);
            1 : (value:double);
        end;
+       { Use a variant record to be sure that the array if aligned correctly }
+       tcompsinglerec=record
+         case byte of
+           0 : (bytes:array[0..3] of byte);
+           1 : (value:single);
+       end;
        tcompextendedrec=record
          case byte of
            0 : (bytes:array[0..9] of byte);
@@ -142,6 +156,8 @@ interface
          cs_full_boolean_eval,cs_typed_const_writable,cs_allow_enum_calc,
          cs_do_inline,cs_fpu_fwait,cs_ieee_errors,
          cs_check_low_addr_load,cs_imported_data,
+         cs_excessprecision,cs_check_fpu_exceptions,
+         cs_check_all_case_coverage,
          { mmx }
          cs_mmx,cs_mmx_saturation,
          { parser }
@@ -333,6 +349,33 @@ interface
        );
        twpoptimizerswitches = set of twpoptimizerswitch;
 
+       { module flags (extra unit flags not in ppu header) }
+       tmoduleflag = (
+         mf_init,                     { unit has initialization section }
+         mf_finalize,                 { unit has finalization section   }
+         mf_checkpointer_called,      { Unit uses experimental checkpointer test code }
+         mf_has_resourcestrings,      { unit has resource string section }
+         mf_release,                  { unit was compiled with -Ur option }
+         mf_threadvars,               { unit has threadvars }
+         mf_has_stabs_debuginfo,      { this unit has stabs debuginfo generated }
+         mf_local_symtable,           { this unit has a local symtable stored }
+         mf_uses_variants,            { this unit uses variants }
+         mf_has_resourcefiles,        { this unit has external resources (using $R directive)}
+         mf_has_exports,              { this module or a used unit has exports }
+         mf_has_dwarf_debuginfo,      { this unit has dwarf debuginfo generated }
+         mf_wideinits,                { this unit has winlike widestring typed constants }
+         mf_classinits,               { this unit has class constructors/destructors }
+         mf_resstrinits,              { this unit has string consts referencing resourcestrings }
+         mf_i8086_far_code,           { this unit uses an i8086 memory model with far code (i.e. medium, large or huge) }
+         mf_i8086_far_data,           { this unit uses an i8086 memory model with far data (i.e. compact or large) }
+         mf_i8086_huge_data,          { this unit uses an i8086 memory model with huge data (i.e. huge) }
+         mf_i8086_cs_equals_ds,       { this unit uses an i8086 memory model with CS=DS (i.e. tiny) }
+         mf_i8086_ss_equals_ds,       { this unit uses an i8086 memory model with SS=DS (i.e. tiny, small or medium) }
+         mf_package_deny,             { this unit must not be part of a package }
+         mf_package_weak              { this unit may be completely contained in a package }
+       );
+       tmoduleflags = set of tmoduleflag;
+
     type
        ttargetswitchinfo = record
           name: string[22];
@@ -377,7 +420,7 @@ interface
        { switches being applied to all CPUs at the given level }
        genericlevel1optimizerswitches = [cs_opt_level1,cs_opt_peephole];
        genericlevel2optimizerswitches = [cs_opt_level2,cs_opt_remove_emtpy_proc];
-       genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate,cs_opt_nodedfa,cs_opt_use_load_modify_store,cs_opt_loopunroll];
+       genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate,cs_opt_nodedfa{$ifndef llvm},cs_opt_use_load_modify_store{$endif},cs_opt_loopunroll];
        genericlevel4optimizerswitches = [cs_opt_level4,cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
 
        { whole program optimizations whose information generation requires
@@ -438,7 +481,10 @@ interface
          m_blocks,              { support for http://en.wikipedia.org/wiki/Blocks_(C_language_extension) }
          m_isolike_io,          { I/O as it required by an ISO compatible compiler }
          m_isolike_program_para, { program parameters as it required by an ISO compatible compiler }
-         m_isolike_mod          { mod operation as it is required by an iso compatible compiler }
+         m_isolike_mod,         { mod operation as it is required by an iso compatible compiler }
+         m_array_operators,     { use Delphi compatible array operators instead of custom ones ("+") }
+         m_multi_helpers,       { helpers can appear in multiple scopes simultaneously }
+         m_array2dynarray       { regular arrays can be implicitly converted to dynamic arrays }
        );
        tmodeswitches = set of tmodeswitch;
 
@@ -570,7 +616,7 @@ interface
            'Interrupt',
            'HardFloat',
            'SysV_ABI_Default',
-           'MS_ABI_CDecl', { TODO: Is this correct? Shouldn't it be SysV_ABI_Default }
+           'SysV_ABI_CDecl',
            'MS_ABI_Default',
            'MS_ABI_CDecl',
            'VectorCall'
@@ -626,7 +672,10 @@ interface
          'CBLOCKS',
          'ISOIO',
          'ISOPROGRAMPARAS',
-         'ISOMOD'
+         'ISOMOD',
+         'ARRAYOPERATORS',
+         'MULTIHELPERS',
+         'ARRAYTODYNARRAY'
          );
 
 
@@ -683,10 +732,19 @@ interface
            for i8086 cpu huge memory model,
            as this changes SP register it requires special handling
            to restore DS segment register  }
-         pi_has_open_array_parameter
+         pi_has_open_array_parameter,
+         { subroutine uses threadvars }
+         pi_uses_threadvar
        );
        tprocinfoflags=set of tprocinfoflag;
 
+       ttlsmodel = (tlsm_none,
+         { elf tls model: works for all kind of code and thread vars }
+         tlsm_general,
+         { elf tls model: works only if the thread vars are declared and used in the same executable }
+         tlsm_local
+       );
+
     type
       { float types -- warning, this enum/order is used internally by the RTL
         as well in rtl/inc/real2str.inc }

+ 630 - 9
compiler/hlcg2ll.pas

@@ -304,7 +304,7 @@ unit hlcg2ll;
 
           procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);override;
           procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);override;
-          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override;
+          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;var size:tdef;maybeconst:boolean);override;
 //          procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override;
 
           procedure maketojumpboollabels(list: TAsmList; p: tnode; truelabel, falselabel: tasmlabel); override;
@@ -330,7 +330,8 @@ implementation
 
     uses
        globals,systems,
-       verbose,defutil,
+       verbose,defutil,symsym,
+       procinfo,paramgr,
        cgobj,tgobj,cutils,
        ncgutil;
 
@@ -1250,7 +1251,7 @@ implementation
       end;
     end;
 
-  procedure thlcg2ll.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean);
+  procedure thlcg2ll.location_force_mmregscalar(list: TAsmList; var l: tlocation; var size: tdef; maybeconst: boolean);
     var
       reg : tregister;
       href : treference;
@@ -1297,6 +1298,7 @@ implementation
                reg:=getmmregister(list,newsize);
                a_loadmm_loc_reg(list,size,newsize,l,reg,mms_movescalar);
                l.size:=def_cgsize(newsize);
+               size:=newsize;
              end;
           location_freetemp(list,l);
           location_reset(l,LOC_MMREGISTER,l.size);
@@ -1317,10 +1319,90 @@ implementation
       ncgutil.maketojumpboollabels(list,p,truelabel,falselabel);
     end;
 
+{$if first_mm_imreg = 0}
+  {$WARN 4044 OFF} { Comparison might be always false ... }
+{$endif}
+
   procedure thlcg2ll.gen_load_para_value(list: TAsmList);
-    begin
-      ncgutil.gen_load_para_value(list);
-    end;
+
+    procedure get_para(const paraloc:TCGParaLocation);
+      begin
+         case paraloc.loc of
+           LOC_REGISTER :
+             begin
+               if getsupreg(paraloc.register)<first_int_imreg then
+                 cg.getcpuregister(list,paraloc.register);
+             end;
+           LOC_MMREGISTER :
+             begin
+               if getsupreg(paraloc.register)<first_mm_imreg then
+                 cg.getcpuregister(list,paraloc.register);
+             end;
+           LOC_FPUREGISTER :
+             begin
+               if getsupreg(paraloc.register)<first_fpu_imreg then
+                 cg.getcpuregister(list,paraloc.register);
+             end;
+           else
+             ;
+         end;
+      end;
+
+   var
+     i : longint;
+     currpara : tparavarsym;
+     paraloc  : pcgparalocation;
+   begin
+     if (po_assembler in current_procinfo.procdef.procoptions) or
+     { exceptfilters have a single hidden 'parentfp' parameter, which
+       is handled by tcg.g_proc_entry. }
+        (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
+       exit;
+
+     { Allocate registers used by parameters }
+     for i:=0 to current_procinfo.procdef.paras.count-1 do
+       begin
+         currpara:=tparavarsym(current_procinfo.procdef.paras[i]);
+         paraloc:=currpara.paraloc[calleeside].location;
+         while assigned(paraloc) do
+           begin
+             if paraloc^.loc in [LOC_REGISTER,LOC_FPUREGISTER,LOC_MMREGISTER] then
+               get_para(paraloc^);
+             paraloc:=paraloc^.next;
+           end;
+       end;
+
+     { Copy parameters to local references/registers }
+     for i:=0 to current_procinfo.procdef.paras.count-1 do
+       begin
+         currpara:=tparavarsym(current_procinfo.procdef.paras[i]);
+         { don't use currpara.vardef, as this will be wrong in case of
+           call-by-reference parameters (it won't contain the pointerdef) }
+         gen_load_cgpara_loc(list,currpara.paraloc[calleeside].def,currpara.paraloc[calleeside],currpara.initialloc,paramanager.param_use_paraloc(currpara.paraloc[calleeside]));
+         { gen_load_cgpara_loc() already allocated the initialloc
+           -> don't allocate again }
+         if currpara.initialloc.loc in [LOC_CREGISTER,LOC_CFPUREGISTER,LOC_CMMREGISTER] then
+           begin
+             gen_alloc_regvar(list,currpara,false);
+             hlcg.varsym_set_localloc(list,currpara);
+           end;
+       end;
+
+     { generate copies of call by value parameters, must be done before
+       the initialization and body is parsed because the refcounts are
+       incremented using the local copies }
+     current_procinfo.procdef.parast.SymList.ForEachCall(@hlcg.g_copyvalueparas,list);
+     if not(po_assembler in current_procinfo.procdef.procoptions) then
+       begin
+         { initialize refcounted paras, and trash others. Needed here
+           instead of in gen_initialize_code, because when a reference is
+           intialised or trashed while the pointer to that reference is kept
+           in a regvar, we add a register move and that one again has to
+           come after the parameter loading code as far as the register
+           allocator is concerned }
+         current_procinfo.procdef.parast.SymList.ForEachCall(@init_paras,list);
+       end;
+   end;
 
   procedure thlcg2ll.gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation; const cgpara: tcgpara; locintsize: longint);
     var
@@ -1472,8 +1554,9 @@ implementation
               cg128.a_load128_loc_cgpara(list,l,cgpara)
             else
 {$else cpu64bitalu}
-            { use cg64 only for int64, not for 8 byte records }
-            if (l.size in [OS_64,OS_S64]) and (cgpara.Size in [OS_64,OS_S64]) then
+            { use cg64 only for int64, not for 8 byte records; in particular,
+              filter out records passed in fpu/mm register}
+            if (l.size in [OS_64,OS_S64]) and (cgpara.Size in [OS_64,OS_S64]) and (cgpara.location^.loc in [LOC_REGISTER,LOC_REFERENCE]) then
               cg64.a_load64_loc_cgpara(list,l,cgpara)
             else
 {$endif cpu64bitalu}
@@ -1524,8 +1607,544 @@ implementation
     end;
 
   procedure thlcg2ll.gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation; reusepara: boolean);
+
+    procedure unget_para(const paraloc:TCGParaLocation);
+      begin
+         case paraloc.loc of
+           LOC_REGISTER :
+             begin
+               if getsupreg(paraloc.register)<first_int_imreg then
+                 cg.ungetcpuregister(list,paraloc.register);
+             end;
+           LOC_MMREGISTER :
+             begin
+               if getsupreg(paraloc.register)<first_mm_imreg then
+                 cg.ungetcpuregister(list,paraloc.register);
+             end;
+           LOC_FPUREGISTER :
+             begin
+               if getsupreg(paraloc.register)<first_fpu_imreg then
+                 cg.ungetcpuregister(list,paraloc.register);
+             end;
+           else
+             ;
+         end;
+      end;
+
+    var
+      paraloc   : pcgparalocation;
+      href      : treference;
+      sizeleft  : aint;
+      tempref   : treference;
+      loadsize  : tcgint;
+      tempreg  : tregister;
+{$ifdef mips}
+      //tmpreg   : tregister;
+{$endif mips}
+{$ifndef cpu64bitalu}
+      reg64    : tregister64;
+{$if defined(cpu8bitalu)}
+      curparaloc : PCGParaLocation;
+{$endif defined(cpu8bitalu)}
+{$endif not cpu64bitalu}
     begin
-      ncgutil.gen_load_cgpara_loc(list, vardef, para, destloc, reusepara);
+      paraloc:=para.location;
+      if not assigned(paraloc) then
+        internalerror(200408203);
+      { skip e.g. empty records }
+      if (paraloc^.loc = LOC_VOID) then
+        exit;
+      case destloc.loc of
+        LOC_REFERENCE :
+          begin
+            { If the parameter location is reused we don't need to copy
+              anything }
+            if not reusepara then
+              begin
+                href:=destloc.reference;
+                sizeleft:=para.intsize;
+                while assigned(paraloc) do
+                  begin
+                    if (paraloc^.size=OS_NO) then
+                      begin
+                        { Can only be a reference that contains the rest
+                          of the parameter }
+                        if (paraloc^.loc<>LOC_REFERENCE) or
+                           assigned(paraloc^.next) then
+                          internalerror(2005013010);
+                        cg.a_load_cgparaloc_ref(list,paraloc^,href,sizeleft,destloc.reference.alignment);
+                        inc(href.offset,sizeleft);
+                        sizeleft:=0;
+                      end
+                    else
+                      begin
+                        { the min(...) call ensures that we do not store more than place is left as
+                           paraloc^.size could be bigger than destloc.size of a parameter occupies a full register
+                           and as on big endian system the parameters might be left aligned, we have to work
+                           with the full register size for paraloc^.size }
+                        if tcgsize2size[destloc.size]<>0 then
+                          loadsize:=min(min(tcgsize2size[paraloc^.size],tcgsize2size[destloc.size]),sizeleft)
+                        else
+                          loadsize:=min(tcgsize2size[paraloc^.size],sizeleft);
+
+                        cg.a_load_cgparaloc_ref(list,paraloc^,href,loadsize,destloc.reference.alignment);
+                        inc(href.offset,loadsize);
+                        dec(sizeleft,loadsize);
+                      end;
+                    unget_para(paraloc^);
+                    paraloc:=paraloc^.next;
+                  end;
+              end;
+          end;
+        LOC_REGISTER,
+        LOC_CREGISTER :
+          begin
+{$ifdef cpu64bitalu}
+            if (para.size in [OS_128,OS_S128,OS_F128]) and
+               ({ in case of fpu emulation, or abi's that pass fpu values
+                  via integer registers }
+                (vardef.typ=floatdef) or
+                 is_methodpointer(vardef) or
+                 is_record(vardef)) then
+              begin
+                case paraloc^.loc of
+                  LOC_REGISTER,
+                  LOC_MMREGISTER:
+                    begin
+                      if not assigned(paraloc^.next) then
+                        internalerror(200410104);
+                      case tcgsize2size[paraloc^.size] of
+                        8:
+                          begin
+                            if (target_info.endian=ENDIAN_BIG) then
+                              begin
+                                { paraloc^ -> high
+                                  paraloc^.next -> low }
+                                unget_para(paraloc^);
+                                gen_alloc_regloc(list,destloc,vardef);
+                                { reg->reg, alignment is irrelevant }
+                                cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reghi,8);
+                                unget_para(paraloc^.next^);
+                                cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^.next^,destloc.register128.reglo,8);
+                              end
+                            else
+                              begin
+                                { paraloc^ -> low
+                                  paraloc^.next -> high }
+                                unget_para(paraloc^);
+                                gen_alloc_regloc(list,destloc,vardef);
+                                cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reglo,8);
+                                unget_para(paraloc^.next^);
+                                cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^.next^,destloc.register128.reghi,8);
+                              end;
+                          end;
+                        4:
+                          begin
+                            { The 128-bit parameter is located in 4 32-bit MM registers.
+                              It is needed to copy them to 2 64-bit int registers.
+                              A code generator or a target cpu must support loading of a 32-bit MM register to
+                              a 64-bit int register, zero extending it. }
+                            if target_info.endian=ENDIAN_BIG then
+                              internalerror(2018101702);  // Big endian support not implemented yet
+                            gen_alloc_regloc(list,destloc,vardef);
+                            tempreg:=cg.getintregister(list,OS_64);
+                            // Low part of the 128-bit param
+                            unget_para(paraloc^);
+                            cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,tempreg,4);
+                            paraloc:=paraloc^.next;
+                            if paraloc=nil then
+                              internalerror(2018101703);
+                            unget_para(paraloc^);
+                            cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reglo,4);
+                            cg.a_op_const_reg(list,OP_SHL,OS_64,32,destloc.register128.reglo);
+                            cg.a_op_reg_reg(list,OP_OR,OS_64,tempreg,destloc.register128.reglo);
+                            // High part of the 128-bit param
+                            paraloc:=paraloc^.next;
+                            if paraloc=nil then
+                              internalerror(2018101704);
+                            unget_para(paraloc^);
+                            cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,tempreg,4);
+                            paraloc:=paraloc^.next;
+                            if paraloc=nil then
+                              internalerror(2018101705);
+                            unget_para(paraloc^);
+                            cg.a_load_cgparaloc_anyreg(list,OS_64,paraloc^,destloc.register128.reghi,4);
+                            cg.a_op_const_reg(list,OP_SHL,OS_64,32,destloc.register128.reghi);
+                            cg.a_op_reg_reg(list,OP_OR,OS_64,tempreg,destloc.register128.reghi);
+                          end
+                        else
+                          internalerror(2018101701);
+                      end;
+                    end;
+                  LOC_REFERENCE:
+                    begin
+                      gen_alloc_regloc(list,destloc,vardef);
+                      reference_reset_base(href,cpointerdef.getreusable(vardef),paraloc^.reference.index,paraloc^.reference.offset,ctempposinvalid,para.alignment,[]);
+                      cg128.a_load128_ref_reg(list,href,destloc.register128);
+                      unget_para(paraloc^);
+                    end;
+                  else
+                    internalerror(2012090607);
+                end
+              end
+            else
+{$else cpu64bitalu}
+            if (para.size in [OS_64,OS_S64,OS_F64]) and
+               (is_64bit(vardef) or
+                { in case of fpu emulation, or abi's that pass fpu values
+                  via integer registers }
+                (vardef.typ=floatdef) or
+                 is_methodpointer(vardef) or
+                 is_record(vardef)) then
+              begin
+                case paraloc^.loc of
+                  LOC_REGISTER:
+                    begin
+                      case para.locations_count of
+{$if defined(cpu8bitalu)}
+                        { 8 paralocs? }
+                        8:
+                          if (target_info.endian=ENDIAN_BIG) then
+                            begin
+                              { is there any big endian 8 bit ALU/16 bit Addr CPU? }
+                              internalerror(2015041003);
+                              { paraloc^ -> high
+                                paraloc^.next^.next^.next^.next -> low }
+                              unget_para(paraloc^);
+                              gen_alloc_regloc(list,destloc,vardef);
+                              { reg->reg, alignment is irrelevant }
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^,cg.GetNextReg(destloc.register64.reghi),1);
+                              unget_para(paraloc^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^.next^,destloc.register64.reghi,1);
+                              unget_para(paraloc^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^.next^.next^,cg.GetNextReg(destloc.register64.reglo),1);
+                              unget_para(paraloc^.next^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^.next^.next^.next^,destloc.register64.reglo,1);
+                            end
+                          else
+                            begin
+                              { paraloc^ -> low
+                                paraloc^.next^.next^.next^.next -> high }
+                              curparaloc:=paraloc;
+                              unget_para(curparaloc^);
+                              gen_alloc_regloc(list,destloc,vardef);
+                              cg.a_load_cgparaloc_anyreg(list,OS_8,curparaloc^,destloc.register64.reglo,2);
+                              unget_para(curparaloc^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_8,curparaloc^.next^,cg.GetNextReg(destloc.register64.reglo),1);
+                              unget_para(curparaloc^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_8,curparaloc^.next^.next^,cg.GetNextReg(cg.GetNextReg(destloc.register64.reglo)),1);
+                              unget_para(curparaloc^.next^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_8,curparaloc^.next^.next^.next^,cg.GetNextReg(cg.GetNextReg(cg.GetNextReg(destloc.register64.reglo))),1);
+
+                              curparaloc:=paraloc^.next^.next^.next^.next;
+                              unget_para(curparaloc^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_8,curparaloc^,destloc.register64.reghi,2);
+                              unget_para(curparaloc^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_8,curparaloc^.next^,cg.GetNextReg(destloc.register64.reghi),1);
+                              unget_para(curparaloc^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_8,curparaloc^.next^.next^,cg.GetNextReg(cg.GetNextReg(destloc.register64.reghi)),1);
+                              unget_para(curparaloc^.next^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_8,curparaloc^.next^.next^.next^,cg.GetNextReg(cg.GetNextReg(cg.GetNextReg(destloc.register64.reghi))),1);
+                            end;
+{$endif defined(cpu8bitalu)}
+{$if defined(cpu16bitalu) or defined(cpu8bitalu)}
+                        { 4 paralocs? }
+                        4:
+                          if (target_info.endian=ENDIAN_BIG) then
+                            begin
+                              { paraloc^ -> high
+                                paraloc^.next^.next -> low }
+                              unget_para(paraloc^);
+                              gen_alloc_regloc(list,destloc,vardef);
+                              { reg->reg, alignment is irrelevant }
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^,cg.GetNextReg(destloc.register64.reghi),2);
+                              unget_para(paraloc^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^.next^,destloc.register64.reghi,2);
+                              unget_para(paraloc^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^.next^.next^,cg.GetNextReg(destloc.register64.reglo),2);
+                              unget_para(paraloc^.next^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^.next^.next^.next^,destloc.register64.reglo,2);
+                            end
+                          else
+                            begin
+                              { paraloc^ -> low
+                                paraloc^.next^.next -> high }
+                              unget_para(paraloc^);
+                              gen_alloc_regloc(list,destloc,vardef);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^,destloc.register64.reglo,2);
+                              unget_para(paraloc^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^.next^,cg.GetNextReg(destloc.register64.reglo),2);
+                              unget_para(paraloc^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^.next^.next^,destloc.register64.reghi,2);
+                              unget_para(paraloc^.next^.next^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_16,paraloc^.next^.next^.next^,cg.GetNextReg(destloc.register64.reghi),2);
+                            end;
+{$endif defined(cpu16bitalu) or defined(cpu8bitalu)}
+                        2:
+                          if (target_info.endian=ENDIAN_BIG) then
+                            begin
+                              { paraloc^ -> high
+                                paraloc^.next -> low }
+                              unget_para(paraloc^);
+                              gen_alloc_regloc(list,destloc,vardef);
+                              { reg->reg, alignment is irrelevant }
+                              cg.a_load_cgparaloc_anyreg(list,OS_32,paraloc^,destloc.register64.reghi,4);
+                              unget_para(paraloc^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_32,paraloc^.next^,destloc.register64.reglo,4);
+                            end
+                          else
+                            begin
+                              { paraloc^ -> low
+                                paraloc^.next -> high }
+                              unget_para(paraloc^);
+                              gen_alloc_regloc(list,destloc,vardef);
+                              cg.a_load_cgparaloc_anyreg(list,OS_32,paraloc^,destloc.register64.reglo,4);
+                              unget_para(paraloc^.next^);
+                              cg.a_load_cgparaloc_anyreg(list,OS_32,paraloc^.next^,destloc.register64.reghi,4);
+                            end;
+                        else
+                          { unexpected number of paralocs }
+                          internalerror(200410104);
+                      end;
+                    end;
+                  LOC_REFERENCE:
+                    begin
+                      gen_alloc_regloc(list,destloc,vardef);
+                      reference_reset_base(href,cpointerdef.getreusable(vardef),paraloc^.reference.index,paraloc^.reference.offset,ctempposinvalid,para.alignment,[]);
+                      cg64.a_load64_ref_reg(list,href,destloc.register64);
+                      unget_para(paraloc^);
+                    end;
+                  else
+                    internalerror(2005101501);
+                end
+              end
+            else
+{$endif cpu64bitalu}
+              begin
+                if assigned(paraloc^.next) then
+                  begin
+                    if (destloc.size in [OS_PAIR,OS_SPAIR]) and
+                      (para.Size in [OS_PAIR,OS_SPAIR]) then
+                      begin
+                        unget_para(paraloc^);
+                        gen_alloc_regloc(list,destloc,vardef);
+                        cg.a_load_cgparaloc_anyreg(list,OS_INT,paraloc^,destloc.register,sizeof(aint));
+                        unget_para(paraloc^.Next^);
+                        {$if defined(cpu16bitalu) or defined(cpu8bitalu)}
+                          cg.a_load_cgparaloc_anyreg(list,OS_INT,paraloc^.Next^,cg.GetNextReg(destloc.register),sizeof(aint));
+                        {$else}
+                          cg.a_load_cgparaloc_anyreg(list,OS_INT,paraloc^.Next^,destloc.registerhi,sizeof(aint));
+                        {$endif}
+                      end
+{$if defined(cpu8bitalu)}
+                    else if (destloc.size in [OS_32,OS_S32]) and
+                      (para.Size in [OS_32,OS_S32]) then
+                      begin
+                        unget_para(paraloc^);
+                        gen_alloc_regloc(list,destloc,vardef);
+                        cg.a_load_cgparaloc_anyreg(list,OS_8,paraloc^,destloc.register,sizeof(aint));
+                        unget_para(paraloc^.Next^);
+                        cg.a_load_cgparaloc_anyreg(list,OS_8,paraloc^.Next^,cg.GetNextReg(destloc.register),sizeof(aint));
+                        unget_para(paraloc^.Next^.Next^);
+                        cg.a_load_cgparaloc_anyreg(list,OS_8,paraloc^.Next^.Next^,cg.GetNextReg(cg.GetNextReg(destloc.register)),sizeof(aint));
+                        unget_para(paraloc^.Next^.Next^.Next^);
+                        cg.a_load_cgparaloc_anyreg(list,OS_8,paraloc^.Next^.Next^.Next^,cg.GetNextReg(cg.GetNextReg(cg.GetNextReg(destloc.register))),sizeof(aint));
+                      end
+{$endif defined(cpu8bitalu)}
+                    else
+                      begin
+                        { this can happen if a parameter is spread over
+                          multiple paralocs, e.g. if a record with two single
+                          fields must be passed in two single precision
+                          registers }
+                        { does it fit in the register of destloc? }
+                        sizeleft:=para.intsize;
+                        if sizeleft<>vardef.size then
+                          internalerror(2014122806);
+                        if sizeleft<>tcgsize2size[destloc.size] then
+                          internalerror(200410105);
+                        { store everything first to memory, then load it in
+                          destloc }
+                        tg.gettemp(list,sizeleft,sizeleft,tt_persistent,tempref);
+                        gen_alloc_regloc(list,destloc,vardef);
+                        while sizeleft>0 do
+                          begin
+                            if not assigned(paraloc) then
+                              internalerror(2014122807);
+                            unget_para(paraloc^);
+                            cg.a_load_cgparaloc_ref(list,paraloc^,tempref,sizeleft,newalignment(para.alignment,para.intsize-sizeleft));
+                            if (paraloc^.size=OS_NO) and
+                               assigned(paraloc^.next) then
+                              internalerror(2014122805);
+                            inc(tempref.offset,tcgsize2size[paraloc^.size]);
+                            dec(sizeleft,tcgsize2size[paraloc^.size]);
+                            paraloc:=paraloc^.next;
+                          end;
+                        dec(tempref.offset,para.intsize);
+                        cg.a_load_ref_reg(list,para.size,para.size,tempref,destloc.register);
+                        tg.ungettemp(list,tempref);
+                      end;
+                  end
+                else
+                  begin
+                    unget_para(paraloc^);
+                    gen_alloc_regloc(list,destloc,vardef);
+                    { we can't directly move regular registers into fpu
+                      registers }
+                    if getregtype(paraloc^.register)=R_FPUREGISTER then
+                      begin
+                        { store everything first to memory, then load it in
+                          destloc }
+                        tg.gettemp(list,tcgsize2size[paraloc^.size],para.intsize,tt_persistent,tempref);
+                        cg.a_load_cgparaloc_ref(list,paraloc^,tempref,tcgsize2size[paraloc^.size],tempref.alignment);
+                        cg.a_load_ref_reg(list,int_cgsize(tcgsize2size[paraloc^.size]),destloc.size,tempref,destloc.register);
+                        tg.ungettemp(list,tempref);
+                      end
+                    else
+                      cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,sizeof(aint));
+                  end;
+              end;
+          end;
+        LOC_FPUREGISTER,
+        LOC_CFPUREGISTER :
+          begin
+{$ifdef mips}
+            if (destloc.size = paraloc^.Size) and
+               (paraloc^.Loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
+              begin
+                unget_para(paraloc^);
+                gen_alloc_regloc(list,destloc,vardef);
+                cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,para.alignment);
+              end
+            else if (destloc.size = OS_F32) and
+               (paraloc^.Loc in [LOC_REGISTER,LOC_CREGISTER]) then
+              begin
+                gen_alloc_regloc(list,destloc,vardef);
+                unget_para(paraloc^);
+                list.Concat(taicpu.op_reg_reg(A_MTC1,paraloc^.register,destloc.register));
+              end
+{ TODO: Produces invalid code, needs fixing together with regalloc setup. }
+{
+            else if (destloc.size = OS_F64) and
+                    (paraloc^.Loc in [LOC_REGISTER,LOC_CREGISTER]) and
+                    (paraloc^.next^.Loc in [LOC_REGISTER,LOC_CREGISTER]) then
+              begin
+                gen_alloc_regloc(list,destloc,vardef);
+
+                tmpreg:=destloc.register;
+                unget_para(paraloc^);
+                list.Concat(taicpu.op_reg_reg(A_MTC1,paraloc^.register,tmpreg));
+                setsupreg(tmpreg,getsupreg(tmpreg)+1);
+                unget_para(paraloc^.next^);
+                list.Concat(taicpu.op_reg_reg(A_MTC1,paraloc^.Next^.register,tmpreg));
+              end
+}
+            else
+              begin
+                sizeleft := TCGSize2Size[destloc.size];
+                tg.GetTemp(list,sizeleft,sizeleft,tt_normal,tempref);
+                href:=tempref;
+                while assigned(paraloc) do
+                  begin
+                    unget_para(paraloc^);
+                    cg.a_load_cgparaloc_ref(list,paraloc^,href,sizeleft,destloc.reference.alignment);
+                    inc(href.offset,TCGSize2Size[paraloc^.size]);
+                    dec(sizeleft,TCGSize2Size[paraloc^.size]);
+                    paraloc:=paraloc^.next;
+                  end;
+                gen_alloc_regloc(list,destloc,vardef);
+                cg.a_loadfpu_ref_reg(list,destloc.size,destloc.size,tempref,destloc.register);
+                tg.UnGetTemp(list,tempref);
+              end;
+{$else mips}
+{$if defined(sparc) or defined(arm)}
+            { Arm and Sparc passes floats in int registers, when loading to fpu register
+              we need a temp }
+            sizeleft := TCGSize2Size[destloc.size];
+            tg.GetTemp(list,sizeleft,sizeleft,tt_normal,tempref);
+            href:=tempref;
+            while assigned(paraloc) do
+              begin
+                unget_para(paraloc^);
+                cg.a_load_cgparaloc_ref(list,paraloc^,href,sizeleft,destloc.reference.alignment);
+                inc(href.offset,TCGSize2Size[paraloc^.size]);
+                dec(sizeleft,TCGSize2Size[paraloc^.size]);
+                paraloc:=paraloc^.next;
+              end;
+            gen_alloc_regloc(list,destloc,vardef);
+            cg.a_loadfpu_ref_reg(list,destloc.size,destloc.size,tempref,destloc.register);
+            tg.UnGetTemp(list,tempref);
+{$else defined(sparc) or defined(arm)}
+            unget_para(paraloc^);
+            gen_alloc_regloc(list,destloc,vardef);
+            { from register to register -> alignment is irrelevant }
+            cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,0);
+            if assigned(paraloc^.next) then
+              internalerror(200410109);
+{$endif defined(sparc) or defined(arm)}
+{$endif mips}
+          end;
+        LOC_MMREGISTER,
+        LOC_CMMREGISTER :
+          begin
+{$ifndef cpu64bitalu}
+            { ARM vfp floats are passed in integer registers }
+            if (para.size=OS_F64) and
+               (paraloc^.size in [OS_32,OS_S32]) and
+               use_vectorfpu(vardef) then
+              begin
+                { we need 2x32bit reg }
+                if not assigned(paraloc^.next) or
+                   assigned(paraloc^.next^.next) then
+                  internalerror(2009112421);
+                unget_para(paraloc^.next^);
+                case paraloc^.next^.loc of
+                  LOC_REGISTER:
+                    tempreg:=paraloc^.next^.register;
+                  LOC_REFERENCE:
+                    begin
+                      tempreg:=cg.getintregister(list,OS_32);
+                      cg.a_load_cgparaloc_anyreg(list,OS_32,paraloc^.next^,tempreg,4);
+                    end;
+                  else
+                    internalerror(2012051301);
+                end;
+                { don't free before the above, because then the getintregister
+                  could reallocate this register and overwrite it }
+                unget_para(paraloc^);
+                gen_alloc_regloc(list,destloc,vardef);
+                if (target_info.endian=endian_big) then
+                  { paraloc^ -> high
+                    paraloc^.next -> low }
+                  reg64:=joinreg64(tempreg,paraloc^.register)
+                else
+                  reg64:=joinreg64(paraloc^.register,tempreg);
+                cg64.a_loadmm_intreg64_reg(list,OS_F64,reg64,destloc.register);
+              end
+            else
+{$endif not cpu64bitalu}
+              begin
+                if not assigned(paraloc^.next) then
+                  begin
+                    unget_para(paraloc^);
+                    gen_alloc_regloc(list,destloc,vardef);
+                    { from register to register -> alignment is irrelevant }
+                    cg.a_load_cgparaloc_anyreg(list,destloc.size,paraloc^,destloc.register,0);
+                  end
+                else
+                  begin
+                    internalerror(200410108);
+                  end;
+                { data could come in two memory locations, for now
+                  we simply ignore the sanity check (FK)
+                if assigned(paraloc^.next) then
+                  internalerror(200410108);
+                }
+              end;
+          end;
+        else
+          internalerror(2010052903);
+      end;
     end;
 
   function thlcg2ll.getintmmcgsize(reg: tregister; size: tcgsize): tcgsize;
@@ -1540,6 +2159,8 @@ implementation
               result:=OS_F64;
             OS_128:
               result:=OS_M128;
+            else
+              ;
           end;
         end;
     end;

+ 207 - 74
compiler/hlcgobj.pas

@@ -429,6 +429,11 @@ unit hlcgobj;
           }
           procedure g_exception_reason_discard(list : TAsmList; size: tdef; href: treference); virtual;
 
+          {#
+              Call when the current location should never be reached
+          }
+          procedure g_unreachable(list: TAsmList); virtual;
+
           procedure g_maybe_testself(list : TAsmList; selftype: tdef; reg:tregister);
 //          procedure g_maybe_testvmt(list : TAsmList;reg:tregister;objdef:tobjectdef);
           {# This should emit the opcode to copy len bytes from the source
@@ -570,7 +575,7 @@ unit hlcgobj;
           procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);virtual;
           procedure location_force_fpureg(list:TAsmList;var l: tlocation;size: tdef;maybeconst:boolean);virtual;
           procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);virtual;
-          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;
+          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;var size:tdef;maybeconst:boolean);virtual;
 //          procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;abstract;
 
           { Retrieve the location of the data pointed to in location l, when the location is
@@ -818,9 +823,13 @@ implementation
           objectdef,
           procvardef,
           procdef,
-          arraydef,
           formaldef:
             result:=R_ADDRESSREGISTER;
+          arraydef:
+            if tstoreddef(def).is_intregable then
+              result:=R_INTREGISTER
+            else
+              result:=R_ADDRESSREGISTER;
           floatdef:
             if use_vectorfpu(def) then
               result:=R_MMREGISTER
@@ -986,7 +995,7 @@ implementation
                      { load the value piecewise to get it into the register }
                      orgsizeleft:=sizeleft;
                      reghasvalue:=false;
-{$ifdef cpu64bitalu}
+{$if defined(cpu64bitalu) or defined(cpuhighleveltarget)}
                      if sizeleft>=4 then
                        begin
                          a_load_ref_reg(list,u32inttype,location^.def,tmpref,location^.register);
@@ -996,7 +1005,7 @@ implementation
                          inc(tmpref.offset,4);
                          reghasvalue:=true;
                        end;
-{$endif cpu64bitalu}
+{$endif defind(cpu64bitalu) or defined(cpuhighleveltarget)}
                      if sizeleft>=2 then
                        begin
                          tmpreg:=getintregister(list,location^.def);
@@ -1411,12 +1420,57 @@ implementation
   procedure thlcgobj.a_load_subsetref_reg(list: TAsmList; fromsubsetsize, tosize: tdef; const sref: tsubsetreference; destreg: tregister);
     var
       tmpref: treference;
-      valuereg,extra_value_reg: tregister;
+      valuereg,extra_value_reg, tmpreg: tregister;
       tosreg: tsubsetregister;
       loadsize: torddef;
       loadbitsize: byte;
       extra_load: boolean;
+      tmpsref: tsubsetreference;
     begin
+      if sref.bitlen>AIntBits then
+        begin
+          tmpsref:=sref;
+          tmpsref.bitlen:=AIntBits;
+          valuereg:=hlcg.getintregister(list,tosize);
+          a_load_subsetref_reg(list,sinttype,tosize,tmpsref,valuereg);
+          tmpsref.bitlen:=sref.bitlen-AIntBits;
+          inc(tmpsref.ref.offset,AIntBits div 8);
+          extra_value_reg:=hlcg.getintregister(list,tosize);
+          a_load_subsetref_reg(list,sinttype,tosize,tmpsref,extra_value_reg);
+          { can't use a_load_reg_subsetreg to merge the results, as that one
+            does not support sizes > AIntBits either }
+          tmpreg:=hlcg.getintregister(list,tosize);
+          if target_info.endian=endian_big then
+            begin
+              a_op_const_reg_reg(list,OP_SHL,tosize,sref.bitlen-AIntBits,valuereg,tmpreg);
+              if is_signed(fromsubsetsize) then
+                begin
+                  valuereg:=tmpreg;
+                  tmpreg:=hlcg.getintregister(list,tosize);
+                  a_op_const_reg_reg(list,OP_AND,tosize,(tcgint(1) shl (sref.bitlen-AIntBits))-1,extra_value_reg,tmpreg);
+                  valuereg:=tmpreg;
+                end
+            end
+          else
+            begin
+              a_op_const_reg_reg(list,OP_SHL,tosize,AIntBits,extra_value_reg,tmpreg);
+              if is_signed(fromsubsetsize) then
+                begin
+                  extra_value_reg:=hlcg.getintregister(list,tosize);
+                  a_op_const_reg_reg(list,OP_AND,tosize,(tcgint(1) shl AIntBits)-1,valuereg,extra_value_reg);
+                  valuereg:=extra_value_reg;
+                end
+            end;
+          if is_signed(fromsubsetsize) then
+            begin
+              extra_value_reg:=hlcg.getintregister(list,tosize);
+              a_op_const_reg_reg(list,OP_AND,tosize,(tcgint(1) shl AIntBits)-1,valuereg,extra_value_reg);
+              valuereg:=extra_value_reg;
+            end;
+          a_op_reg_reg_reg(list,OP_OR,tosize,valuereg,tmpreg,destreg);
+          exit;
+        end;
+
       get_subsetref_load_info(sref,loadsize,extra_load);
       loadbitsize:=loadsize.size*8;
 
@@ -1507,7 +1561,37 @@ implementation
     end;
 
   procedure thlcgobj.a_load_reg_subsetref(list: TAsmList; fromsize, tosubsetsize: tdef; fromreg: tregister; const sref: tsubsetreference);
+    var
+      tmpsref: tsubsetreference;
+      fromreg1: tregister;
     begin
+      if sref.bitlen>AIntBits then
+        begin
+          if ((sref.bitlen mod AIntBits)<>0) then
+            internalerror(2019052901);
+          tmpsref:=sref;
+          tmpsref.bitlen:=AIntBits;
+          fromreg1:=hlcg.getintregister(list,uinttype);
+          a_load_reg_reg(list,fromsize,uinttype,fromreg,fromreg1);
+          if target_info.endian=endian_big then
+            begin
+              inc(tmpsref.ref.offset,sref.bitlen-AIntBits);
+            end;
+          a_load_reg_subsetref(list,uinttype,uinttype,fromreg1,tmpsref);
+          if target_info.endian=endian_big then
+            begin
+              tmpsref.ref.offset:=sref.ref.offset;
+            end
+          else
+            begin
+              inc(tmpsref.ref.offset,AIntBits div 8);
+            end;
+          tmpsref.bitlen:=sref.bitlen-AIntBits;
+          fromreg1:=hlcg.getintregister(list,fromsize);
+          hlcg.a_op_const_reg_reg(list,OP_SHR,fromsize,AIntBits,fromreg,fromreg1);
+          a_load_reg_subsetref(list,fromsize,tosubsetsize,fromreg1,tmpsref);
+          exit;
+        end;
       a_load_regconst_subsetref_intern(list,fromsize,tosubsetsize,fromreg,sref,SL_REG);
     end;
 
@@ -1540,9 +1624,37 @@ implementation
 
   procedure thlcgobj.a_load_const_subsetref(list: TAsmlist; tosubsetsize: tdef; a: tcgint; const sref: tsubsetreference);
     var
+      tmpref: treference;
+      tmpsref: tsubsetreference;
       tmpreg: tregister;
       slopt: tsubsetloadopt;
+      newdef: tdef;
+      newbytesize: longint;
+      loval, hival: longint;
     begin
+      if sref.bitlen>AIntBits then
+        begin
+          if ((sref.bitlen mod AIntBits)<>0) then
+            internalerror(2019052901);
+          tmpsref:=sref;
+          tmpsref.bitlen:=AIntBits;
+          if target_info.endian=endian_big then
+            begin
+              inc(tmpsref.ref.offset,sref.bitlen-AIntBits);
+            end;
+          a_load_const_subsetref(list,tosubsetsize,aint(a),tmpsref);
+          if target_info.endian=endian_big then
+            begin
+              tmpsref.ref.offset:=sref.ref.offset;
+            end
+          else
+            begin
+              inc(tmpsref.ref.offset,AIntBits div 8);
+            end;
+          tmpsref.bitlen:=sref.bitlen-AIntBits;
+          a_load_const_subsetref(list,tosubsetsize,a shr AIntBits,tmpsref);
+          exit;
+        end;
       { perform masking of the source value in advance }
       slopt:=SL_REGNOSRCMASK;
       if (sref.bitlen<>AIntBits) then
@@ -3159,6 +3271,12 @@ implementation
     end;
 
 
+  procedure thlcgobj.g_unreachable(list: TAsmList);
+    begin
+      { nothing }
+    end;
+
+
   procedure thlcgobj.g_maybe_testself(list: TAsmList; selftype: tdef; reg: tregister);
     var
       OKLabel : tasmlabel;
@@ -3175,7 +3293,7 @@ implementation
          paramanager.getintparaloc(list,pd,1,cgpara1);
          a_load_const_cgpara(list,s32inttype,aint(210),cgpara1);
          paramanager.freecgpara(list,cgpara1);
-         g_call_system_proc(list,pd,[@cgpara1],nil);
+         g_call_system_proc(list,pd,[@cgpara1],nil).resetiftemp;
          cgpara1.done;
          a_label(list,oklabel);
        end;
@@ -3223,7 +3341,7 @@ implementation
       paramanager.freecgpara(list,cgpara3);
       paramanager.freecgpara(list,cgpara2);
       paramanager.freecgpara(list,cgpara1);
-      g_call_system_proc(list,pd,[@cgpara1,@cgpara2,@cgpara3],nil);
+      g_call_system_proc(list,pd,[@cgpara1,@cgpara2,@cgpara3],nil).resetiftemp;
       cgpara3.done;
       cgpara2.done;
       cgpara1.done;
@@ -3251,7 +3369,7 @@ implementation
         end;
       paramanager.freecgpara(list,cgpara2);
       paramanager.freecgpara(list,cgpara1);
-      g_call_system_proc(list,pd,[@cgpara1,@cgpara2],nil);
+      g_call_system_proc(list,pd,[@cgpara1,@cgpara2],nil).resetiftemp;
       cgpara2.done;
       cgpara1.done;
     end;
@@ -3290,7 +3408,7 @@ implementation
             { these functions get the pointer by value }
             a_load_ref_cgpara(list,t,ref,cgpara1);
           paramanager.freecgpara(list,cgpara1);
-          g_call_system_proc(list,pd,[@cgpara1],nil);
+          g_call_system_proc(list,pd,[@cgpara1],nil).resetiftemp;
         end
        else
         begin
@@ -3312,7 +3430,7 @@ implementation
             end;
           paramanager.freecgpara(list,cgpara1);
           paramanager.freecgpara(list,cgpara2);
-          g_call_system_proc(list,pd,[@cgpara1,@cgpara2],nil);
+          g_call_system_proc(list,pd,[@cgpara1,@cgpara2],nil).resetiftemp;
         end;
        cgpara2.done;
        cgpara1.done;
@@ -3338,7 +3456,7 @@ implementation
            paramanager.getintparaloc(list,pd,1,cgpara1);
            a_loadaddr_ref_cgpara(list,t,ref,cgpara1);
            paramanager.freecgpara(list,cgpara1);
-          g_call_system_proc(list,pd,[@cgpara1],nil);
+          g_call_system_proc(list,pd,[@cgpara1],nil).resetiftemp;
          end
        else
          begin
@@ -3360,7 +3478,7 @@ implementation
               end;
             paramanager.freecgpara(list,cgpara1);
             paramanager.freecgpara(list,cgpara2);
-            g_call_system_proc(list,pd,[@cgpara1,@cgpara2],nil);
+            g_call_system_proc(list,pd,[@cgpara1,@cgpara2],nil).resetiftemp;
          end;
        cgpara1.done;
        cgpara2.done;
@@ -3410,7 +3528,7 @@ implementation
             end;
           paramanager.freecgpara(list,cgpara1);
           paramanager.freecgpara(list,cgpara2);
-          g_call_system_proc(list,pd,[@cgpara1,@cgpara2],nil);
+          g_call_system_proc(list,pd,[@cgpara1,@cgpara2],nil).resetiftemp;
           cgpara1.done;
           cgpara2.done;
           exit;
@@ -3420,7 +3538,7 @@ implementation
       paramanager.getintparaloc(list,pd,1,cgpara1);
       a_loadaddr_ref_cgpara(list,t,ref,cgpara1);
       paramanager.freecgpara(list,cgpara1);
-      g_call_system_proc(list,pd,[@cgpara1],nil);
+      g_call_system_proc(list,pd,[@cgpara1],nil).resetiftemp;
       cgpara1.done;
     end;
 
@@ -3474,7 +3592,7 @@ implementation
       paramanager.freecgpara(list,cgpara1);
       paramanager.freecgpara(list,cgpara2);
       paramanager.freecgpara(list,cgpara3);
-      g_call_system_proc(list,pd,[@cgpara1,@cgpara2,@cgpara3],nil);
+      g_call_system_proc(list,pd,[@cgpara1,@cgpara2,@cgpara3],nil).resetiftemp;
 
       cgpara3.done;
       cgpara2.done;
@@ -3491,7 +3609,9 @@ implementation
 
   procedure thlcgobj.g_rangecheck(list: TAsmList; const l: tlocation; fromdef, todef: tdef);
     var
-{$if defined(cpu64bitalu) or defined(cpu32bitalu)}
+{$if defined(cpuhighleveltarget)}
+      aintmax: tcgint;
+{$elseif defined(cpu64bitalu) or defined(cpu32bitalu)}
       aintmax: aint;
 {$else}
       aintmax: longint;
@@ -3653,7 +3773,7 @@ implementation
                   { if low(to) > maxlongint also range error }
                   (lto > aintmax) then
                  begin
-                   g_call_system_proc(list,'fpc_rangeerror',[],nil);
+                   g_call_system_proc(list,'fpc_rangeerror',[],nil).resetiftemp;
                    exit
                  end;
                { from is signed and to is unsigned -> when looking at to }
@@ -3668,7 +3788,7 @@ implementation
                if (lfrom > aintmax) or
                   (hto < 0) then
                  begin
-                   g_call_system_proc(list,'fpc_rangeerror',[],nil);
+                   g_call_system_proc(list,'fpc_rangeerror',[],nil).resetiftemp;
                    exit
                  end;
                { from is unsigned and to is signed -> when looking at to }
@@ -3691,7 +3811,7 @@ implementation
         a_cmp_const_reg_label(list,maxdef,OC_BE,aintmax,hreg,neglabel)
       else
         a_cmp_const_reg_label(list,maxdef,OC_BE,tcgint(int64(hto-lto)),hreg,neglabel);
-      g_call_system_proc(list,'fpc_rangeerror',[],nil);
+      g_call_system_proc(list,'fpc_rangeerror',[],nil).resetiftemp;
       a_label(list,neglabel);
     end;
 
@@ -3770,7 +3890,7 @@ implementation
       paramanager.freecgpara(list,cgpara3);
       paramanager.freecgpara(list,cgpara2);
       paramanager.freecgpara(list,cgpara1);
-      g_call_system_proc(list,pd,[@cgpara1,@cgpara2,@cgpara3],nil);
+      g_call_system_proc(list,pd,[@cgpara1,@cgpara2,@cgpara3],nil).resetiftemp;
       cgpara3.done;
       cgpara2.done;
       cgpara1.done;
@@ -3789,7 +3909,7 @@ implementation
       { load source }
       a_load_loc_cgpara(list,cpointerdef.getreusable(arrdef),l,cgpara1);
       paramanager.freecgpara(list,cgpara1);
-      g_call_system_proc(list,pd,[@cgpara1],nil);
+      g_call_system_proc(list,pd,[@cgpara1],nil).resetiftemp;
       cgpara1.done;
     end;
 
@@ -4089,7 +4209,7 @@ implementation
       end;
     end;
 
-  procedure thlcgobj.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean);
+  procedure thlcgobj.location_force_mmregscalar(list: TAsmList; var l: tlocation; var size: tdef; maybeconst: boolean);
     var
       reg : tregister;
       href : treference;
@@ -4134,6 +4254,7 @@ implementation
           l.size:=def_cgsize(newsize);
           location_freetemp(list,l);
           location_reset(l,LOC_MMREGISTER,l.size);
+          size:=newsize;
           l.register:=reg;
         end;
     end;
@@ -4455,47 +4576,63 @@ implementation
         inn,
         asn,isn:
           result := fen_norecurse_false;
+        else
+          ;
       end;
     end;
 
 
   procedure thlcgobj.gen_proc_symbol(list: TAsmList);
     var
-      item,
-      previtem : TCmdStrListItem;
-    begin
-      previtem:=nil;
-      item := TCmdStrListItem(current_procinfo.procdef.aliasnames.first);
+      firstitem,
+      item: TCmdStrListItem;
+      global: boolean;
+    begin
+      item:=TCmdStrListItem(current_procinfo.procdef.aliasnames.first);
+      firstitem:=item;
+      global:=
+        (cs_profile in current_settings.moduleswitches) or
+        { smart linking using a library requires to promote
+          all non-nested procedures to AB_GLOBAL
+          otherwise you get undefined symbol error at linking
+          for msdos  target with -CX option for instance }
+        (create_smartlink_library and not is_nested_pd(current_procinfo.procdef)) or
+        (po_global in current_procinfo.procdef.procoptions);
       while assigned(item) do
         begin
 {$ifdef arm}
           if GenerateThumbCode or GenerateThumb2Code then
             list.concat(tai_directive.create(asd_thumb_func,''));
 {$endif arm}
-          { "double link" all procedure entry symbols via .reference }
-          { directives on darwin, because otherwise the linker       }
-          { sometimes strips the procedure if only on of the symbols }
-          { is referenced                                            }
-          if assigned(previtem) and
+          { alias procedure entry symbols via ".set" on Darwin, otherwise
+            they can be interpreted as all different starting symbols of
+            subsections and be reordered }
+          if (item<>firstitem) and
              (target_info.system in systems_darwin) then
-            list.concat(tai_directive.create(asd_reference,item.str));
-          if (cs_profile in current_settings.moduleswitches) or
-             { smart linking using a library requires to promote
-               all non-nested procedures to AB_GLOBAL
-               otherwise you get undefined symbol error at linking
-               for msdos  target with -CX option for instance }
-             (create_smartlink_library and not is_nested_pd(current_procinfo.procdef)) or
-             (po_global in current_procinfo.procdef.procoptions) then
-            list.concat(Tai_symbol.createname_global(item.str,AT_FUNCTION,0,current_procinfo.procdef))
+            begin
+              { the .set already defines the symbol, so can't emit a tai_symbol as that will redefine it }
+              if global then
+                begin
+                  list.concat(tai_symbolpair.create(spk_set_global,item.str,firstitem.str));
+                  { needed for generating the tai_symbol_end }
+                  current_asmdata.DefineAsmSymbol(item.str,AB_GLOBAL,AT_FUNCTION,current_procinfo.procdef);
+                end
+              else
+                begin
+                  list.concat(tai_symbolpair.create(spk_set,item.str,firstitem.str));
+                  current_asmdata.DefineAsmSymbol(item.str,AB_LOCAL,AT_FUNCTION,current_procinfo.procdef);
+                end;
+            end
           else
-            list.concat(Tai_symbol.createname(item.str,AT_FUNCTION,0,current_procinfo.procdef));
-          if assigned(previtem) and
-             (target_info.system in systems_darwin) then
-            list.concat(tai_directive.create(asd_reference,previtem.str));
-          if not(af_stabs_use_function_absolute_addresses in target_asm.flags) then
-            list.concat(Tai_function_name.create(item.str));
-          previtem:=item;
-          item := TCmdStrListItem(item.next);
+            begin
+              if global then
+                list.concat(Tai_symbol.createname_global(item.str,AT_FUNCTION,0,current_procinfo.procdef))
+              else
+                list.concat(Tai_symbol.createname(item.str,AT_FUNCTION,0,current_procinfo.procdef));
+              if not(af_stabs_use_function_absolute_addresses in target_asm.flags) then
+                list.concat(Tai_function_name.create(item.str));
+            end;
+          item:=TCmdStrListItem(item.next);
         end;
       current_procinfo.procdef.procstarttai:=tai(list.last);
     end;
@@ -4540,6 +4677,8 @@ implementation
         potype_unitinit,
         potype_proginit:
           TSymtable(current_module.localsymtable).SymList.ForEachCall(@initialize_regvars,list);
+        else
+          ;
       end;
 
       { initialises temp. ansi/wide string data }
@@ -4567,13 +4706,14 @@ implementation
       cleanup_regvars(list);
 {$endif OLDREGVARS}
 
-      { finalize temporary data }
-      finalizetempvariables(list);
-
       { finalize paras data }
       if assigned(current_procinfo.procdef.parast) and
          not(po_assembler in current_procinfo.procdef.procoptions) then
         current_procinfo.procdef.parast.SymList.ForEachCall(@final_paras,list);
+
+      { finalize temporary data }
+      finalizetempvariables(list);
+
       current_procinfo:=old_current_procinfo;
     end;
 
@@ -4589,6 +4729,8 @@ implementation
                      std_regname(vs.initialloc.reference.base)+tostr_with_plus(vs.initialloc.reference.offset)+
                      ', size='+tcgsize2str(vs.initialloc.size))));
               end;
+            else
+              ;
           end;
         end;
       vs.localloc:=vs.initialloc;
@@ -4626,10 +4768,10 @@ implementation
 {$ifdef AVR}
            cg.a_call_name(list,'FPC_INIT_FUNC_TABLE',false)
 {$else AVR}
-           g_call_system_proc(list,'fpc_initializeunits',[],nil)
+           g_call_system_proc(list,'fpc_initializeunits',[],nil).resetiftemp
 {$endif AVR}
          else
-           g_call_system_proc(list,'fpc_libinitializeunits',[],nil);
+           g_call_system_proc(list,'fpc_libinitializeunits',[],nil).resetiftemp;
        end;
 
       list.concat(Tai_force_line.Create);
@@ -4647,7 +4789,7 @@ implementation
       { call __EXIT for main program }
       if (not current_module.islibrary) and
          (current_procinfo.procdef.proctypeoption=potype_proginit) then
-        g_call_system_proc(list,'fpc_do_exit',[],nil);
+        g_call_system_proc(list,'fpc_do_exit',[],nil).resetiftemp;
     end;
 
   procedure thlcgobj.inittempvariables(list: TAsmList);
@@ -4871,6 +5013,8 @@ implementation
                      end;
                  end;
              end;
+           else
+             ;
          end;
        end;
     end;
@@ -4983,8 +5127,6 @@ implementation
                 end
               else
                 begin
-                  { pass proper alignment info }
-                  localcopyloc.reference.alignment:=tparavarsym(p).vardef.alignment;
                   g_concatcopy(list,tparavarsym(p).vardef,href,localcopyloc.reference);
                 end;
               { update localloc of varsym }
@@ -5171,7 +5313,7 @@ implementation
 
   procedure thlcgobj.gen_load_return_value(list: TAsmList);
     var
-      ressym : tabstractnormalvarsym;
+      ressym : tsym;
       retdef : tdef;
     begin
       { Is the loading needed? }
@@ -5185,28 +5327,19 @@ implementation
         exit;
 
       { constructors return self }
-      if (current_procinfo.procdef.proctypeoption=potype_constructor) then
-        begin
-          ressym:=tabstractnormalvarsym(current_procinfo.procdef.parast.Find('self'));
-          retdef:=ressym.vardef;
-          { and TP-style constructors return a pointer to self }
-          if is_object(ressym.vardef) then
-            retdef:=cpointerdef.getreusable(retdef);
-        end
-      else
-        begin
-          ressym:=tabstractnormalvarsym(current_procinfo.procdef.funcretsym);
-          retdef:=ressym.vardef;
-        end;
+      if not current_procinfo.procdef.getfuncretsyminfo(ressym,retdef) then
+        internalerror(2018122501);
       if (ressym.refs>0) or
          is_managed_type(retdef) then
         begin
           { was: don't do anything if funcretloc.loc in [LOC_INVALID,LOC_REFERENCE] }
           if not paramanager.ret_in_param(current_procinfo.procdef.returndef,current_procinfo.procdef) then
-            gen_load_loc_function_result(list,retdef,ressym.localloc);
+            gen_load_loc_function_result(list,retdef,tabstractnormalvarsym(ressym).localloc);
         end
       else
-        gen_load_uninitialized_function_result(list,current_procinfo.procdef,retdef,current_procinfo.procdef.funcretloc[calleeside])
+        gen_load_uninitialized_function_result(list,current_procinfo.procdef,retdef,current_procinfo.procdef.funcretloc[calleeside]);
+      if tabstractnormalvarsym(ressym).localloc.loc=LOC_REFERENCE then
+        tg.UnGetLocal(list,tabstractnormalvarsym(ressym).localloc.reference);
     end;
 
   procedure thlcgobj.gen_stack_check_size_para(list: TAsmList);
@@ -5234,7 +5367,7 @@ implementation
       paramanager.getintparaloc(list,pd,1,paraloc1);
       paramanager.freecgpara(list,paraloc1);
       { Call the helper }
-      hlcg.g_call_system_proc(list,pd,[@paraloc1],nil);
+      g_call_system_proc(list,pd,[@paraloc1],nil).resetiftemp;
       paraloc1.done;
     end;
 

+ 195 - 79
compiler/htypechk.pas

@@ -37,6 +37,8 @@ interface
         nod : tnodetype;
         inr : tinlinenumber;
         op_overloading_supported : boolean;
+        minargs : longint;
+        maxargs : longint;
       end;
 
       Ttok2opRec=record
@@ -111,33 +113,33 @@ interface
     const
       tok2nodes=27;
       tok2node:array[1..tok2nodes] of ttok2noderec=(
-        (tok:_PLUS       ;nod:addn;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_MINUS      ;nod:subn;inr:in_none;op_overloading_supported:true),      { binary and unary overloading supported }
-        (tok:_STAR       ;nod:muln;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_SLASH      ;nod:slashn;inr:in_none;op_overloading_supported:true),    { binary overloading supported }
-        (tok:_EQ         ;nod:equaln;inr:in_none;op_overloading_supported:true),    { binary overloading supported }
-        (tok:_GT         ;nod:gtn;inr:in_none;op_overloading_supported:true),       { binary overloading supported }
-        (tok:_LT         ;nod:ltn;inr:in_none;op_overloading_supported:true),       { binary overloading supported }
-        (tok:_GTE        ;nod:gten;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_LTE        ;nod:lten;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_SYMDIF     ;nod:symdifn;inr:in_none;op_overloading_supported:true),   { binary overloading supported }
-        (tok:_STARSTAR   ;nod:starstarn;inr:in_none;op_overloading_supported:true), { binary overloading supported }
-        (tok:_OP_AS      ;nod:asn;inr:in_none;op_overloading_supported:false),      { binary overloading NOT supported }
-        (tok:_OP_IN      ;nod:inn;inr:in_none;op_overloading_supported:true),       { binary overloading supported }
-        (tok:_OP_IS      ;nod:isn;inr:in_none;op_overloading_supported:false),      { binary overloading NOT supported }
-        (tok:_OP_OR      ;nod:orn;inr:in_none;op_overloading_supported:true),       { binary overloading supported }
-        (tok:_OP_AND     ;nod:andn;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_OP_DIV     ;nod:divn;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_OP_NOT     ;nod:notn;inr:in_none;op_overloading_supported:true),      { unary overloading supported }
-        (tok:_OP_MOD     ;nod:modn;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_OP_SHL     ;nod:shln;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_OP_SHR     ;nod:shrn;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_OP_XOR     ;nod:xorn;inr:in_none;op_overloading_supported:true),      { binary overloading supported }
-        (tok:_ASSIGNMENT ;nod:assignn;inr:in_none;op_overloading_supported:true),   { unary overloading supported }
-        (tok:_OP_EXPLICIT;nod:assignn;inr:in_none;op_overloading_supported:true),   { unary overloading supported }
-        (tok:_NE         ;nod:unequaln;inr:in_none;op_overloading_supported:true),  { binary overloading supported }
-        (tok:_OP_INC     ;nod:inlinen;inr:in_inc_x;op_overloading_supported:true),  { unary overloading supported }
-        (tok:_OP_DEC     ;nod:inlinen;inr:in_dec_x;op_overloading_supported:true)   { unary overloading supported }
+        (tok:_PLUS       ;nod:addn;inr:in_none;op_overloading_supported:true;minargs:1;maxargs:2),      { binary overloading supported }
+        (tok:_MINUS      ;nod:subn;inr:in_none;op_overloading_supported:true;minargs:1;maxargs:2),      { binary and unary overloading supported }
+        (tok:_STAR       ;nod:muln;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),      { binary overloading supported }
+        (tok:_SLASH      ;nod:slashn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),    { binary overloading supported }
+        (tok:_EQ         ;nod:equaln;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),    { binary overloading supported }
+        (tok:_GT         ;nod:gtn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),       { binary overloading supported }
+        (tok:_LT         ;nod:ltn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),       { binary overloading supported }
+        (tok:_GTE        ;nod:gten;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),      { binary overloading supported }
+        (tok:_LTE        ;nod:lten;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),      { binary overloading supported }
+        (tok:_SYMDIF     ;nod:symdifn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),   { binary overloading supported }
+        (tok:_STARSTAR   ;nod:starstarn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2), { binary overloading supported }
+        (tok:_OP_AS      ;nod:asn;inr:in_none;op_overloading_supported:false;minargs:0;maxargs:0),      { binary overloading NOT supported }
+        (tok:_OP_IN      ;nod:inn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),       { binary overloading supported }
+        (tok:_OP_IS      ;nod:isn;inr:in_none;op_overloading_supported:false;minargs:0;maxargs:0),      { binary overloading NOT supported }
+        (tok:_OP_OR      ;nod:orn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),       { binary overloading supported }
+        (tok:_OP_AND     ;nod:andn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),      { binary overloading supported }
+        (tok:_OP_DIV     ;nod:divn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),      { binary overloading supported }
+        (tok:_OP_NOT     ;nod:notn;inr:in_none;op_overloading_supported:true;minargs:1;maxargs:1),      { unary overloading supported }
+        (tok:_OP_MOD     ;nod:modn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),      { binary overloading supported }
+        (tok:_OP_SHL     ;nod:shln;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),      { binary overloading supported }
+        (tok:_OP_SHR     ;nod:shrn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),      { binary overloading supported }
+        (tok:_OP_XOR     ;nod:xorn;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),      { binary overloading supported }
+        (tok:_ASSIGNMENT ;nod:assignn;inr:in_none;op_overloading_supported:true;minargs:1;maxargs:1),   { unary overloading supported }
+        (tok:_OP_EXPLICIT;nod:assignn;inr:in_none;op_overloading_supported:true;minargs:1;maxargs:1),   { unary overloading supported }
+        (tok:_NE         ;nod:unequaln;inr:in_none;op_overloading_supported:true;minargs:2;maxargs:2),  { binary overloading supported }
+        (tok:_OP_INC     ;nod:inlinen;inr:in_inc_x;op_overloading_supported:true;minargs:1;maxargs:1),  { unary overloading supported }
+        (tok:_OP_DEC     ;nod:inlinen;inr:in_dec_x;op_overloading_supported:true;minargs:1;maxargs:1)   { unary overloading supported }
       );
 
       tok2ops=4;
@@ -152,10 +154,22 @@ interface
     function token2managementoperator(optoken:ttoken):tmanagementoperator;
 
     { check operator args and result type }
+
+    type
+      toverload_check_flag = (
+        ocf_check_non_overloadable, { also check operators that are (currently) considered as
+                                      not overloadable (e.g. the "+" operator for dynamic arrays
+                                      if modeswitch arrayoperators is active) }
+        ocf_check_only              { only check whether the operator is overloaded, but don't
+                                      modify the passed in node (return true if the operator is
+                                      overloaded, false otherwise) }
+      );
+      toverload_check_flags = set of toverload_check_flag;
+
     function isbinaryoperatoroverloadable(treetyp:tnodetype;ld:tdef;lt:tnodetype;rd:tdef;rt:tnodetype) : boolean;
     function isoperatoracceptable(pf : tprocdef; optoken : ttoken) : boolean;
-    function isunaryoverloaded(var t : tnode) : boolean;
-    function isbinaryoverloaded(var t : tnode) : boolean;
+    function isunaryoverloaded(var t : tnode;ocf:toverload_check_flags) : boolean;
+    function isbinaryoverloaded(var t : tnode;ocf:toverload_check_flags) : boolean;
 
     { Register Allocation }
     procedure make_not_regable(p : tnode; how: tregableinfoflags);
@@ -171,7 +185,7 @@ interface
 
     { sets varsym varstate field correctly }
     type
-      tvarstateflag = (vsf_must_be_valid,vsf_use_hints);
+      tvarstateflag = (vsf_must_be_valid,vsf_use_hints,vsf_use_hint_for_string_result);
       tvarstateflags = set of tvarstateflag;
     procedure set_varstate(p:tnode;newstate:tvarstate;varstateflags:tvarstateflags);
 
@@ -180,6 +194,7 @@ interface
     procedure set_unique(p : tnode);
 
     function  valid_for_formal_var(p : tnode; report_errors: boolean) : boolean;
+    function  valid_for_formal_constref(p : tnode; report_errors: boolean) : boolean;
     function  valid_for_formal_const(p : tnode; report_errors: boolean) : boolean;
     function  valid_for_var(p:tnode; report_errors: boolean):boolean;
     function  valid_for_assignment(p:tnode; report_errors: boolean):boolean;
@@ -503,7 +518,9 @@ implementation
                     end;
 
                  { <dyn. array> + <dyn. array> is handled by the compiler }
-                 if (treetyp=addn) and (is_dynamic_array(ld) or is_dynamic_array(rd)) then
+                 if (m_array_operators in current_settings.modeswitches) and
+                     (treetyp=addn) and
+                     (is_dynamic_array(ld) or is_dynamic_array(rd)) then
                     begin
                       allowed:=false;
                       exit;
@@ -588,6 +605,8 @@ implementation
 
               result:=true;
             end;
+          else
+            ;
         end;
       end;
 
@@ -610,7 +629,11 @@ implementation
         while count > 0 do
           begin
             parasym:=tparavarsym(pf.parast.SymList[count-1]);
-            if is_boolean(parasym.vardef) then
+            if parasym.typ<>paravarsym then
+              begin
+                dec(count);
+              end
+            else if is_boolean(parasym.vardef) then
               begin
                 if parasym.name='RANGECHECK' then
                   begin
@@ -682,6 +705,8 @@ implementation
                         begin
                           result:=
                             tok2node[i].op_overloading_supported and
+                            (tok2node[i].minargs<=1) and
+                            (tok2node[i].maxargs>=1) and
                             isunaryoperatoroverloadable(tok2node[i].nod,tok2node[i].inr,ld);
                           break;
                         end;
@@ -698,6 +723,8 @@ implementation
                       rd:=tparavarsym(pf.parast.SymList[1]).vardef;
                       result:=
                         tok2node[i].op_overloading_supported and
+                        (tok2node[i].minargs<=2) and
+                        (tok2node[i].maxargs>=2) and
                         isbinaryoperatoroverloadable(tok2node[i].nod,ld,nothingn,rd,nothingn);
                       break;
                     end;
@@ -706,7 +733,7 @@ implementation
       end;
 
 
-    function isunaryoverloaded(var t : tnode) : boolean;
+    function isunaryoverloaded(var t : tnode;ocf:toverload_check_flags) : boolean;
       var
         ld      : tdef;
         optoken : ttoken;
@@ -728,11 +755,11 @@ implementation
         else
           inlinenumber:=in_none;
 
-        if not isunaryoperatoroverloadable(t.nodetype,inlinenumber,ld) then
+        if not (ocf_check_non_overloadable in ocf) and not isunaryoperatoroverloadable(t.nodetype,inlinenumber,ld) then
           exit;
 
         { operator overload is possible }
-        result:=true;
+        result:=not (ocf_check_only in ocf);
 
         optoken:=NOTOKEN;
         case t.nodetype of
@@ -748,12 +775,19 @@ implementation
                   optoken:=_OP_INC;
                 in_dec_x:
                   optoken:=_OP_DEC;
+                else
+                  ;
              end;
+           else
+             ;
         end;
         if (optoken=NOTOKEN) then
           begin
-            CGMessage(parser_e_operator_not_overloaded);
-            t:=cnothingnode.create;
+            if not (ocf_check_only in ocf) then
+              begin
+                CGMessage(parser_e_operator_not_overloaded);
+                t:=cnothingnode.create;
+              end;
             exit;
           end;
 
@@ -771,10 +805,13 @@ implementation
         { stop when there are no operators found }
         if candidates.count=0 then
           begin
-            CGMessage2(parser_e_operator_not_overloaded_2,ld.typename,arraytokeninfo[optoken].str);
             candidates.free;
             ppn.free;
-            t:=cnothingnode.create;
+            if not (ocf_check_only in ocf) then
+              begin
+                CGMessage2(parser_e_operator_not_overloaded_2,ld.typename,arraytokeninfo[optoken].str);
+                t:=cnothingnode.create;
+              end;
             exit;
           end;
 
@@ -789,15 +826,18 @@ implementation
         { exit when no overloads are found }
         if cand_cnt=0 then
           begin
-            CGMessage2(parser_e_operator_not_overloaded_2,ld.typename,arraytokeninfo[optoken].str);
             candidates.free;
             ppn.free;
-            t:=cnothingnode.create;
+            if not (ocf_check_only in ocf) then
+              begin
+                CGMessage2(parser_e_operator_not_overloaded_2,ld.typename,arraytokeninfo[optoken].str);
+                t:=cnothingnode.create;
+              end;
             exit;
           end;
 
         { Multiple candidates left? }
-        if cand_cnt>1 then
+        if (cand_cnt>1) and not (ocf_check_only in ocf) then
           begin
             CGMessage(type_e_cant_choose_overload_function);
 {$ifdef EXTDEBUG}
@@ -810,6 +850,13 @@ implementation
           end;
         candidates.free;
 
+        if ocf_check_only in ocf then
+          begin
+            ppn.free;
+            result:=true;
+            exit;
+          end;
+
         addsymref(operpd.procsym);
 
         { the nil as symtable signs firstcalln that this is
@@ -822,7 +869,7 @@ implementation
       end;
 
 
-    function isbinaryoverloaded(var t : tnode) : boolean;
+    function isbinaryoverloaded(var t : tnode;ocf:toverload_check_flags) : boolean;
       var
         rd,ld   : tdef;
         optoken : ttoken;
@@ -856,6 +903,8 @@ implementation
                     optoken:=_GT;
                   _GTE:
                     optoken:=_LT;
+                  else
+                    ;
                 end;
                 candidates:=tcallcandidates.create_operator(optoken,ppn);
               end;
@@ -915,11 +964,14 @@ implementation
         { load easier access variables }
         ld:=tbinarynode(t).left.resultdef;
         rd:=tbinarynode(t).right.resultdef;
-        if not isbinaryoperatoroverloadable(t.nodetype,ld,tbinarynode(t).left.nodetype,rd,tbinarynode(t).right.nodetype) then
+        if not (ocf_check_non_overloadable in ocf) and
+            not isbinaryoperatoroverloadable(t.nodetype,ld,tbinarynode(t).left.nodetype,rd,tbinarynode(t).right.nodetype) then
           exit;
 
         { operator overload is possible }
-        result:=true;
+        { if we only check for the existance of the overload, then we assume that
+          it is not overloaded }
+        result:=not (ocf_check_only in ocf);
 
         case t.nodetype of
            equaln:
@@ -964,16 +1016,19 @@ implementation
              optoken:=_OP_IN;
            else
              begin
-               CGMessage(parser_e_operator_not_overloaded);
-               t:=cnothingnode.create;
+               if not (ocf_check_only in ocf) then
+                 begin
+                   CGMessage(parser_e_operator_not_overloaded);
+                   t:=cnothingnode.create;
+                 end;
                exit;
              end;
         end;
 
-        cand_cnt:=search_operator(optoken,optoken<>_NE);
+        cand_cnt:=search_operator(optoken,(optoken<>_NE) and not (ocf_check_only in ocf));
 
         { no operator found for "<>" then search for "=" operator }
-        if (cand_cnt=0) and (optoken=_NE) then
+        if (cand_cnt=0) and (optoken=_NE) and not (ocf_check_only in ocf) then
           begin
             ppn.free;
             ppn:=nil;
@@ -985,7 +1040,15 @@ implementation
         if (cand_cnt=0) then
           begin
             ppn.free;
-            t:=cnothingnode.create;
+            if not (ocf_check_only in ocf) then
+              t:=cnothingnode.create;
+            exit;
+          end;
+
+        if ocf_check_only in ocf then
+          begin
+            ppn.free;
+            result:=true;
             exit;
           end;
 
@@ -1050,6 +1113,8 @@ implementation
                   begin
                     if (ra_addr_taken in how) then
                       tabstractvarsym(tloadnode(p).symtableentry).addr_taken:=true;
+                    if (ra_different_scope in how) then
+                      tabstractvarsym(tloadnode(p).symtableentry).different_scope:=true;
                     if (tabstractvarsym(tloadnode(p).symtableentry).varregable <> vr_none) and
                        ((not records_only) or
                         (tabstractvarsym(tloadnode(p).symtableentry).vardef.typ = recorddef)) then
@@ -1202,6 +1267,8 @@ implementation
                          pointer itself is read and never written }
                        newstate := vs_read;
                      end;
+                   else
+                     ;
                end;
                  p:=tunarynode(p).left;
                end;
@@ -1253,7 +1320,20 @@ implementation
                                begin
                                  if (vo_is_funcret in hsym.varoptions) then
                                    begin
-                                     if (vsf_use_hints in varstateflags) then
+                                     { An uninitialized function Result of a managed type needs special handling.
+                                       When passing it as a var parameter a warning need to be emitted, since a user
+                                       may expect Result to be empty (nil) by default as it happens with local vars
+                                       of a managed type. But this is not true for Result and may lead to serious issues.
+
+                                       The only exception is SetLength(Result, ?) for a string Result. A user always
+                                       expects undefined contents of the string after calling SetLength(). In such
+                                       case a hint need to be emitted.
+                                     }
+                                     if is_managed_type(hsym.vardef) then
+                                       if not ( is_string(hsym.vardef) and (vsf_use_hint_for_string_result in varstateflags) ) then
+                                         exclude(varstateflags,vsf_use_hints);
+
+                                     if vsf_use_hints in varstateflags then
                                        begin
                                          if is_managed_type(hsym.vardef) then
                                            CGMessagePos(p.fileinfo,sym_h_managed_function_result_uninitialized)
@@ -1291,6 +1371,8 @@ implementation
                    vs_readwritten:
                      if not(nf_write in tloadnode(p).flags) then
                        include(tloadnode(p).flags,nf_modify);
+                   else
+                     ;
                  end;
                  break;
                end;
@@ -1414,6 +1496,8 @@ implementation
                    gotrecord:=true;
                  stringdef :
                    gotstring:=true;
+                 else
+                   ;
                end;
                if (valid_property in opts) then
                  begin
@@ -1556,6 +1640,8 @@ implementation
                            exit
                          end;
                      end;
+                   else
+                     ;
                  end;
                  hp:=ttypeconvnode(hp).left;
                end;
@@ -1884,6 +1970,13 @@ implementation
       end;
 
 
+    function  valid_for_formal_constref(p : tnode; report_errors: boolean) : boolean;
+      begin
+        valid_for_formal_constref:=(p.resultdef.typ=formaldef) or
+          valid_for_assign(p,[valid_void,valid_range],report_errors);
+      end;
+
+
     function  valid_for_formal_const(p : tnode; report_errors: boolean) : boolean;
       begin
         valid_for_formal_const:=(p.resultdef.typ=formaldef) or
@@ -1918,7 +2011,7 @@ implementation
               { all types can be passed to a formaldef,
                 but it is not the prefered way }
               if not is_constnode(fromnode) then
-                eq:=te_convert_l2
+                eq:=te_convert_l6
               else
                 eq:=te_incompatible;
             end;
@@ -1979,6 +2072,8 @@ implementation
                  (tfiledef(def_to).filetyp = ft_untyped) then
                 eq:=te_convert_l1;
             end;
+          else
+            ;
         end;
       end;
 
@@ -1991,11 +2086,6 @@ implementation
       begin
         { Note: eq must be already valid, it will only be updated! }
         case def_to.typ of
-          formaldef :
-            begin
-              { all types can be passed to a formaldef }
-              eq:=te_equal;
-            end;
           stringdef :
             begin
               { to support ansi/long/wide strings in a proper way }
@@ -2061,6 +2151,8 @@ implementation
                     end
                 end;
             end;
+          else
+            ;
         end;
       end;
 
@@ -2189,6 +2281,33 @@ implementation
             end;
         end;
 
+      function processhelper(hashedid:THashedIDString;helperdef:tobjectdef):boolean;
+        var
+          srsym : tsym;
+          hasoverload,foundanything : boolean;
+        begin
+          result:=false;
+          srsym:=nil;
+          hasoverload:=false;
+          while assigned(helperdef) do
+            begin
+              srsym:=tsym(helperdef.symtable.FindWithHash(hashedid));
+              if assigned(srsym) and
+                  { Delphi allows hiding a property by a procedure with the same name }
+                  (srsym.typ=procsym) then
+                begin
+                  hasoverload:=processprocsym(tprocsym(srsym),foundanything);
+                  { when there is no explicit overload we stop searching }
+                  if foundanything and
+                     not hasoverload then
+                    break;
+                end;
+              helperdef:=helperdef.childof;
+            end;
+          if not hasoverload and assigned(srsym) then
+            exit(true);
+        end;
+
       var
         srsym      : tsym;
         hashedid   : THashedIDString;
@@ -2196,6 +2315,8 @@ implementation
         foundanything : boolean;
         extendeddef : tabstractrecorddef;
         helperdef  : tobjectdef;
+        helperlist : TFPObjectList;
+        i : integer;
       begin
         if FOperator=NOTOKEN then
           hashedid.id:=FProcsym.name
@@ -2215,27 +2336,24 @@ implementation
                )
                and searchhelpers then
              begin
-               if search_last_objectpascal_helper(structdef,nil,helperdef) then
+               if m_multi_helpers in current_settings.modeswitches then
                  begin
-                   srsym:=nil;
-                   while assigned(helperdef) do
+                   helperlist:=get_objectpascal_helpers(structdef);
+                   if assigned(helperlist) and (helperlist.count>0) then
                      begin
-                       srsym:=tsym(helperdef.symtable.FindWithHash(hashedid));
-                       if assigned(srsym) and
-                           { Delphi allows hiding a property by a procedure with the same name }
-                           (srsym.typ=procsym) then
-                         begin
-                           hasoverload:=processprocsym(tprocsym(srsym),foundanything);
-                           { when there is no explicit overload we stop searching }
-                           if foundanything and
-                              not hasoverload then
-                             break;
-                         end;
-                       helperdef:=helperdef.childof;
+                       i:=helperlist.count-1;
+                       repeat
+                         helperdef:=tobjectdef(helperlist[i]);
+                         if (helperdef.owner.symtabletype in [staticsymtable,globalsymtable]) or
+                            is_visible_for_object(helperdef.typesym,helperdef) then
+                              if processhelper(hashedid,helperdef) then
+                                exit;
+                         dec(i);
+                       until (i<0);
                      end;
-                   if not hasoverload and assigned(srsym) then
-                     exit;
-                 end;
+                 end
+               else if search_last_objectpascal_helper(structdef,nil,helperdef) and processhelper(hashedid,helperdef) then
+                  exit;
              end;
            { now search in the type itself }
            srsym:=tsym(structdef.symtable.FindWithHash(hashedid));
@@ -3024,8 +3142,6 @@ implementation
                   inc(hp^.coper_count);
                 te_incompatible :
                   hp^.invalid:=true;
-                else
-                  internalerror(200212072);
               end;
 
               { stop checking when an incompatible parameter is found }
@@ -3076,9 +3192,9 @@ implementation
         variantorddef_cl: array[tordtype] of tvariantequaltype =
           (tve_incompatible,tve_byte,tve_word,tve_cardinal,tve_chari64,tve_incompatible,
            tve_shortint,tve_smallint,tve_longint,tve_chari64,tve_incompatible,
+           tve_boolformal,tve_boolformal,tve_boolformal,tve_boolformal,tve_boolformal,
            tve_boolformal,tve_boolformal,tve_boolformal,tve_boolformal,
-           tve_boolformal,tve_boolformal,tve_boolformal,tve_boolformal,
-           tve_chari64,tve_chari64,tve_dblcurrency);
+           tve_chari64,tve_chari64,tve_dblcurrency,tve_incompatible);
 { TODO: fixme for 128 bit floats }
         variantfloatdef_cl: array[tfloattype] of tvariantequaltype =
           (tve_single,tve_dblcurrency,tve_extended,tve_extended,

+ 56 - 435
compiler/i386/aoptcpu.pas

@@ -40,7 +40,6 @@ unit aoptcpu;
         procedure PeepHoleOptPass1; override;
         procedure PeepHoleOptPass2; override;
         procedure PostPeepHoleOpts; override;
-        function DoFpuLoadStoreOpt(var p : tai) : boolean;
       end;
 
     Var
@@ -58,74 +57,6 @@ unit aoptcpu;
       { units we should get rid off: }
       symsym,symconst;
 
-    function TCPUAsmoptimizer.DoFpuLoadStoreOpt(var p: tai): boolean;
-    { returns true if a "continue" should be done after this optimization }
-    var hp1, hp2: tai;
-    begin
-      DoFpuLoadStoreOpt := false;
-      if (taicpu(p).oper[0]^.typ = top_ref) and
-         getNextInstruction(p, hp1) and
-         (hp1.typ = ait_instruction) and
-         (((taicpu(hp1).opcode = A_FLD) and
-           (taicpu(p).opcode = A_FSTP)) or
-          ((taicpu(p).opcode = A_FISTP) and
-           (taicpu(hp1).opcode = A_FILD))) and
-         (taicpu(hp1).oper[0]^.typ = top_ref) and
-         (taicpu(hp1).opsize = taicpu(p).opsize) and
-         RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
-        begin
-          { replacing fstp f;fld f by fst f is only valid for extended because of rounding }
-          if (taicpu(p).opsize=S_FX) and
-             getNextInstruction(hp1, hp2) and
-             (hp2.typ = ait_instruction) and
-             IsExitCode(hp2) and
-             (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
-             not(assigned(current_procinfo.procdef.funcretsym) and
-                 (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
-             (taicpu(p).oper[0]^.ref^.index = NR_NO) then
-            begin
-              asml.remove(p);
-              asml.remove(hp1);
-              p.free;
-              hp1.free;
-              p := hp2;
-              removeLastDeallocForFuncRes(p);
-              doFPULoadStoreOpt := true;
-            end
-          (* can't be done because the store operation rounds
-          else
-            { fst can't store an extended value! }
-            if (taicpu(p).opsize <> S_FX) and
-               (taicpu(p).opsize <> S_IQ) then
-              begin
-                if (taicpu(p).opcode = A_FSTP) then
-                  taicpu(p).opcode := A_FST
-                else taicpu(p).opcode := A_FIST;
-                asml.remove(hp1);
-                hp1.free;
-              end
-          *)
-        end;
-    end;
-
-
-  { converts a TChange variable to a TRegister }
-  function tch2reg(ch: tinschange): tsuperregister;
-    const
-      ch2reg: array[CH_REAX..CH_REDI] of tsuperregister = (RS_EAX,RS_ECX,RS_EDX,RS_EBX,RS_ESP,RS_EBP,RS_ESI,RS_EDI);
-    begin
-      if (ch <= CH_REDI) then
-        tch2reg := ch2reg[ch]
-      else if (ch <= CH_WEDI) then
-        tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_REDI))]
-      else if (ch <= CH_RWEDI) then
-        tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_WEDI))]
-      else if (ch <= CH_MEDI) then
-        tch2reg := ch2reg[tinschange(ord(ch) - ord(CH_RWEDI))]
-      else
-        InternalError(2016041901)
-    end;
-
 
   { Checks if the register is a 32 bit general purpose register }
   function isgp32reg(reg: TRegister): boolean;
@@ -152,9 +83,7 @@ end;
 
 procedure TCPUAsmOptimizer.PrePeepHoleOpts;
 var
-  p,hp1: tai;
-  l: aint;
-  tmpRef: treference;
+  p: tai;
 begin
   p := BlockStart;
   while (p <> BlockEnd) Do
@@ -169,220 +98,29 @@ begin
               end;
             case taicpu(p).opcode Of
               A_IMUL:
-                {changes certain "imul const, %reg"'s to lea sequences}
-                begin
-                  if (taicpu(p).oper[0]^.typ = Top_Const) and
-                     (taicpu(p).oper[1]^.typ = Top_Reg) and
-                     (taicpu(p).opsize = S_L) then
-                    if (taicpu(p).oper[0]^.val = 1) then
-                      if (taicpu(p).ops = 2) then
-                       {remove "imul $1, reg"}
-                        begin
-                          hp1 := tai(p.Next);
-                          asml.remove(p);
-                          p.free;
-                          p := hp1;
-                          continue;
-                        end
-                      else
-                       {change "imul $1, reg1, reg2" to "mov reg1, reg2"}
-                        begin
-                          hp1 := taicpu.Op_Reg_Reg(A_MOV, S_L, taicpu(p).oper[1]^.reg,taicpu(p).oper[2]^.reg);
-                          InsertLLItem(p.previous, p.next, hp1);
-                          p.free;
-                          p := hp1;
-                        end
-                    else if
-                     ((taicpu(p).ops <= 2) or
-                      (taicpu(p).oper[2]^.typ = Top_Reg)) and
-                     (taicpu(p).oper[0]^.val <= 12) and
-                     not(cs_opt_size in current_settings.optimizerswitches) and
-                     (not(GetNextInstruction(p, hp1)) or
-                       {GetNextInstruction(p, hp1) and}
-                       not((tai(hp1).typ = ait_instruction) and
-                           ((taicpu(hp1).opcode=A_Jcc) and
-                            (taicpu(hp1).condition in [C_O,C_NO])))) then
-                      begin
-                        reference_reset(tmpref,1,[]);
-                        case taicpu(p).oper[0]^.val Of
-                          3: begin
-                             {imul 3, reg1, reg2 to
-                                lea (reg1,reg1,2), reg2
-                              imul 3, reg1 to
-                                lea (reg1,reg1,2), reg1}
-                               TmpRef.base := taicpu(p).oper[1]^.reg;
-                               TmpRef.index := taicpu(p).oper[1]^.reg;
-                               TmpRef.ScaleFactor := 2;
-                               if (taicpu(p).ops = 2) then
-                                 hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
-                               else
-                                 hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                               InsertLLItem(p.previous, p.next, hp1);
-                               p.free;
-                               p := hp1;
-                            end;
-                         5: begin
-                            {imul 5, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                             imul 5, reg1 to
-                               lea (reg1,reg1,4), reg1}
-                              TmpRef.base := taicpu(p).oper[1]^.reg;
-                              TmpRef.index := taicpu(p).oper[1]^.reg;
-                              TmpRef.ScaleFactor := 4;
-                              if (taicpu(p).ops = 2) then
-                                hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
-                              else
-                                hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                              InsertLLItem(p.previous, p.next, hp1);
-                              p.free;
-                              p := hp1;
-                            end;
-                         6: begin
-                            {imul 6, reg1, reg2 to
-                               lea (,reg1,2), reg2
-                               lea (reg2,reg1,4), reg2
-                             imul 6, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               add reg1, reg1}
-                              if (current_settings.optimizecputype <= cpu_386) then
-                                begin
-                                  TmpRef.index := taicpu(p).oper[1]^.reg;
-                                  if (taicpu(p).ops = 3) then
-                                    begin
-                                      TmpRef.base := taicpu(p).oper[2]^.reg;
-                                      TmpRef.ScaleFactor := 4;
-                                      hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                    end
-                                  else
-                                    begin
-                                      hp1 :=  taicpu.op_reg_reg(A_ADD, S_L,
-                                        taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
-                                    end;
-                                  InsertLLItem(p, p.next, hp1);
-                                  reference_reset(tmpref,2,[]);
-                                  TmpRef.index := taicpu(p).oper[1]^.reg;
-                                  TmpRef.ScaleFactor := 2;
-                                  if (taicpu(p).ops = 3) then
-                                    begin
-                                      TmpRef.base := NR_NO;
-                                      hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef,
-                                        taicpu(p).oper[2]^.reg);
-                                    end
-                                  else
-                                    begin
-                                      TmpRef.base := taicpu(p).oper[1]^.reg;
-                                      hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                    end;
-                                  InsertLLItem(p.previous, p.next, hp1);
-                                  p.free;
-                                  p := tai(hp1.next);
-                                end
-                            end;
-                          9: begin
-                             {imul 9, reg1, reg2 to
-                                lea (reg1,reg1,8), reg2
-                              imul 9, reg1 to
-                                lea (reg1,reg1,8), reg1}
-                               TmpRef.base := taicpu(p).oper[1]^.reg;
-                               TmpRef.index := taicpu(p).oper[1]^.reg;
-                               TmpRef.ScaleFactor := 8;
-                               if (taicpu(p).ops = 2) then
-                                 hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg)
-                               else
-                                 hp1 := taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                               InsertLLItem(p.previous, p.next, hp1);
-                               p.free;
-                               p := hp1;
-                             end;
-                         10: begin
-                            {imul 10, reg1, reg2 to
-                               lea (reg1,reg1,4), reg2
-                               add reg2, reg2
-                             imul 10, reg1 to
-                               lea (reg1,reg1,4), reg1
-                               add reg1, reg1}
-                               if (current_settings.optimizecputype <= cpu_386) then
-                                 begin
-                                   if (taicpu(p).ops = 3) then
-                                     hp1 :=  taicpu.op_reg_reg(A_ADD, S_L,
-                                       taicpu(p).oper[2]^.reg,taicpu(p).oper[2]^.reg)
-                                   else
-                                     hp1 := taicpu.op_reg_reg(A_ADD, S_L,
-                                       taicpu(p).oper[1]^.reg,taicpu(p).oper[1]^.reg);
-                                   InsertLLItem(p, p.next, hp1);
-                                   TmpRef.base := taicpu(p).oper[1]^.reg;
-                                   TmpRef.index := taicpu(p).oper[1]^.reg;
-                                   TmpRef.ScaleFactor := 4;
-                                   if (taicpu(p).ops = 3) then
-                                      hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg)
-                                    else
-                                      hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                   InsertLLItem(p.previous, p.next, hp1);
-                                   p.free;
-                                   p := tai(hp1.next);
-                                 end
-                             end;
-                         12: begin
-                            {imul 12, reg1, reg2 to
-                               lea (,reg1,4), reg2
-                               lea (reg2,reg1,8), reg2
-                             imul 12, reg1 to
-                               lea (reg1,reg1,2), reg1
-                               lea (,reg1,4), reg1}
-                               if (current_settings.optimizecputype <= cpu_386)
-                                 then
-                                   begin
-                                     TmpRef.index := taicpu(p).oper[1]^.reg;
-                                     if (taicpu(p).ops = 3) then
-                                       begin
-                                         TmpRef.base := taicpu(p).oper[2]^.reg;
-                                         TmpRef.ScaleFactor := 8;
-                                         hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                                       end
-                                     else
-                                       begin
-                                         TmpRef.base := NR_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                       end;
-                                     InsertLLItem(p, p.next, hp1);
-                                     reference_reset(tmpref,2,[]);
-                                     TmpRef.index := taicpu(p).oper[1]^.reg;
-                                     if (taicpu(p).ops = 3) then
-                                       begin
-                                         TmpRef.base := NR_NO;
-                                         TmpRef.ScaleFactor := 4;
-                                         hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[2]^.reg);
-                                       end
-                                     else
-                                       begin
-                                         TmpRef.base := taicpu(p).oper[1]^.reg;
-                                         TmpRef.ScaleFactor := 2;
-                                         hp1 :=  taicpu.op_ref_reg(A_LEA, S_L, TmpRef, taicpu(p).oper[1]^.reg);
-                                       end;
-                                     InsertLLItem(p.previous, p.next, hp1);
-                                     p.free;
-                                     p := tai(hp1.next);
-                                   end
-                             end
-                        end;
-                      end;
-                end;
+                if PrePeepholeOptIMUL(p) then
+                  Continue;
               A_SAR,A_SHR:
                 if PrePeepholeOptSxx(p) then
                   continue;
               A_XOR:
-                if (taicpu(p).oper[0]^.typ = top_reg) and
-                   (taicpu(p).oper[1]^.typ = top_reg) and
-                   (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
-                 { temporarily change this to 'mov reg,0' to make it easier }
-                 { for the CSE. Will be changed back in pass 2              }
-                  begin
-                    taicpu(p).opcode := A_MOV;
-                    taicpu(p).loadConst(0,0);
-                  end;
+                begin
+                  if (taicpu(p).oper[0]^.typ = top_reg) and
+                     (taicpu(p).oper[1]^.typ = top_reg) and
+                     (taicpu(p).oper[0]^.reg = taicpu(p).oper[1]^.reg) then
+                   { temporarily change this to 'mov reg,0' to make it easier }
+                   { for the CSE. Will be changed back in pass 2              }
+                    begin
+                      taicpu(p).opcode := A_MOV;
+                      taicpu(p).loadConst(0,0);
+                    end;
+                end;
+              else
+                ;
             end;
           end;
+        else
+          ;
       end;
       p := tai(p.next)
     end;
@@ -399,15 +137,10 @@ function WriteOk : Boolean;
   end;
 
 var
-  l : longint;
   p,hp1,hp2 : tai;
   hp3,hp4: tai;
   v:aint;
 
-  TmpRef: TReference;
-
-  TmpBool1, TmpBool2: Boolean;
-
   function GetFinalDestination(asml: TAsmList; hp: taicpu; level: longint): boolean;
   {traces sucessive jumps to their final destination and sets it, e.g.
    je l1                je l3
@@ -524,18 +257,18 @@ begin
             { Handle Jmp Optimizations }
             if taicpu(p).is_jmp then
               begin
-      {the following if-block removes all code between a jmp and the next label,
-        because it can never be executed}
+                { the following if-block removes all code between a jmp and the next label,
+                  because it can never be executed }
                 if (taicpu(p).opcode = A_JMP) then
                   begin
                     hp2:=p;
                     while GetNextInstruction(hp2, hp1) and
                           (hp1.typ <> ait_label) do
-                      if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
+                      if not(hp1.typ in ([ait_label]+skipinstr)) then
                         begin
                           { don't kill start/end of assembler block,
                             no-line-info-start/end etc }
-                          if hp1.typ<>ait_marker then
+                          if not(hp1.typ in [ait_align,ait_marker]) then
                             begin
                               asml.remove(hp1);
                               hp1.free;
@@ -596,18 +329,6 @@ begin
             else
             { All other optimizes }
               begin
-                for l := 0 to taicpu(p).ops-1 Do
-                  if (taicpu(p).oper[l]^.typ = top_ref) then
-                    With taicpu(p).oper[l]^.ref^ Do
-                      begin
-                        if (base = NR_NO) and
-                           (index <> NR_NO) and
-                           (scalefactor in [0,1]) then
-                          begin
-                            base := index;
-                            index := NR_NO
-                          end
-                      end;
                 case taicpu(p).opcode Of
                   A_AND:
                     if OptPass1And(p) then
@@ -680,6 +401,8 @@ begin
                           case taicpu(hp1).condition of
                             C_LE: taicpu(hp3).condition := C_GE;
                             C_BE: taicpu(hp3).condition := C_AE;
+                            else
+                              internalerror(2019050903);
                           end;
                           asml.remove(p);
                           asml.remove(hp1);
@@ -690,109 +413,10 @@ begin
                         end
                     end;
                   A_FLD:
-                    begin
-                      if (taicpu(p).oper[0]^.typ = top_reg) and
-                         GetNextInstruction(p, hp1) and
-                         (hp1.typ = Ait_Instruction) and
-                          (taicpu(hp1).oper[0]^.typ = top_reg) and
-                         (taicpu(hp1).oper[1]^.typ = top_reg) and
-                         (taicpu(hp1).oper[0]^.reg = NR_ST) and
-                         (taicpu(hp1).oper[1]^.reg = NR_ST1) then
-                         { change                        to
-                             fld      reg               fxxx reg,st
-                             fxxxp    st, st1 (hp1)
-                           Remark: non commutative operations must be reversed!
-                         }
-                        begin
-                            case taicpu(hp1).opcode Of
-                              A_FMULP,A_FADDP,
-                              A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
-                                begin
-                                  case taicpu(hp1).opcode Of
-                                    A_FADDP: taicpu(hp1).opcode := A_FADD;
-                                    A_FMULP: taicpu(hp1).opcode := A_FMUL;
-                                    A_FSUBP: taicpu(hp1).opcode := A_FSUBR;
-                                    A_FSUBRP: taicpu(hp1).opcode := A_FSUB;
-                                    A_FDIVP: taicpu(hp1).opcode := A_FDIVR;
-                                    A_FDIVRP: taicpu(hp1).opcode := A_FDIV;
-                                  end;
-                                  taicpu(hp1).oper[0]^.reg := taicpu(p).oper[0]^.reg;
-                                  taicpu(hp1).oper[1]^.reg := NR_ST;
-                                  asml.remove(p);
-                                  p.free;
-                                  p := hp1;
-                                  continue;
-                                end;
-                            end;
-                        end
-                      else
-                        if (taicpu(p).oper[0]^.typ = top_ref) and
-                           GetNextInstruction(p, hp2) and
-                           (hp2.typ = Ait_Instruction) and
-                           (taicpu(hp2).ops = 2) and
-                           (taicpu(hp2).oper[0]^.typ = top_reg) and
-                           (taicpu(hp2).oper[1]^.typ = top_reg) and
-                           (taicpu(p).opsize in [S_FS, S_FL]) and
-                           (taicpu(hp2).oper[0]^.reg = NR_ST) and
-                           (taicpu(hp2).oper[1]^.reg = NR_ST1) then
-                          if GetLastInstruction(p, hp1) and
-                             (hp1.typ = Ait_Instruction) and
-                             ((taicpu(hp1).opcode = A_FLD) or
-                              (taicpu(hp1).opcode = A_FST)) and
-                             (taicpu(hp1).opsize = taicpu(p).opsize) and
-                             (taicpu(hp1).oper[0]^.typ = top_ref) and
-                             RefsEqual(taicpu(p).oper[0]^.ref^, taicpu(hp1).oper[0]^.ref^) then
-                            if ((taicpu(hp2).opcode = A_FMULP) or
-                                (taicpu(hp2).opcode = A_FADDP)) then
-                            { change                      to
-                                fld/fst   mem1  (hp1)       fld/fst   mem1
-                                fld       mem1  (p)         fadd/
-                                faddp/                       fmul     st, st
-                                fmulp  st, st1 (hp2) }
-                              begin
-                                asml.remove(p);
-                                p.free;
-                                p := hp1;
-                                if (taicpu(hp2).opcode = A_FADDP) then
-                                  taicpu(hp2).opcode := A_FADD
-                                else
-                                  taicpu(hp2).opcode := A_FMUL;
-                                taicpu(hp2).oper[1]^.reg := NR_ST;
-                              end
-                            else
-                            { change              to
-                                fld/fst mem1 (hp1)   fld/fst mem1
-                                fld     mem1 (p)     fld      st}
-                              begin
-                                taicpu(p).changeopsize(S_FL);
-                                taicpu(p).loadreg(0,NR_ST);
-                              end
-                          else
-                            begin
-                              case taicpu(hp2).opcode Of
-                                A_FMULP,A_FADDP,A_FSUBP,A_FDIVP,A_FSUBRP,A_FDIVRP:
-                          { change                        to
-                              fld/fst  mem1    (hp1)      fld/fst    mem1
-                              fld      mem2    (p)        fxxx       mem2
-                              fxxxp    st, st1 (hp2)                      }
-
-                                  begin
-                                    case taicpu(hp2).opcode Of
-                                      A_FADDP: taicpu(p).opcode := A_FADD;
-                                      A_FMULP: taicpu(p).opcode := A_FMUL;
-                                      A_FSUBP: taicpu(p).opcode := A_FSUBR;
-                                      A_FSUBRP: taicpu(p).opcode := A_FSUB;
-                                      A_FDIVP: taicpu(p).opcode := A_FDIVR;
-                                      A_FDIVRP: taicpu(p).opcode := A_FDIV;
-                                    end;
-                                    asml.remove(hp2);
-                                    hp2.free;
-                                  end
-                              end
-                            end
-                    end;
+                    if OptPass1FLD(p) then
+                      continue;
                   A_FSTP,A_FISTP:
-                    if doFpuLoadStoreOpt(p) then
+                    if OptPass1FSTP(p) then
                       continue;
                   A_LEA:
                     begin
@@ -917,32 +541,6 @@ begin
                   A_SHL, A_SAL:
                     if OptPass1SHLSAL(p) then
                       Continue;
-                  A_SETcc :
-                    { changes
-                        setcc (funcres)             setcc reg
-                        movb (funcres), reg      to leave/ret
-                        leave/ret                               }
-                    begin
-                      if (taicpu(p).oper[0]^.typ = top_ref) and
-                         GetNextInstruction(p, hp1) and
-                         GetNextInstruction(hp1, hp2) and
-                         IsExitCode(hp2) and
-                         (taicpu(p).oper[0]^.ref^.base = current_procinfo.FramePointer) and
-                         (taicpu(p).oper[0]^.ref^.index = NR_NO) and
-                         not(assigned(current_procinfo.procdef.funcretsym) and
-                             (taicpu(p).oper[0]^.ref^.offset < tabstractnormalvarsym(current_procinfo.procdef.funcretsym).localloc.reference.offset)) and
-                         (hp1.typ = ait_instruction) and
-                         (taicpu(hp1).opcode = A_MOV) and
-                         (taicpu(hp1).opsize = S_B) and
-                         (taicpu(hp1).oper[0]^.typ = top_ref) and
-                         RefsEqual(taicpu(hp1).oper[0]^.ref^, taicpu(p).oper[0]^.ref^) then
-                        begin
-                          taicpu(p).loadReg(0,taicpu(hp1).oper[1]^.reg);
-                          DebugMsg('Peephole optimizer SetccMovbLeaveRet2SetccLeaveRet',p);
-                          asml.remove(hp1);
-                          hp1.free;
-                        end
-                    end;
                   A_SUB:
                     if OptPass1Sub(p) then
                       continue;
@@ -982,9 +580,18 @@ begin
                   A_MOVSS:
                     if OptPass1MOVXX(p) then
                       continue;
+                  A_SETcc:
+                    begin
+                      if OptPass1SETcc(p) then
+                        continue;
+                    end
+                  else
+                    ;
                 end;
             end; { if is_jmp }
           end;
+        else
+          ;
       end;
       updateUsedRegs(UsedRegs,p);
       p:=tai(p.next);
@@ -1014,7 +621,7 @@ begin
                 if OptPass2Jcc(p) then
                   continue;
               A_FSTP,A_FISTP:
-                if DoFpuLoadStoreOpt(p) then
+                if OptPass1FSTP(p) then
                   continue;
               A_IMUL:
                 if OptPass2Imul(p) then
@@ -1023,10 +630,16 @@ begin
                 if OptPass2Jmp(p) then
                   continue;
               A_MOV:
-                if OptPass2MOV(p) then
-                  continue;
+                begin
+                  if OptPass2MOV(p) then
+                    continue;
+                end
+              else
+                ;
             end;
           end;
+        else
+          ;
       end;
       p := tai(p.next)
     end;
@@ -1035,7 +648,7 @@ end;
 
 procedure TCPUAsmOptimizer.PostPeepHoleOpts;
 var
-  p,hp1,hp2: tai;
+  p,hp1: tai;
 begin
   p := BlockStart;
   ClearUsedRegs;
@@ -1091,6 +704,8 @@ begin
                                   setsubreg(taicpu(p).oper[1]^.reg,R_SUBL);
                                 end;
                             end;
+                          else
+                            ;
                         end
                       else if (taicpu(p).oper[0]^.typ = top_ref) and
                           (taicpu(p).oper[0]^.ref^.base <> taicpu(p).oper[1]^.reg) and
@@ -1111,10 +726,16 @@ begin
                         end;
                  end;
               A_TEST, A_OR:
-                if PostPeepholeOptTestOr(p) then
-                  Continue;
+                begin
+                  if PostPeepholeOptTestOr(p) then
+                    Continue;
+                end;
+              else
+                ;
             end;
           end;
+        else
+          ;
       end;
       p := tai(p.next)
     end;

+ 0 - 4
compiler/i386/aoptcpub.pas

@@ -70,10 +70,6 @@ Const
 
   MaxCh = 3;
 
-{ the maximum number of operands an instruction has }
-
-  MaxOps = 3;
-
 {Oper index of operand that contains the source (reference) with a load }
 {instruction                                                            }
 

+ 12 - 5
compiler/i386/cgcpu.pas

@@ -261,10 +261,6 @@ unit cgcpu;
                                 reference_reset_symbol(tmpref,dirref.symbol,0,sizeof(pint),[]);
                                 tmpref.refaddr:=addr_pic;
                                 tmpref.base:=current_procinfo.got;
-{$ifdef EXTDEBUG}
-				if not (pi_needs_got in current_procinfo.flags) then
-				  Comment(V_warning,'pi_needs_got not included');
-{$endif EXTDEBUG}
                                 include(current_procinfo.flags,pi_needs_got);
                                 list.concat(taicpu.op_ref(A_PUSH,S_L,tmpref));
                               end
@@ -517,6 +513,8 @@ unit cgcpu;
           S_B : list.concat(Taicpu.Op_none(A_MOVSB,S_NO));
           S_W : list.concat(Taicpu.Op_none(A_MOVSW,S_NO));
           S_L : list.concat(Taicpu.Op_none(A_MOVSD,S_NO));
+          else
+            internalerror(2019050901);
         end;
         ungetcpuregister(list,NR_EDI);
         ungetcpuregister(list,NR_ECX);
@@ -548,7 +546,10 @@ unit cgcpu;
             if not (target_info.system in [system_i386_darwin,system_i386_iphonesim]) then
               begin
                 { Use ECX as a temp register by default }
-                tmpreg:=NR_ECX;
+                if current_procinfo.got = NR_EBX then
+                  tmpreg:=NR_EBX
+                else
+                  tmpreg:=NR_ECX;
                 { Allocate registers used for parameters to make sure they
                   never allocated during this PIC init code }
                 for i:=0 to current_procinfo.procdef.paras.Count - 1 do
@@ -873,6 +874,8 @@ unit cgcpu;
               cg.ungetcpuregister(list,NR_ECX);
               exit;
             end;
+          else
+            ;
         end;
         get_64bit_ops(op,op1,op2);
         if op in [OP_ADD,OP_SUB] then
@@ -940,6 +943,8 @@ unit cgcpu;
                           list.concat(taicpu.op_const_reg(A_RCR,S_L,value,reg.reglo));
                           cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
                         end;
+                      else
+                        internalerror(2019050902);
                     end
                   else if value>31 then
                     case op of
@@ -1053,6 +1058,8 @@ unit cgcpu;
                           list.concat(taicpu.op_const_ref(A_RCR,S_L,value,tempref));
                           cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
                         end;
+                      else
+                        internalerror(2019050901);
                     end
                   else if value>31 then
                     case op of

+ 2 - 0
compiler/i386/cpuelf.pas

@@ -334,6 +334,8 @@ implementation
                 data.Write(zero,4);
                 continue;
               end;
+            else
+              ;
           end;
 
           if (objreloc.flags and rf_raw)=0 then

+ 36 - 20
compiler/i386/cpupara.pas

@@ -42,7 +42,7 @@ unit cpupara;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
           function get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
+          function create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;override;
           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;can_use_final_stack_loc : boolean;var cgpara:TCGPara);override;
           function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): TCGPara;override;
        private
@@ -113,6 +113,8 @@ unit cpupara;
                        exit;
                      end;
                   end;
+                else
+                  ;
               end;
             end;
           system_i386_os2,
@@ -130,6 +132,8 @@ unit cpupara;
                        exit;
                      end;
                   end;
+                else
+                  ;
               end;
             end;
           system_i386_freebsd,
@@ -157,9 +161,13 @@ unit cpupara;
                         result:=false;
                         exit;
                       end;
+                    else
+                      ;
                   end;
               end;
             end;
+          else
+            ;
         end;
         result:=inherited ret_in_param(def,pd);
       end;
@@ -234,6 +242,8 @@ unit cpupara;
             result:=not(calloption in cdecl_pocalls) and not tprocvardef(def).is_addressonly;
           setdef :
             result:=not(calloption in cdecl_pocalls) and (not is_smallset(def));
+          else
+            ;
         end;
       end;
 
@@ -289,8 +299,8 @@ unit cpupara;
 
     function tcpuparamanager.get_saved_registers_int(calloption : tproccalloption):tcpuregisterarray;
       const
-        saveregs : array[0..3] of tsuperregister = (RS_EBX,RS_ESI,RS_EDI,RS_EBP);
-        saveregs_oldfpccall : array[0..0] of tsuperregister = (RS_EBP);
+        saveregs : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..3] of tsuperregister{$endif} = (RS_EBX,RS_ESI,RS_EDI,RS_EBP);
+        saveregs_oldfpccall : {$ifndef VER3_0}tcpuregisterarray{$else}array[0..0] of tsuperregister{$endif} = (RS_EBP);
       begin
         case calloption of
           pocall_internproc,
@@ -447,7 +457,8 @@ unit cpupara;
             { syscall for AROS can have already a paraloc set }
             if (vo_has_explicit_paraloc in hp.varoptions) then
               begin
-                if not(vo_is_syscall_lib in hp.varoptions) then
+                { on AROS-i386, only the libbase can have explicit paraloc }
+                if not (vo_is_syscall_lib in hp.varoptions) then
                   internalerror(2016090105);
                 if p.proccalloption in pushleftright_pocalls then
                   dec(i)
@@ -466,25 +477,23 @@ unit cpupara;
             else
               begin
                 paralen:=push_size(hp.varspez,paradef,p.proccalloption);
-                { darwin/x86 requires that parameters < sizeof(aint) are sign/ }
-                { zero extended to sizeof(aint)                                }
-                if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) and
-                   (side = callerside) and
-                   (paralen > 0) and
-                   (paralen < sizeof(aint)) then
-                  begin
-                    paralen:=sizeof(aint);
-                    paracgsize:=OS_SINT;
-                    paradef:=sinttype;
-                  end
-                else
-                  paracgsize:=def_cgsize(paradef);
+                paracgsize:=def_cgsize(paradef);
               end;
             hp.paraloc[side].reset;
             hp.paraloc[side].size:=paracgsize;
             hp.paraloc[side].intsize:=paralen;
             hp.paraloc[side].def:=paradef;
             hp.paraloc[side].Alignment:=paraalign;
+            { darwin/x86 requires that parameters < sizeof(aint) are sign/ }
+            { zero extended to sizeof(aint)                                }
+            if (target_info.system in [system_i386_darwin,system_i386_iphonesim]) and
+               (side = callerside) and
+               (paralen > 0) and
+               (paralen < sizeof(aint)) then
+              begin
+                paracgsize:=OS_SINT;
+                paradef:=sinttype;
+              end;
             { Copy to stack? }
             if (paracgsize=OS_NO) or
                (use_fixed_stack) then
@@ -769,15 +778,22 @@ unit cpupara;
       end;
 
 
-    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
+    function tcpuparamanager.create_varargs_paraloc_info(p : tabstractprocdef; side: tcallercallee; varargspara:tvarargsparalist):longint;
       var
         parasize : longint;
       begin
         parasize:=0;
         { calculate the registers for the normal parameters }
-        create_stdcall_paraloc_info(p,callerside,p.paras,parasize);
+        create_stdcall_paraloc_info(p,side,p.paras,parasize);
         { append the varargs }
-        create_stdcall_paraloc_info(p,callerside,varargspara,parasize);
+        if assigned(varargspara) then
+          begin
+            if side=callerside then
+              create_stdcall_paraloc_info(p,side,varargspara,parasize)
+            else
+              internalerror(2019021926);
+          end;
+        create_funcretloc_info(p,side);
         result:=parasize;
       end;
 

Някои файлове не бяха показани, защото твърде много файлове са промени