Переглянути джерело

* synchronised with trunk up till r26975

git-svn-id: branches/hlcgllvm@26976 -
Jonas Maebe 11 роки тому
батько
коміт
e9268a0a14
100 змінених файлів з 4466 додано та 1943 видалено
  1. 274 119
      .gitattributes
  2. 32 6
      Makefile
  3. 8 2
      Makefile.fpc
  4. 50 11
      compiler/Makefile
  5. 13 7
      compiler/Makefile.fpc
  6. 6 1
      compiler/aasmtai.pas
  7. 77 32
      compiler/aggas.pas
  8. 3 2
      compiler/agjasmin.pas
  9. 121 113
      compiler/aoptobj.pas
  10. 4 7
      compiler/arm/aasmcpu.pas
  11. 2 2
      compiler/arm/agarmgas.pas
  12. 461 260
      compiler/arm/aoptcpu.pas
  13. 69 44
      compiler/arm/cgcpu.pas
  14. 16 28
      compiler/arm/cpubase.pas
  15. 10 0
      compiler/arm/cpuelf.pas
  16. 15 14
      compiler/arm/cpuinfo.pas
  17. 19 10
      compiler/arm/cpupara.pas
  18. 9 4
      compiler/arm/cpupi.pas
  19. 42 18
      compiler/arm/narmadd.pas
  20. 91 77
      compiler/arm/narmmat.pas
  21. 48 3
      compiler/arm/rgcpu.pas
  22. 31 1
      compiler/assemble.pas
  23. 7 1
      compiler/avr/aasmcpu.pas
  24. 1 1
      compiler/avr/agavrgas.pas
  25. 233 14
      compiler/avr/aoptcpu.pas
  26. 26 8
      compiler/avr/aoptcpub.pas
  27. 173 62
      compiler/avr/cgcpu.pas
  28. 12 4
      compiler/avr/cpubase.pas
  29. 10 10
      compiler/avr/cpuinfo.pas
  30. 60 45
      compiler/avr/cpupara.pas
  31. 1 1
      compiler/avr/cpupi.pas
  32. 24 10
      compiler/avr/raavrgas.pas
  33. 6 6
      compiler/cclasses.pas
  34. 30 3
      compiler/cfileutl.pas
  35. 5 1
      compiler/cg64f32.pas
  36. 48 24
      compiler/cgobj.pas
  37. 62 2
      compiler/cgutils.pas
  38. 1 4
      compiler/crefs.pas
  39. 45 61
      compiler/cresstr.pas
  40. 9 1
      compiler/cstreams.pas
  41. 5 1
      compiler/dbgdwarf.pas
  42. 1 0
      compiler/dbgstabs.pas
  43. 1 0
      compiler/dbgstabx.pas
  44. 1 0
      compiler/dirparse.pas
  45. 3 0
      compiler/fpcdefs.inc
  46. 14 8
      compiler/globals.pas
  47. 9 6
      compiler/globtype.pas
  48. 15 10
      compiler/hlcg2ll.pas
  49. 16 29
      compiler/hlcgobj.pas
  50. 9 2
      compiler/htypechk.pas
  51. 16 10
      compiler/i386/aopt386.pas
  52. 17 22
      compiler/i386/cgcpu.pas
  53. 2 0
      compiler/i386/cpuelf.pas
  54. 34 2
      compiler/i386/cpuinfo.pas
  55. 3 0
      compiler/i386/cpunode.pas
  56. 0 53
      compiler/i386/cpupara.pas
  57. 17 7
      compiler/i386/csopt386.pas
  58. 1 0
      compiler/i386/daopt386.pas
  59. 1 0
      compiler/i386/i386att.inc
  60. 1 0
      compiler/i386/i386atts.inc
  61. 1 0
      compiler/i386/i386int.inc
  62. 1 1
      compiler/i386/i386nop.inc
  63. 1 0
      compiler/i386/i386op.inc
  64. 79 78
      compiler/i386/i386prop.inc
  65. 7 0
      compiler/i386/i386tab.inc
  66. 146 21
      compiler/i386/n386add.pas
  67. 2 1
      compiler/i386/n386cal.pas
  68. 685 0
      compiler/i386/n386flw.pas
  69. 5 3
      compiler/i386/popt386.pas
  70. 1 0
      compiler/i386/rropt386.pas
  71. 445 105
      compiler/i8086/cgcpu.pas
  72. 1 1
      compiler/i8086/cpuinfo.pas
  73. 1 0
      compiler/i8086/i8086att.inc
  74. 1 0
      compiler/i8086/i8086atts.inc
  75. 1 0
      compiler/i8086/i8086int.inc
  76. 1 1
      compiler/i8086/i8086nop.inc
  77. 1 0
      compiler/i8086/i8086op.inc
  78. 79 78
      compiler/i8086/i8086prop.inc
  79. 7 0
      compiler/i8086/i8086tab.inc
  80. 10 11
      compiler/i8086/n8086add.pas
  81. 32 3
      compiler/i8086/n8086cal.pas
  82. 17 1
      compiler/i8086/n8086inl.pas
  83. 3 3
      compiler/jvm/cpubase.pas
  84. 4 2
      compiler/jvm/hlcgcpu.pas
  85. 4 1
      compiler/jvm/jvmdef.pas
  86. 2 29
      compiler/jvm/njvmadd.pas
  87. 10 5
      compiler/jvm/njvmmem.pas
  88. 3 0
      compiler/jvm/pjvm.pas
  89. 25 6
      compiler/link.pas
  90. 1 1
      compiler/m68k/ag68kgas.pas
  91. 399 242
      compiler/m68k/cgcpu.pas
  92. 1 1
      compiler/m68k/cpubase.pas
  93. 25 91
      compiler/m68k/cpupara.pas
  94. 12 12
      compiler/m68k/n68kadd.pas
  95. 27 36
      compiler/m68k/n68kcnv.pas
  96. 65 11
      compiler/mips/aasmcpu.pas
  97. 19 8
      compiler/mips/aoptcpu.pas
  98. 16 17
      compiler/mips/cgcpu.pas
  99. 4 4
      compiler/mips/cpugas.pas
  100. 2 1
      compiler/mips/cpuinfo.pas

Різницю між файлами не показано, бо вона завелика
+ 274 - 119
.gitattributes


+ 32 - 6
Makefile

@@ -1,8 +1,8 @@
 #
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013-10-05 rev 25642]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2014-02-06 rev 26692]
 #
 #
 default: help
 default: help
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux jvm-java jvm-android i8086-msdos
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-android jvm-java jvm-android i8086-msdos
 BSDs = freebsd netbsd openbsd darwin
 BSDs = freebsd netbsd openbsd darwin
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
 LIMIT83fs = go32v2 os2 emx watcom msdos
@@ -276,7 +276,7 @@ else
 ifeq ($(CPU_TARGET),i386)
 ifeq ($(CPU_TARGET),i386)
 BINUTILSPREFIX=i686-linux-android-
 BINUTILSPREFIX=i686-linux-android-
 else
 else
-ifeq ($(CPU_TARGET),mips)
+ifeq ($(CPU_TARGET),mipsel)
 BINUTILSPREFIX=mipsel-linux-android-
 BINUTILSPREFIX=mipsel-linux-android-
 endif
 endif
 endif
 endif
@@ -322,7 +322,7 @@ endif
 override PACKAGE_NAME=fpc
 override PACKAGE_NAME=fpc
 override PACKAGE_VERSION=2.7.1
 override PACKAGE_VERSION=2.7.1
 REQUIREDVERSION=2.6.2
 REQUIREDVERSION=2.6.2
-REQUIREDVERSION2=2.6.2
+REQUIREDVERSION2=2.6.4
 ifndef inOS2
 ifndef inOS2
 override FPCDIR:=$(BASEDIR)
 override FPCDIR:=$(BASEDIR)
 export FPCDIR
 export FPCDIR
@@ -370,6 +370,9 @@ endif
 ifeq ($(CPU_TARGET),i8086)
 ifeq ($(CPU_TARGET),i8086)
 PPSUF=8086
 PPSUF=8086
 endif
 endif
+ifeq ($(CPU_TARGET),avr)
+PPSUF=avr
+endif
 ifdef CROSSCOMPILE
 ifdef CROSSCOMPILE
 ifneq ($(CPU_TARGET),jvm)
 ifneq ($(CPU_TARGET),jvm)
 PPPRE=ppcross
 PPPRE=ppcross
@@ -442,8 +445,11 @@ else
 FPCMAKENEW=$(BASEDIR)/utils/fpcm/bin/$(SOURCESUFFIX)/fpcmake$(SRCEXEEXT)
 FPCMAKENEW=$(BASEDIR)/utils/fpcm/bin/$(SOURCESUFFIX)/fpcmake$(SRCEXEEXT)
 endif
 endif
 endif
 endif
+ifneq ($(OPT),)
+OPTNEW+=$(OPT)
+endif
 CLEANOPTS=FPC=$(PPNEW)
 CLEANOPTS=FPC=$(PPNEW)
-BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1
+BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)'
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 ifndef CROSSCOMPILE
 ifndef CROSSCOMPILE
 ifneq ($(wildcard ide),)
 ifneq ($(wildcard ide),)
@@ -672,6 +678,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 ifeq ($(FULL_TARGET),mipsel-linux)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
 endif
@@ -1255,6 +1264,7 @@ ASNAME=$(BINUTILSPREFIX)as
 LDNAME=$(BINUTILSPREFIX)ld
 LDNAME=$(BINUTILSPREFIX)ld
 ARNAME=$(BINUTILSPREFIX)ar
 ARNAME=$(BINUTILSPREFIX)ar
 RCNAME=$(BINUTILSPREFIX)rc
 RCNAME=$(BINUTILSPREFIX)rc
+NASMNAME=$(BINUTILSPREFIX)nasm
 ifndef ASPROG
 ifndef ASPROG
 ifdef CROSSBINDIR
 ifdef CROSSBINDIR
 ASPROG=$(CROSSBINDIR)/$(ASNAME)$(SRCEXEEXT)
 ASPROG=$(CROSSBINDIR)/$(ASNAME)$(SRCEXEEXT)
@@ -1283,10 +1293,18 @@ else
 ARPROG=$(ARNAME)
 ARPROG=$(ARNAME)
 endif
 endif
 endif
 endif
+ifndef NASMPROG
+ifdef CROSSBINDIR
+NASMPROG=$(CROSSBINDIR)/$(NASMNAME)$(SRCEXEEXT)
+else
+NASMPROG=$(NASMNAME)
+endif
+endif
 AS=$(ASPROG)
 AS=$(ASPROG)
 LD=$(LDPROG)
 LD=$(LDPROG)
 RC=$(RCPROG)
 RC=$(RCPROG)
 AR=$(ARPROG)
 AR=$(ARPROG)
+NASM=$(NASMPROG)
 ifdef inUnix
 ifdef inUnix
 PPAS=./ppas$(SRCBATCHEXT)
 PPAS=./ppas$(SRCBATCHEXT)
 else
 else
@@ -1453,7 +1471,7 @@ endif
 ifdef ACROSSCOMPILE
 ifdef ACROSSCOMPILE
 override FPCOPT+=$(CROSSOPT)
 override FPCOPT+=$(CROSSOPT)
 endif
 endif
-override COMPILER:=$(FPC) $(FPCOPT)
+override COMPILER:=$(strip $(FPC) $(FPCOPT))
 ifeq (,$(findstring -s ,$(COMPILER)))
 ifeq (,$(findstring -s ,$(COMPILER)))
 EXECPPAS=
 EXECPPAS=
 else
 else
@@ -2366,6 +2384,14 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 TARGET_DIRS_INSTALLER=1
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
 TARGET_DIRS_RTL=1

+ 8 - 2
Makefile.fpc

@@ -21,7 +21,7 @@ rule=help
 
 
 [prerules]
 [prerules]
 REQUIREDVERSION=2.6.2
 REQUIREDVERSION=2.6.2
-REQUIREDVERSION2=2.6.2
+REQUIREDVERSION2=2.6.4
 
 
 
 
 # make versions < 3.77 (OS2 version) are buggy
 # make versions < 3.77 (OS2 version) are buggy
@@ -76,6 +76,9 @@ endif
 ifeq ($(CPU_TARGET),i8086)
 ifeq ($(CPU_TARGET),i8086)
 PPSUF=8086
 PPSUF=8086
 endif
 endif
+ifeq ($(CPU_TARGET),avr)
+PPSUF=avr
+endif
 
 
 # cross compilers uses full cpu_target, not just ppc-suffix
 # cross compilers uses full cpu_target, not just ppc-suffix
 # (except if the target cannot run a native compiler)
 # (except if the target cannot run a native compiler)
@@ -179,8 +182,11 @@ endif
 endif
 endif
 
 
 # Build/install options
 # Build/install options
+ifneq ($(OPT),)
+OPTNEW+=$(OPT)
+endif
 CLEANOPTS=FPC=$(PPNEW)
 CLEANOPTS=FPC=$(PPNEW)
-BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1
+BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1 'OPT=$(OPTNEW)'
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 INSTALLOPTS=FPC=$(PPNEW) ZIPDESTDIR=$(BASEDIR) FPCMAKE=$(FPCMAKENEW)
 
 
 # Compile also IDE (check for ide and fv dir)
 # Compile also IDE (check for ide and fv dir)

+ 50 - 11
compiler/Makefile

@@ -1,8 +1,8 @@
 #
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013/10/05]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2014-02-06 rev 26692]
 #
 #
 default: all
 default: all
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux jvm-java jvm-android i8086-msdos
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-android jvm-java jvm-android i8086-msdos
 BSDs = freebsd netbsd openbsd darwin
 BSDs = freebsd netbsd openbsd darwin
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
 LIMIT83fs = go32v2 os2 emx watcom msdos
@@ -276,7 +276,7 @@ else
 ifeq ($(CPU_TARGET),i386)
 ifeq ($(CPU_TARGET),i386)
 BINUTILSPREFIX=i686-linux-android-
 BINUTILSPREFIX=i686-linux-android-
 else
 else
-ifeq ($(CPU_TARGET),mips)
+ifeq ($(CPU_TARGET),mipsel)
 BINUTILSPREFIX=mipsel-linux-android-
 BINUTILSPREFIX=mipsel-linux-android-
 endif
 endif
 endif
 endif
@@ -402,18 +402,24 @@ override LOCALOPT+=$(OPTLEVEL2)
 override RTLOPT+=$(OPTLEVEL2)
 override RTLOPT+=$(OPTLEVEL2)
 override LOCALOPT+=$(LOCALOPTLEVEL2)
 override LOCALOPT+=$(LOCALOPTLEVEL2)
 override RTLOPT+=$(RTLOPTLEVEL2)
 override RTLOPT+=$(RTLOPTLEVEL2)
+override LOCALOPT+=$(OPTNEW)
+override RTLOPT+=$(OPTNEW)
 endif
 endif
 ifeq ($(CYCLELEVEL),3)
 ifeq ($(CYCLELEVEL),3)
 override LOCALOPT+=$(OPTLEVEL3)
 override LOCALOPT+=$(OPTLEVEL3)
 override RTLOPT+=$(OPTLEVEL3)
 override RTLOPT+=$(OPTLEVEL3)
 override LOCALOPT+=$(LOCALOPTLEVEL3)
 override LOCALOPT+=$(LOCALOPTLEVEL3)
 override RTLOPT+=$(RTLOPTLEVEL3)
 override RTLOPT+=$(RTLOPTLEVEL3)
+override LOCALOPT+=$(OPTNEW)
+override RTLOPT+=$(OPTNEW)
 endif
 endif
 ifeq ($(CYCLELEVEL),4)
 ifeq ($(CYCLELEVEL),4)
 override LOCALOPT+=$(OPTLEVEL4)
 override LOCALOPT+=$(OPTLEVEL4)
 override RTLOPT+=$(OPTLEVEL4)
 override RTLOPT+=$(OPTLEVEL4)
 override LOCALOPT+=$(LOCALOPTLEVEL4)
 override LOCALOPT+=$(LOCALOPTLEVEL4)
 override RTLOPT+=$(RTLOPTLEVEL4)
 override RTLOPT+=$(RTLOPTLEVEL4)
+override LOCALOPT+=$(OPTNEW)
+override RTLOPT+=$(OPTNEW)
 endif
 endif
 endif
 endif
 override OPT=
 override OPT=
@@ -743,6 +749,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 ifeq ($(FULL_TARGET),mipsel-linux)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
@@ -962,6 +971,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 ifeq ($(FULL_TARGET),mipsel-linux)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+override TARGET_PROGRAMS+=pp
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
@@ -1182,6 +1194,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 ifeq ($(FULL_TARGET),mipsel-linux)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
 endif
@@ -1401,6 +1416,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 ifeq ($(FULL_TARGET),mipsel-linux)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
@@ -1620,6 +1638,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 ifeq ($(FULL_TARGET),mipsel-linux)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
@@ -1839,6 +1860,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 ifeq ($(FULL_TARGET),mipsel-linux)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
@@ -2421,6 +2445,7 @@ ASNAME=$(BINUTILSPREFIX)as
 LDNAME=$(BINUTILSPREFIX)ld
 LDNAME=$(BINUTILSPREFIX)ld
 ARNAME=$(BINUTILSPREFIX)ar
 ARNAME=$(BINUTILSPREFIX)ar
 RCNAME=$(BINUTILSPREFIX)rc
 RCNAME=$(BINUTILSPREFIX)rc
+NASMNAME=$(BINUTILSPREFIX)nasm
 ifndef ASPROG
 ifndef ASPROG
 ifdef CROSSBINDIR
 ifdef CROSSBINDIR
 ASPROG=$(CROSSBINDIR)/$(ASNAME)$(SRCEXEEXT)
 ASPROG=$(CROSSBINDIR)/$(ASNAME)$(SRCEXEEXT)
@@ -2449,10 +2474,18 @@ else
 ARPROG=$(ARNAME)
 ARPROG=$(ARNAME)
 endif
 endif
 endif
 endif
+ifndef NASMPROG
+ifdef CROSSBINDIR
+NASMPROG=$(CROSSBINDIR)/$(NASMNAME)$(SRCEXEEXT)
+else
+NASMPROG=$(NASMNAME)
+endif
+endif
 AS=$(ASPROG)
 AS=$(ASPROG)
 LD=$(LDPROG)
 LD=$(LDPROG)
 RC=$(RCPROG)
 RC=$(RCPROG)
 AR=$(ARPROG)
 AR=$(ARPROG)
+NASM=$(NASMPROG)
 ifdef inUnix
 ifdef inUnix
 PPAS=./ppas$(SRCBATCHEXT)
 PPAS=./ppas$(SRCBATCHEXT)
 else
 else
@@ -2688,6 +2721,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 ifeq ($(FULL_TARGET),mipsel-linux)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
@@ -2877,7 +2913,7 @@ endif
 ifdef ACROSSCOMPILE
 ifdef ACROSSCOMPILE
 override FPCOPT+=$(CROSSOPT)
 override FPCOPT+=$(CROSSOPT)
 endif
 endif
-override COMPILER:=$(FPC) $(FPCOPT)
+override COMPILER:=$(strip $(FPC) $(FPCOPT))
 ifeq (,$(findstring -s ,$(COMPILER)))
 ifeq (,$(findstring -s ,$(COMPILER)))
 EXECPPAS=
 EXECPPAS=
 else
 else
@@ -3533,6 +3569,9 @@ endif
 ifeq ($(FULL_TARGET),mipsel-linux)
 ifeq ($(FULL_TARGET),mipsel-linux)
 TARGET_DIRS_UTILS=1
 TARGET_DIRS_UTILS=1
 endif
 endif
+ifeq ($(FULL_TARGET),mipsel-android)
+TARGET_DIRS_UTILS=1
+endif
 ifeq ($(FULL_TARGET),jvm-java)
 ifeq ($(FULL_TARGET),jvm-java)
 TARGET_DIRS_UTILS=1
 TARGET_DIRS_UTILS=1
 endif
 endif
@@ -3798,13 +3837,13 @@ ifdef RELEASE
 DOWPOCYCLE=1
 DOWPOCYCLE=1
 wpocycle:
 wpocycle:
 	$(RM) $(EXENAME)
 	$(RM) $(EXENAME)
-	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(LOCALOPT) $(OPTWPOCOLLECT)' compiler
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTWPOCOLLECT) $(OPTNEW))' compiler
 	$(RM) $(EXENAME)
 	$(RM) $(EXENAME)
-	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(RTLOPT) $(OPTWPOPERFORM)' rtlclean rtl
-	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(LOCALOPT) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtlclean rtl
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
 	$(MOVE) $(EXENAME) $(TEMPWPONAME1)
 	$(MOVE) $(EXENAME) $(TEMPWPONAME1)
-	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(RTLOPT) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM))' rtlclean rtl
-	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(LOCALOPT) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtlclean rtl
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
 	$(COPY) $(EXENAME) $(TEMPWPONAME2)
 	$(COPY) $(EXENAME) $(TEMPWPONAME2)
 endif
 endif
 endif
 endif
@@ -3873,12 +3912,12 @@ cycle: override FPC=
 cycle:
 cycle:
 	$(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
 	$(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
 	$(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
 	$(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
-	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=
+	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
 ifndef NoNativeBinaries
 ifndef NoNativeBinaries
-	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(LOCALOPT) $(CROSSOPT)' cycleclean compiler CYCLELEVEL=3
+	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
 endif
 endif
 endif
 endif
 endif
 endif

+ 13 - 7
compiler/Makefile.fpc

@@ -138,18 +138,24 @@ override LOCALOPT+=$(OPTLEVEL2)
 override RTLOPT+=$(OPTLEVEL2)
 override RTLOPT+=$(OPTLEVEL2)
 override LOCALOPT+=$(LOCALOPTLEVEL2)
 override LOCALOPT+=$(LOCALOPTLEVEL2)
 override RTLOPT+=$(RTLOPTLEVEL2)
 override RTLOPT+=$(RTLOPTLEVEL2)
+override LOCALOPT+=$(OPTNEW)
+override RTLOPT+=$(OPTNEW)
 endif
 endif
 ifeq ($(CYCLELEVEL),3)
 ifeq ($(CYCLELEVEL),3)
 override LOCALOPT+=$(OPTLEVEL3)
 override LOCALOPT+=$(OPTLEVEL3)
 override RTLOPT+=$(OPTLEVEL3)
 override RTLOPT+=$(OPTLEVEL3)
 override LOCALOPT+=$(LOCALOPTLEVEL3)
 override LOCALOPT+=$(LOCALOPTLEVEL3)
 override RTLOPT+=$(RTLOPTLEVEL3)
 override RTLOPT+=$(RTLOPTLEVEL3)
+override LOCALOPT+=$(OPTNEW)
+override RTLOPT+=$(OPTNEW)
 endif
 endif
 ifeq ($(CYCLELEVEL),4)
 ifeq ($(CYCLELEVEL),4)
 override LOCALOPT+=$(OPTLEVEL4)
 override LOCALOPT+=$(OPTLEVEL4)
 override RTLOPT+=$(OPTLEVEL4)
 override RTLOPT+=$(OPTLEVEL4)
 override LOCALOPT+=$(LOCALOPTLEVEL4)
 override LOCALOPT+=$(LOCALOPTLEVEL4)
 override RTLOPT+=$(RTLOPTLEVEL4)
 override RTLOPT+=$(RTLOPTLEVEL4)
+override LOCALOPT+=$(OPTNEW)
+override RTLOPT+=$(OPTNEW)
 endif
 endif
 endif
 endif
 
 
@@ -596,13 +602,13 @@ DOWPOCYCLE=1
 wpocycle:
 wpocycle:
 # don't use cycle_clean, it will delete the compiler utilities again
 # don't use cycle_clean, it will delete the compiler utilities again
         $(RM) $(EXENAME)
         $(RM) $(EXENAME)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(LOCALOPT) $(OPTWPOCOLLECT)' compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTWPOCOLLECT) $(OPTNEW))' compiler
         $(RM) $(EXENAME)
         $(RM) $(EXENAME)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(RTLOPT) $(OPTWPOPERFORM)' rtlclean rtl
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(LOCALOPT) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(RTLOPT) $(OPTWPOPERFORM) $(OPTNEW))' rtlclean rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME3)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(OPTWPOPERFORM) $(subst pp1.wpo,pp2.wpo,$(OPTWPOCOLLECT)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
         $(MOVE) $(EXENAME) $(TEMPWPONAME1)
         $(MOVE) $(EXENAME) $(TEMPWPONAME1)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(RTLOPT) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM))' rtlclean rtl
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(LOCALOPT) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM))' $(addsuffix _clean,$(ALLTARGETS)) compiler
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(RTLOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' rtlclean rtl
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPWPONAME1)' 'OPT=$(strip $(LOCALOPT) $(OPTNEW) $(subst pp1.wpo,pp2.wpo,$(OPTWPOPERFORM)))' $(addsuffix _clean,$(ALLTARGETS)) compiler
         $(COPY) $(EXENAME) $(TEMPWPONAME2)
         $(COPY) $(EXENAME) $(TEMPWPONAME2)
 endif
 endif
 endif
 endif
@@ -704,14 +710,14 @@ cycle:
         $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
         $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=1
         $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
         $(MAKE) OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) EXENAME=$(TEMPNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=1
 # ppcross<ARCH> (source native)
 # ppcross<ARCH> (source native)
-        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=
+        $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 rtlclean rtl CYCLELEVEL=2
         $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
         $(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) CPU_TARGET=$(CPU_SOURCE) PPC_TARGET=$(CPU_TARGET) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 # ppc<ARCH> (target native)
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
 # building a native compiler for JVM and embedded targets is not possible
 # building a native compiler for JVM and embedded targets is not possible
 ifndef NoNativeBinaries
 ifndef NoNativeBinaries
-        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(LOCALOPT) $(CROSSOPT)' cycleclean compiler CYCLELEVEL=3
+        $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
 endif
 endif
 endif
 endif
 
 

+ 6 - 1
compiler/aasmtai.pas

@@ -147,7 +147,10 @@ interface
           aitconst_64bit_unaligned,
           aitconst_64bit_unaligned,
           { i8086 far pointer; emits: 'DW symbol, SEG symbol' }
           { i8086 far pointer; emits: 'DW symbol, SEG symbol' }
           aitconst_farptr,
           aitconst_farptr,
-          aitconst_got
+          { offset of symbol's GOT slot in GOT }
+          aitconst_got,
+          { offset of symbol itself from GOT }
+          aitconst_gotoff_symbol
         );
         );
 
 
     const
     const
@@ -1873,6 +1876,8 @@ implementation
             result:=LengthSleb128(value);
             result:=LengthSleb128(value);
           aitconst_half16bit:
           aitconst_half16bit:
             result:=2;
             result:=2;
+          aitconst_gotoff_symbol:
+            result:=4;
           else
           else
             internalerror(200603253);
             internalerror(200603253);
         end;
         end;

+ 77 - 32
compiler/aggas.pas

@@ -227,7 +227,7 @@ implementation
         );
         );
 
 
       { Generic unaligned pseudo-instructions, seems ELF specific }
       { Generic unaligned pseudo-instructions, seems ELF specific }
-      use_ua_elf_systems = [system_mipsel_linux,system_mipseb_linux];
+      use_ua_elf_systems = [system_mipsel_linux,system_mipseb_linux,system_mipsel_android];
       ait_ua_elf_const2str : array[aitconst_16bit_unaligned..aitconst_64bit_unaligned]
       ait_ua_elf_const2str : array[aitconst_16bit_unaligned..aitconst_64bit_unaligned]
         of string[20]=(
         of string[20]=(
           #9'.2byte'#9,#9'.4byte'#9,#9'.8byte'#9
           #9'.2byte'#9,#9'.4byte'#9,#9'.8byte'#9
@@ -652,28 +652,18 @@ implementation
               if not(target_info.system in (systems_darwin+systems_aix)) then
               if not(target_info.system in (systems_darwin+systems_aix)) then
                 begin
                 begin
 {$ifdef m68k}
 {$ifdef m68k}
-                  if assigned(lasthp) and
-                      (
-                        (lasthp.typ=ait_instruction) and
-                        (taicpu(lasthp).opcode<>A_JMP)
-                      ) or
-                      (
-                        (lasthp.typ=ait_label)
-                      ) then
+                  if not use_op and (lastsectype=sec_code) then
                     begin
                     begin
-                      if ispowerof2(alignment,i) then
-                        begin
-                          { the Coldfire manual suggests the TBF instruction for
-                            alignments, but somehow QEMU does not interpret that
-                            correctly... }
-                          {if current_settings.cputype in cpu_coldfire then
-                            instr:='0x51fc'
-                          else}
-                            instr:='0x4e71';
-                          AsmWrite(#9'.balignw '+tostr(alignment)+','+instr);
-                        end
-                      else
-                        internalerror(2012102101);
+                      if not ispowerof2(alignment,i) then
+                        internalerror(2014022201);
+                      { the Coldfire manual suggests the TBF instruction for
+                        alignments, but somehow QEMU does not interpret that
+                        correctly... }
+                      {if current_settings.cputype in cpu_coldfire then
+                        instr:='0x51fc'
+                      else}
+                        instr:='0x4e71';
+                      AsmWrite(#9'.balignw '+tostr(alignment)+','+instr);
                     end
                     end
                   else
                   else
                     begin
                     begin
@@ -961,6 +951,32 @@ implementation
                      AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(GOT)');
                      AsmWrite(#9'.word'#9+tai_const(hp).sym.name+'(GOT)');
                      Asmln;
                      Asmln;
                    end;
                    end;
+
+                 aitconst_gotoff_symbol:
+                   begin
+                     if (tai_const(hp).sym=nil) then
+                       InternalError(2014022601);
+                     case target_info.cpu of
+
+                       cpu_mipseb,cpu_mipsel:
+                         begin
+                           AsmWrite(#9'.gpword'#9);
+                           AsmWrite(tai_const(hp).sym.name);
+                         end;
+
+                       cpu_i386:
+                         begin
+                           AsmWrite(ait_const2str[aitconst_32bit]);
+                           AsmWrite(tai_const(hp).sym.name);
+                         end;
+                     else
+                       InternalError(2014022602);
+                     end;
+                     if (tai_const(hp).value<>0) then
+                       AsmWrite(tostr_with_plus(tai_const(hp).value));
+                     Asmln;
+                   end;
+
                  aitconst_uleb128bit,
                  aitconst_uleb128bit,
                  aitconst_sleb128bit,
                  aitconst_sleb128bit,
 {$ifdef cpu64bitaddr}
 {$ifdef cpu64bitaddr}
@@ -1336,11 +1352,17 @@ implementation
 {$endif arm}
 {$endif arm}
            ait_set:
            ait_set:
              begin
              begin
-               AsmWriteLn(#9'.set '+tai_set(hp).sym^+', '+tai_set(hp).value^);
+               if replaceforbidden then
+                 AsmWriteLn(#9'.set '+ReplaceForbiddenAsmSymbolChars(tai_set(hp).sym^)+', '+ReplaceForbiddenAsmSymbolChars(tai_set(hp).value^))
+               else
+                 AsmWriteLn(#9'.set '+tai_set(hp).sym^+', '+tai_set(hp).value^);
              end;
              end;
            ait_weak:
            ait_weak:
              begin
              begin
-               AsmWriteLn(#9'.weak '+tai_weak(hp).sym^);
+               if replaceforbidden then
+                 AsmWriteLn(#9'.weak '+ReplaceForbiddenAsmSymbolChars(tai_weak(hp).sym^))
+               else
+                 AsmWriteLn(#9'.weak '+tai_weak(hp).sym^);
              end;
              end;
            ait_symbol_end :
            ait_symbol_end :
              begin
              begin
@@ -1518,6 +1540,7 @@ implementation
 
 
       begin
       begin
         pos:=0;
         pos:=0;
+        instring:=false;
         for i:=1 to hp.len do
         for i:=1 to hp.len do
           begin
           begin
             if pos=0 then
             if pos=0 then
@@ -1667,9 +1690,12 @@ implementation
 
 
       for hal:=low(TasmlistType) to high(TasmlistType) do
       for hal:=low(TasmlistType) to high(TasmlistType) do
         begin
         begin
-          AsmWriteLn(target_asm.comment+'Begin asmlist '+AsmlistTypeStr[hal]);
-          writetree(current_asmdata.asmlists[hal]);
-          AsmWriteLn(target_asm.comment+'End asmlist '+AsmlistTypeStr[hal]);
+          if not (current_asmdata.asmlists[hal].empty) then
+            begin
+              AsmWriteLn(target_asm.comment+'Begin asmlist '+AsmlistTypeStr[hal]);
+              writetree(current_asmdata.asmlists[hal]);
+              AsmWriteLn(target_asm.comment+'End asmlist '+AsmlistTypeStr[hal]);
+            end;
         end;
         end;
 
 
       { add weak symbol markers }
       { add weak symbol markers }
@@ -1681,8 +1707,9 @@ implementation
          (target_info.system in systems_darwin) then
          (target_info.system in systems_darwin) then
         AsmWriteLn(#9'.subsections_via_symbols');
         AsmWriteLn(#9'.subsections_via_symbols');
 
 
-      { "no executable stack" marker for Linux }
-      if (target_info.system in (systems_linux + systems_android)) and
+      { "no executable stack" marker }
+      { TODO: used by OpenBSD/NetBSD as well? }
+      if (target_info.system in (systems_linux + systems_android + systems_freebsd)) and
          not(cs_executable_stack in current_settings.moduleswitches) then
          not(cs_executable_stack in current_settings.moduleswitches) then
         begin
         begin
           AsmWriteLn('.section .note.GNU-stack,"",%progbits');
           AsmWriteLn('.section .note.GNU-stack,"",%progbits');
@@ -1791,7 +1818,10 @@ implementation
               end;
               end;
             sec_objc_image_info:
             sec_objc_image_info:
               begin
               begin
-                result:='.section __OBJC, __image_info, regular, no_dead_strip';
+                if (target_info.system in systems_objc_nfabi) then
+                  result:='.section __DATA,__objc_imageinfo,regular,no_dead_strip'
+                else
+                  result:='.section __OBJC, __image_info, regular, no_dead_strip';
                 exit;
                 exit;
               end;
               end;
             sec_objc_cstring_object:
             sec_objc_cstring_object:
@@ -1820,12 +1850,27 @@ implementation
                     exit;
                     exit;
                   end;
                   end;
               end;
               end;
-            sec_objc_meth_var_names,
+            sec_objc_meth_var_types:
+              begin
+                if (target_info.system in systems_objc_nfabi) then
+                  begin
+                    result:='.section __TEXT,__objc_methtype,cstring_literals';
+                    exit
+                  end;
+              end;
+            sec_objc_meth_var_names:
+              begin
+                if (target_info.system in systems_objc_nfabi) then
+                  begin
+                    result:='.section __TEXT,__objc_methname,cstring_literals';
+                    exit
+                  end;
+              end;
             sec_objc_class_names:
             sec_objc_class_names:
               begin
               begin
                 if (target_info.system in systems_objc_nfabi) then
                 if (target_info.system in systems_objc_nfabi) then
                   begin
                   begin
-                    result:='.cstring';
+                    result:='.section __TEXT,__objc_classname,cstring_literals';
                     exit
                     exit
                   end;
                   end;
               end;
               end;

+ 3 - 2
compiler/agjasmin.pas

@@ -687,7 +687,8 @@ implementation
        if cs_asm_extern in current_settings.globalswitches then
        if cs_asm_extern in current_settings.globalswitches then
          Replace(result,'$JASMINJAR',maybequoted(ScriptFixFileName(jasminjar)))
          Replace(result,'$JASMINJAR',maybequoted(ScriptFixFileName(jasminjar)))
        else
        else
-         Replace(result,'$JASMINJAR',ScriptFixFileName(jasminjar))
+         Replace(result,'$JASMINJAR',ScriptFixFileName(jasminjar));
+       Replace(result,'$EXTRAOPT',asmextraopt);
      end;
      end;
 
 
 
 
@@ -1223,7 +1224,7 @@ implementation
          id     : as_jvm_jasmin;
          id     : as_jvm_jasmin;
          idtxt  : 'Jasmin';
          idtxt  : 'Jasmin';
          asmbin : 'java';
          asmbin : 'java';
-         asmcmd : '-jar $JASMINJAR $ASM -d $OBJDIR';
+         asmcmd : '-jar $JASMINJAR $ASM $EXTRAOPT -d $OBJDIR';
          supported_targets : [system_jvm_java32,system_jvm_android32];
          supported_targets : [system_jvm_java32,system_jvm_android32];
          flags : [];
          flags : [];
          labelprefix : 'L';
          labelprefix : 'L';

+ 121 - 113
compiler/aoptobj.pas

@@ -1306,130 +1306,138 @@ Unit AoptObj;
     procedure TAOptObj.PeepHoleOptPass1;
     procedure TAOptObj.PeepHoleOptPass1;
       var
       var
         p,hp1,hp2 : tai;
         p,hp1,hp2 : tai;
+        stoploop:boolean;
       begin
       begin
-        p := BlockStart;
-        ClearUsedRegs;
-        while (p <> BlockEnd) Do
-          begin
-            { I'am not sure why this is done, UsedRegs should reflect the register usage before the instruction
-              If an instruction needs the information of this, it can easily create a TempUsedRegs (FK)
-            UpdateUsedRegs(tai(p.next));
-            }
-{$ifdef DEBUG_OPTALLOC}
-            if p.Typ=ait_instruction then
-              InsertLLItem(tai(p.Previous),p,tai_comment.create(strpnew(GetAllocationString(UsedRegs))));
-{$endif DEBUG_OPTALLOC}
-            if PeepHoleOptPass1Cpu(p) then
-              begin
-                UpdateUsedRegs(p);
-                continue;
-              end;
-            case p.Typ Of
-              ait_instruction:
+        repeat
+          stoploop:=true;
+          p := BlockStart;
+          ClearUsedRegs;
+          while (p <> BlockEnd) Do
+            begin
+              { I'am not sure why this is done, UsedRegs should reflect the register usage before the instruction
+                If an instruction needs the information of this, it can easily create a TempUsedRegs (FK)
+              UpdateUsedRegs(tai(p.next));
+              }
+  {$ifdef DEBUG_OPTALLOC}
+              if p.Typ=ait_instruction then
+                InsertLLItem(tai(p.Previous),p,tai_comment.create(strpnew(GetAllocationString(UsedRegs))));
+  {$endif DEBUG_OPTALLOC}
+              if PeepHoleOptPass1Cpu(p) then
                 begin
                 begin
-                  { Handle Jmp Optimizations }
-                  if taicpu(p).is_jmp then
-                    begin
-                      { the following if-block removes all code between a jmp and the next label,
-                        because it can never be executed
-                      }
-                      if IsJumpToLabel(taicpu(p)) then
-                        begin
-                          hp2:=p;
-                          while GetNextInstruction(hp2, hp1) and
-                                (hp1.typ <> ait_label) do
-                            if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
-                              begin
-                                if (hp1.typ = ait_instruction) and
-                                   taicpu(hp1).is_jmp and
-                                   (JumpTargetOp(taicpu(hp1))^.typ = top_ref) and
-                                   (JumpTargetOp(taicpu(hp1))^.ref^.symbol is TAsmLabel) then
-                                   TAsmLabel(JumpTargetOp(taicpu(hp1))^.ref^.symbol).decrefs;
-                                { don't kill start/end of assembler block,
-                                  no-line-info-start/end etc }
-                                if hp1.typ<>ait_marker then
-                                  begin
-{$if defined(SPARC) or defined(MIPS) }
-                                    if (hp1.typ=ait_instruction) and (taicpu(hp1).is_jmp) then
-                                      RemoveDelaySlot(hp1);
-{$endif SPARC or MIPS }
-                                    asml.remove(hp1);
-                                    hp1.free;
-                                  end
-                                else
-                                  hp2:=hp1;
-                              end
-                            else break;
-                          end;
-                      { remove jumps to a label coming right after them }
-                      if GetNextInstruction(p, hp1) then
-                        begin
-                          SkipEntryExitMarker(hp1,hp1);
-                          if FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp1) and
-        { TODO: FIXME removing the first instruction fails}
-                              (p<>blockstart) then
-                            begin
-                              tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol).decrefs;
-{$if defined(SPARC) or defined(MIPS)}
-                              RemoveDelaySlot(p);
-{$endif SPARC or MIPS}
-                              hp2:=tai(hp1.next);
-                              asml.remove(p);
-                              p.free;
-                              p:=hp2;
-                              continue;
-                            end
-                          else if assigned(hp1) then
-                            begin
-                              if hp1.typ = ait_label then
-                                SkipLabels(hp1,hp1);
-                              if (tai(hp1).typ=ait_instruction) and
-                                  IsJumpToLabel(taicpu(hp1)) and
-                                  GetNextInstruction(hp1, hp2) and
-                                  FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp2) then
+                  stoploop:=false;
+                  UpdateUsedRegs(p);
+                  continue;
+                end;
+              case p.Typ Of
+                ait_instruction:
+                  begin
+                    { Handle Jmp Optimizations }
+                    if taicpu(p).is_jmp then
+                      begin
+                        { the following if-block removes all code between a jmp and the next label,
+                          because it can never be executed
+                        }
+                        if IsJumpToLabel(taicpu(p)) then
+                          begin
+                            hp2:=p;
+                            while GetNextInstruction(hp2, hp1) and
+                                  (hp1.typ <> ait_label) do
+                              if not(hp1.typ in ([ait_label,ait_align]+skipinstr)) then
                                 begin
                                 begin
-                                  if (taicpu(p).opcode=aopt_condjmp)
-{$ifdef arm}
-                                    and (taicpu(p).condition<>C_None)
-{$endif arm}
-                                  then
+                                  if (hp1.typ = ait_instruction) and
+                                     taicpu(hp1).is_jmp and
+                                     (JumpTargetOp(taicpu(hp1))^.typ = top_ref) and
+                                     (JumpTargetOp(taicpu(hp1))^.ref^.symbol is TAsmLabel) then
+                                     TAsmLabel(JumpTargetOp(taicpu(hp1))^.ref^.symbol).decrefs;
+                                  { don't kill start/end of assembler block,
+                                    no-line-info-start/end etc }
+                                  if hp1.typ<>ait_marker then
                                     begin
                                     begin
-                                      taicpu(p).condition:=inverse_cond(taicpu(p).condition);
-                                      tai_label(hp2).labsym.decrefs;
-                                      JumpTargetOp(taicpu(p))^.ref^.symbol:=JumpTargetOp(taicpu(hp1))^.ref^.symbol;
-                                      { when freeing hp1, the reference count
-                                        isn't decreased, so don't increase
-
-                                       taicpu(p).oper[0]^.ref^.symbol.increfs;
-                                      }
-{$if defined(SPARC) or defined(MIPS)}
-                                      RemoveDelaySlot(hp1);
-{$endif SPARC or MIPS}
+  {$if defined(SPARC) or defined(MIPS) }
+                                      if (hp1.typ=ait_instruction) and (taicpu(hp1).is_jmp) then
+                                        RemoveDelaySlot(hp1);
+  {$endif SPARC or MIPS }
                                       asml.remove(hp1);
                                       asml.remove(hp1);
                                       hp1.free;
                                       hp1.free;
-                                      GetFinalDestination(taicpu(p),0);
+                                      stoploop:=false;
                                     end
                                     end
                                   else
                                   else
-                                    begin
-                                      GetFinalDestination(taicpu(p),0);
-                                      p:=tai(p.next);
-                                      continue;
-                                    end;
+                                    hp2:=hp1;
                                 end
                                 end
-                              else
-                                GetFinalDestination(taicpu(p),0);
+                              else break;
                             end;
                             end;
-                        end;
-                    end
-                  else
-                  { All other optimizes }
-                    begin
-                    end; { if is_jmp }
-                end;
+                        { remove jumps to a label coming right after them }
+                        if GetNextInstruction(p, hp1) then
+                          begin
+                            SkipEntryExitMarker(hp1,hp1);
+                            if FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp1) and
+          { TODO: FIXME removing the first instruction fails}
+                                (p<>blockstart) then
+                              begin
+                                tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol).decrefs;
+  {$if defined(SPARC) or defined(MIPS)}
+                                RemoveDelaySlot(p);
+  {$endif SPARC or MIPS}
+                                hp2:=tai(hp1.next);
+                                asml.remove(p);
+                                p.free;
+                                p:=hp2;
+                                stoploop:=false;
+                                continue;
+                              end
+                            else if assigned(hp1) then
+                              begin
+                                if hp1.typ = ait_label then
+                                  SkipLabels(hp1,hp1);
+                                if (tai(hp1).typ=ait_instruction) and
+                                    IsJumpToLabel(taicpu(hp1)) and
+                                    GetNextInstruction(hp1, hp2) and
+                                    FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp2) then
+                                  begin
+                                    if (taicpu(p).opcode=aopt_condjmp)
+  {$ifdef arm}
+                                      and (taicpu(p).condition<>C_None)
+  {$endif arm}
+                                    then
+                                      begin
+                                        taicpu(p).condition:=inverse_cond(taicpu(p).condition);
+                                        tai_label(hp2).labsym.decrefs;
+                                        JumpTargetOp(taicpu(p))^.ref^.symbol:=JumpTargetOp(taicpu(hp1))^.ref^.symbol;
+                                        { when freeing hp1, the reference count
+                                          isn't decreased, so don't increase
+
+                                         taicpu(p).oper[0]^.ref^.symbol.increfs;
+                                        }
+  {$if defined(SPARC) or defined(MIPS)}
+                                        RemoveDelaySlot(hp1);
+  {$endif SPARC or MIPS}
+                                        asml.remove(hp1);
+                                        hp1.free;
+                                        stoploop:=false;
+                                        GetFinalDestination(taicpu(p),0);
+                                      end
+                                    else
+                                      begin
+                                        GetFinalDestination(taicpu(p),0);
+                                        p:=tai(p.next);
+                                        continue;
+                                      end;
+                                  end
+                                else
+                                  GetFinalDestination(taicpu(p),0);
+                              end;
+                          end;
+                      end
+                    else
+                    { All other optimizes }
+                      begin
+                      end; { if is_jmp }
+                  end;
+              end;
+              UpdateUsedRegs(p);
+              p:=tai(p.next);
             end;
             end;
-            UpdateUsedRegs(p);
-            p:=tai(p.next);
-          end;
+        until stoploop or not(cs_opt_level3 in current_settings.optimizerswitches);
       end;
       end;
 
 
 
 

+ 4 - 7
compiler/arm/aasmcpu.pas

@@ -770,8 +770,7 @@ implementation
             else
             else
               result:=operand_read;
               result:=operand_read;
           A_STREX:
           A_STREX:
-            if opnr in [0,1,2] then
-              result:=operand_write;
+            result:=operand_write;
           else
           else
             internalerror(200403151);
             internalerror(200403151);
         end;
         end;
@@ -1032,6 +1031,7 @@ implementation
                 end;
                 end;
             end;
             end;
             { special case for case jump tables }
             { special case for case jump tables }
+            penalty:=0;
             if SimpleGetNextInstruction(curtai,hp) and
             if SimpleGetNextInstruction(curtai,hp) and
               (tai(hp).typ=ait_instruction) then
               (tai(hp).typ=ait_instruction) then
               begin
               begin
@@ -1081,12 +1081,8 @@ implementation
                   A_ITTTT:
                   A_ITTTT:
                     if GenerateThumb2Code then
                     if GenerateThumb2Code then
                       penalty:=4*multiplier;
                       penalty:=4*multiplier;
-                  else
-                    penalty:=0;
                 end;
                 end;
-              end
-            else
-              penalty:=0;
+              end;
 
 
             { FLD/FST VFP instructions have a limit of +/- 1024, not 4096 }
             { FLD/FST VFP instructions have a limit of +/- 1024, not 4096 }
             if SimpleGetNextInstruction(curtai,hp) and
             if SimpleGetNextInstruction(curtai,hp) and
@@ -2102,6 +2098,7 @@ implementation
 
 
       begin
       begin
         bytes:=$0;
         bytes:=$0;
+        i_field:=0;
         { evaluate and set condition code }
         { evaluate and set condition code }
 
 
         { condition code allowed? }
         { condition code allowed? }

+ 2 - 2
compiler/arm/agarmgas.pas

@@ -356,7 +356,7 @@ unit agarmgas;
 
 
             idtxt  : 'AS';
             idtxt  : 'AS';
             asmbin : 'as';
             asmbin : 'as';
-            asmcmd : '-o $OBJ $ASM';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_arm_linux,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,
             supported_targets : [system_arm_linux,system_arm_wince,system_arm_gba,system_arm_palmos,system_arm_nds,
                                  system_arm_embedded,system_arm_symbian,system_arm_android];
                                  system_arm_embedded,system_arm_symbian,system_arm_android];
             flags : [af_needar,af_smartlink_sections];
             flags : [af_needar,af_smartlink_sections];
@@ -370,7 +370,7 @@ unit agarmgas;
             id     : as_darwin;
             id     : as_darwin;
             idtxt  : 'AS-Darwin';
             idtxt  : 'AS-Darwin';
             asmbin : 'as';
             asmbin : 'as';
-            asmcmd : '-o $OBJ $ASM -arch $ARCH';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM -arch $ARCH';
             supported_targets : [system_arm_darwin];
             supported_targets : [system_arm_darwin];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
             flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
             labelprefix : 'L';
             labelprefix : 'L';

+ 461 - 260
compiler/arm/aoptcpu.pas

@@ -38,14 +38,14 @@ Type
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
     procedure PeepHoleOptPass2;override;
     Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
     Function RegInInstruction(Reg: TRegister; p1: tai): Boolean;override;
-    procedure RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
+    function RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string): boolean;
     function RegUsedAfterInstruction(reg: Tregister; p: tai;
     function RegUsedAfterInstruction(reg: Tregister; p: tai;
                                      var AllUsedRegs: TAllUsedRegs): Boolean;
                                      var AllUsedRegs: TAllUsedRegs): Boolean;
     { returns true if reg reaches it's end of life at p, this means it is either
     { returns true if reg reaches it's end of life at p, this means it is either
       reloaded with a new value or it is deallocated afterwards }
       reloaded with a new value or it is deallocated afterwards }
     function RegEndOfLife(reg: TRegister;p: taicpu): boolean;
     function RegEndOfLife(reg: TRegister;p: taicpu): boolean;
     { gets the next tai object after current that contains info relevant
     { gets the next tai object after current that contains info relevant
-      to the optimizer in p1 which used the given register or does a 
+      to the optimizer in p1 which used the given register or does a
       change in program flow.
       change in program flow.
       If there is none, it returns false and
       If there is none, it returns false and
       sets p1 to nil                                                     }
       sets p1 to nil                                                     }
@@ -68,6 +68,7 @@ Type
     { uses the same constructor as TAopObj }
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
     procedure PeepHoleOptPass2;override;
+    function PostPeepHoleOptsCpu(var p: tai): boolean; override;
   End;
   End;
 
 
   function MustBeLast(p : tai) : boolean;
   function MustBeLast(p : tai) : boolean;
@@ -151,8 +152,9 @@ Implementation
       result := (oper.typ = top_reg) and (oper.reg = reg);
       result := (oper.typ = top_reg) and (oper.reg = reg);
     end;
     end;
 
 
-  procedure RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList);
+  function RemoveRedundantMove(const cmpp: tai; movp: tai; asml: TAsmList):Boolean;
     begin
     begin
+      Result:=false;
       if (taicpu(movp).condition = C_EQ) and
       if (taicpu(movp).condition = C_EQ) and
          (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
          (taicpu(cmpp).oper[0]^.reg = taicpu(movp).oper[0]^.reg) and
          (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
          (taicpu(cmpp).oper[1]^.val = taicpu(movp).oper[1]^.val) then
@@ -160,6 +162,7 @@ Implementation
         asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
         asml.insertafter(tai_comment.Create(strpnew('Peephole CmpMovMov - Removed redundant moveq')), movp);
         asml.remove(movp);
         asml.remove(movp);
         movp.free;
         movp.free;
+        Result:=true;
       end;
       end;
     end;
     end;
 
 
@@ -202,6 +205,9 @@ Implementation
       {Loads to all register in the registerset}
       {Loads to all register in the registerset}
       A_LDM:
       A_LDM:
         regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
         regLoadedWithNewValue := (getsupreg(reg) in p.oper[1]^.regset^);
+      A_POP:
+        regLoadedWithNewValue := (getsupreg(reg) in p.oper[0]^.regset^) or
+                                 (reg=NR_STACK_POINTER_REG);
     end;
     end;
 
 
     if regLoadedWithNewValue then
     if regLoadedWithNewValue then
@@ -333,12 +339,13 @@ Implementation
     end;
     end;
 {$endif DEBUG_AOPTCPU}
 {$endif DEBUG_AOPTCPU}
 
 
-  procedure TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string);
+  function TCpuAsmOptimizer.RemoveSuperfluousMove(const p: tai; movp: tai; const optimizer: string):boolean;
     var
     var
       alloc,
       alloc,
       dealloc : tai_regalloc;
       dealloc : tai_regalloc;
       hp1 : tai;
       hp1 : tai;
     begin
     begin
+      Result:=false;
       if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
       if MatchInstruction(movp, A_MOV, [taicpu(p).condition], [PF_None]) and
          (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
          (taicpu(movp).ops=2) and {We can't optimize if there is a shiftop}
          MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
          MatchOperand(taicpu(movp).oper[1]^, taicpu(p).oper[0]^.reg) and
@@ -370,6 +377,7 @@ Implementation
           if assigned(dealloc) then
           if assigned(dealloc) then
             begin
             begin
               DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
               DebugMsg('Peephole '+optimizer+' removed superfluous mov', movp);
+              result:=true;
 
 
               { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
               { taicpu(p).oper[0]^.reg is not used anymore, try to find its allocation
                 and remove it if possible }
                 and remove it if possible }
@@ -529,6 +537,7 @@ Implementation
       i, i2: longint;
       i, i2: longint;
       TmpUsedRegs: TAllUsedRegs;
       TmpUsedRegs: TAllUsedRegs;
       tempop: tasmop;
       tempop: tasmop;
+      oldreg: tregister;
 
 
     function IsPowerOf2(const value: DWord): boolean; inline;
     function IsPowerOf2(const value: DWord): boolean; inline;
       begin
       begin
@@ -564,7 +573,10 @@ Implementation
                 N := result[31];
                 N := result[31];
                 EQ = Z=1; NE = Z=0;
                 EQ = Z=1; NE = Z=0;
                 MI = N=1; PL = N=0; }
                 MI = N=1; PL = N=0; }
-              MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) and
+              (MatchInstruction(hp2, A_B, [C_EQ,C_NE,C_MI,C_PL], []) or
+               { mov is also possible, but only if there is no shifter operand, it could be an rxx,
+                 we are too lazy to check if it is rxx or something else }
+               (MatchInstruction(hp2, A_MOV, [C_EQ,C_NE,C_MI,C_PL], []) and (taicpu(hp2).ops=2))) and
               assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
               assigned(FindRegDealloc(NR_DEFAULTFLAGS,tai(hp2.Next))) then
              begin
              begin
                DebugMsg('Peephole OpCmp2OpS done', p);
                DebugMsg('Peephole OpCmp2OpS done', p);
@@ -582,6 +594,7 @@ Implementation
 
 
                asml.remove(hp1);
                asml.remove(hp1);
                hp1.free;
                hp1.free;
+               Result:=true;
              end
              end
            else
            else
               case taicpu(p).opcode of
               case taicpu(p).opcode of
@@ -605,7 +618,7 @@ Implementation
                           begin
                           begin
                             DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
                             DebugMsg('Peephole StrLdr2StrMov 1 done', hp1);
                             asml.remove(hp1);
                             asml.remove(hp1);
-                            hp1.free;                            
+                            hp1.free;
                           end
                           end
                         else
                         else
                           begin
                           begin
@@ -643,8 +656,9 @@ Implementation
                         taicpu(p).oppostfix:=PF_D;
                         taicpu(p).oppostfix:=PF_D;
                         asml.remove(hp1);
                         asml.remove(hp1);
                         hp1.free;
                         hp1.free;
+                        result:=true;
                       end;
                       end;
-                    LookForPostindexedPattern(taicpu(p));
+                    Result:=LookForPostindexedPattern(taicpu(p)) or Result;
                   end;
                   end;
                 A_LDR:
                 A_LDR:
                   begin
                   begin
@@ -707,6 +721,7 @@ Implementation
                             taicpu(p).oppostfix:=PF_D;
                             taicpu(p).oppostfix:=PF_D;
                             asml.remove(hp1);
                             asml.remove(hp1);
                             hp1.free;
                             hp1.free;
+                            result:=true;
                           end;
                           end;
                       end;
                       end;
 
 
@@ -734,8 +749,9 @@ Implementation
                          taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
                          taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
                          asml.remove(hp1);
                          asml.remove(hp1);
                          hp1.free;
                          hp1.free;
+                         result:=true;
                        end;
                        end;
-                    LookForPostindexedPattern(taicpu(p));
+                    Result:=LookForPostindexedPattern(taicpu(p)) or Result;
                     { Remove superfluous mov after ldr
                     { Remove superfluous mov after ldr
                       changes
                       changes
                       ldr reg1, ref
                       ldr reg1, ref
@@ -750,8 +766,10 @@ Implementation
                         * ldr+mov have the same conditions
                         * ldr+mov have the same conditions
                         * mov does not set flags
                         * mov does not set flags
                     }
                     }
-                    if (taicpu(p).oppostfix<>PF_D) and GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
-                      RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr');
+                    if (taicpu(p).oppostfix<>PF_D) and
+                       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                       RemoveSuperfluousMove(p, hp1, 'LdrMov2Ldr') then
+                      Result:=true;
                   end;
                   end;
                 A_MOV:
                 A_MOV:
                   begin
                   begin
@@ -799,6 +817,7 @@ Implementation
                                 p.free;
                                 p.free;
                                 hp1.free;
                                 hp1.free;
                                 p:=hp2;
                                 p:=hp2;
+                                Result:=true;
                               end;
                               end;
                             ReleaseUsedRegs(TmpUsedRegs);
                             ReleaseUsedRegs(TmpUsedRegs);
                           end
                           end
@@ -940,8 +959,11 @@ Implementation
                        (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
                        (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
                        (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
                        (taicpu(p).oper[2]^.shifterop^.shiftmode = SM_LSR) and
                        GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
                        GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
-                       (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
-                         regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) then
+                       (hp1.typ=ait_instruction) and
+                       (taicpu(hp1).ops>=1) and
+                       (taicpu(hp1).oper[0]^.typ=top_reg) and
+                       (not RegModifiedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
+                       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                        begin
                        begin
                          if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
                          if (taicpu(p).oper[2]^.shifterop^.shiftimm >= 24 ) and
                            MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
                            MatchInstruction(hp1, A_AND, [taicpu(p).condition], [taicpu(p).oppostfix]) and
@@ -974,7 +996,87 @@ Implementation
                              result:=true;
                              result:=true;
                            end;
                            end;
                        end;
                        end;
+                    { Change
+                      mov rx, ry, lsr/ror #xxx
+                      uxtb/uxth rz,rx/and rz,rx,0xFF
+                      dealloc rx
 
 
+                      to
+
+                      uxtb/uxth rz,ry,ror #xxx
+                    }
+                    if (taicpu(p).ops=3) and
+                       (taicpu(p).oper[2]^.typ = top_shifterop) and
+                       (taicpu(p).oper[2]^.shifterop^.rs = NR_NO) and
+                       (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSR,SM_ROR]) and
+                       (GenerateThumb2Code) and
+                       GetNextInstructionUsingReg(p,hp1, taicpu(p).oper[0]^.reg) and
+                       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
+                       begin
+                         if MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
+                           (taicpu(hp1).ops = 2) and
+                           (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
+                           MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
+                           begin
+                             taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
+                             taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
+                             taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
+                             taicpu(hp1).ops := 3;
+
+                             GetNextInstruction(p,hp1);
+
+                             asml.Remove(p);
+                             p.Free;
+
+                             p:=hp1;
+
+                             result:=true;
+                             exit;
+                           end
+                         else if MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
+                           (taicpu(hp1).ops=2) and
+                           (taicpu(p).oper[2]^.shifterop^.shiftimm in [16]) and
+                           MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
+                           begin
+                             taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
+                             taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
+                             taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
+                             taicpu(hp1).ops := 3;
+
+                             GetNextInstruction(p,hp1);
+
+                             asml.Remove(p);
+                             p.Free;
+
+                             p:=hp1;
+
+                             result:=true;
+                             exit;
+                           end
+                         else if MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
+                           (taicpu(hp1).ops = 3) and
+                           (taicpu(hp1).oper[2]^.typ = top_const) and
+                           (taicpu(hp1).oper[2]^.val = $FF) and
+                           (taicpu(p).oper[2]^.shifterop^.shiftimm in [8,16,24]) and
+                           MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) then
+                           begin
+                             taicpu(hp1).ops := 3;
+                             taicpu(hp1).opcode := A_UXTB;
+                             taicpu(hp1).oper[1]^.reg := taicpu(p).oper[1]^.reg;
+                             taicpu(hp1).loadshifterop(2,taicpu(p).oper[2]^.shifterop^);
+                             taicpu(hp1).oper[2]^.shifterop^.shiftmode:=SM_ROR;
+
+                             GetNextInstruction(p,hp1);
+
+                             asml.Remove(p);
+                             p.Free;
+
+                             p:=hp1;
+
+                             result:=true;
+                             exit;
+                           end;
+                       end;
                     {
                     {
                       optimize
                       optimize
                       mov rX, yyyy
                       mov rX, yyyy
@@ -1007,6 +1109,7 @@ Implementation
                               GetNextInstruction(hp2,hp1);
                               GetNextInstruction(hp2,hp1);
                               asml.remove(hp2);
                               asml.remove(hp2);
                               hp2.free;
                               hp2.free;
+                              result:=true;
                               if not assigned(hp1) then break;
                               if not assigned(hp1) then break;
                             end
                             end
                         {
                         {
@@ -1026,6 +1129,7 @@ Implementation
                               p.free;
                               p.free;
                               p:=hp1;
                               p:=hp1;
                               GetNextInstruction(hp1,hp1);
                               GetNextInstruction(hp1,hp1);
+                              result:=true;
                               if not assigned(hp1) then
                               if not assigned(hp1) then
                                 break;
                                 break;
                             end;
                             end;
@@ -1079,9 +1183,46 @@ Implementation
                                 asml.remove(p);
                                 asml.remove(p);
                                 p.free;
                                 p.free;
                                 p:=hp1;
                                 p:=hp1;
+                                Result:=true;
                               end;
                               end;
                             end;
                             end;
                       end;
                       end;
+                    { Fold the very common sequence
+                        mov  regA, regB
+                        ldr* regA, [regA]
+                      to
+                        ldr* regA, [regB]
+                      CAUTION! If this one is successful p might not be a mov instruction anymore!
+                    }
+                    if (taicpu(p).opcode = A_MOV) and
+                       (taicpu(p).ops = 2) and
+                       (taicpu(p).oper[1]^.typ = top_reg) and
+                       (taicpu(p).oppostfix = PF_NONE) and
+                       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                       MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], []) and
+                       { We can change the base register only when the instruction uses AM_OFFSET }
+                       ((taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) or
+                         ((taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
+                          (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg))
+                       ) and
+                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
+                       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
+                      begin
+                        DebugMsg('Peephole MovLdr2Ldr done', hp1);
+                        if (taicpu(hp1).oper[1]^.ref^.addressmode = AM_OFFSET) and
+                           (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
+                          taicpu(hp1).oper[1]^.ref^.base := taicpu(p).oper[1]^.reg;
+
+                        if taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg then
+                          taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
+
+                        GetNextInstruction(p, hp1);
+                        asml.remove(p);
+                        p.free;
+                        p:=hp1;
+                        result:=true;
+                      end;
+
                     { This folds shifterops into following instructions
                     { This folds shifterops into following instructions
                       mov r0, r1, lsl #8
                       mov r0, r1, lsl #8
                       add r2, r3, r0
                       add r2, r3, r0
@@ -1103,12 +1244,11 @@ Implementation
                                         [taicpu(p).condition], [PF_None]) and
                                         [taicpu(p).condition], [PF_None]) and
                        (not ((GenerateThumb2Code) and
                        (not ((GenerateThumb2Code) and
                              (taicpu(hp1).opcode in [A_SBC]) and
                              (taicpu(hp1).opcode in [A_SBC]) and
-                             (((taicpu(hp1).ops=3) and 
+                             (((taicpu(hp1).ops=3) and
                                MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or
                                MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[1]^.reg)) or
-                              ((taicpu(hp1).ops=2) and 
+                              ((taicpu(hp1).ops=2) and
                                MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and
                                MatchOperand(taicpu(p).oper[0]^, taicpu(hp1).oper[0]^.reg))))) and
-                       (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
-                         regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) and
+                       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) and
                        (taicpu(hp1).ops >= 2) and
                        (taicpu(hp1).ops >= 2) and
                        {Currently we can't fold into another shifterop}
                        {Currently we can't fold into another shifterop}
                        (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
                        (taicpu(hp1).oper[taicpu(hp1).ops-1]^.typ = top_reg) and
@@ -1176,13 +1316,14 @@ Implementation
                                        taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                                        taicpu(hp1).oper[0]^.reg, taicpu(p).oper[1]^.reg,
                                        taicpu(p).oper[2]^.shifterop^);
                                        taicpu(p).oper[2]^.shifterop^);
                               asml.insertbefore(hp2, hp1);
                               asml.insertbefore(hp2, hp1);
+                              GetNextInstruction(p, hp2);
                               asml.remove(p);
                               asml.remove(p);
                               asml.remove(hp1);
                               asml.remove(hp1);
                               p.free;
                               p.free;
                               hp1.free;
                               hp1.free;
                               p:=hp2;
                               p:=hp2;
-                              GetNextInstruction(p,hp1);
                               DebugMsg('Peephole FoldShiftProcess done', p);
                               DebugMsg('Peephole FoldShiftProcess done', p);
+                              Result:=true;
                               break;
                               break;
                             end;
                             end;
                       end;
                       end;
@@ -1220,8 +1361,10 @@ Implementation
                        (taicpu(p).oppostfix = PF_NONE) and
                        (taicpu(p).oppostfix = PF_NONE) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
                        {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
                        {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
-                       MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition],
-                                             [PF_None, PF_B]) and
+                       (MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B]) or
+                        (GenerateThumb2Code and
+                         MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition], [PF_None, PF_B, PF_SB, PF_H, PF_SH]))
+                       ) and
                        (
                        (
                          {If this is address by offset, one of the two registers can be used}
                          {If this is address by offset, one of the two registers can be used}
                          ((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
                          ((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
@@ -1235,14 +1378,15 @@ Implementation
                            (
                            (
                              (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
                              (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
                              (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
                              (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
-                           )
+                           ) and
+                           (not GenerateThumb2Code)
                          )
                          )
                        ) and
                        ) and
-                       { Only fold if there isn't another shifterop already. }
+                       { Only fold if there isn't another shifterop already, and offset is zero. }
+                       (taicpu(hp1).oper[1]^.ref^.offset = 0) and
                        (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and
                        (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and
                        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
                        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
-                       (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
-                         regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) then
+                       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
                        begin
                        begin
                          { If the register we want to do the shift for resides in base, we need to swap that}
                          { If the register we want to do the shift for resides in base, we need to swap that}
                          if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
                          if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
@@ -1251,17 +1395,20 @@ Implementation
                          taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
                          taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
                          taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
                          taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
                          DebugMsg('Peephole FoldShiftLdrStr done', hp1);
                          DebugMsg('Peephole FoldShiftLdrStr done', hp1);
+                         GetNextInstruction(p, hp1);
                          asml.remove(p);
                          asml.remove(p);
                          p.free;
                          p.free;
                          p:=hp1;
                          p:=hp1;
+                         Result:=true;
                        end;
                        end;
                     {
                     {
                       Often we see shifts and then a superfluous mov to another register
                       Often we see shifts and then a superfluous mov to another register
                       In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
                       In the future this might be handled in RedundantMovProcess when it uses RegisterTracking
                     }
                     }
-                    if (taicpu(p).opcode = A_MOV) and 
-                        GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
-                      RemoveSuperfluousMove(p, hp1, 'MovMov2Mov');
+                    if (taicpu(p).opcode = A_MOV) and
+                       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                       RemoveSuperfluousMove(p, hp1, 'MovMov2Mov') then
+                      Result:=true;
                   end;
                   end;
                 A_ADD,
                 A_ADD,
                 A_ADC,
                 A_ADC,
@@ -1274,6 +1421,7 @@ Implementation
                 A_EOR,
                 A_EOR,
                 A_ORR,
                 A_ORR,
                 A_MLA,
                 A_MLA,
+                A_MLS,
                 A_MUL:
                 A_MUL:
                   begin
                   begin
                         {
                         {
@@ -1316,6 +1464,7 @@ Implementation
                                 taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                                 taicpu(hp1).loadConst(2,taicpu(p).oper[2]^.val and taicpu(hp1).oper[2]^.val);
                                 taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                                 taicpu(hp1).oppostfix:=taicpu(p).oppostfix;
                                 taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                                 taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                                GetNextInstruction(p, hp1);
                                 asml.remove(p);
                                 asml.remove(p);
                                 p.free;
                                 p.free;
                                 p:=hp1;
                                 p:=hp1;
@@ -1342,6 +1491,7 @@ Implementation
                           begin
                           begin
                             DebugMsg('Peephole AndStrb2Strb done', p);
                             DebugMsg('Peephole AndStrb2Strb done', p);
                             taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
                             taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+                            GetNextInstruction(p, hp1);
                             asml.remove(p);
                             asml.remove(p);
                             p.free;
                             p.free;
                             p:=hp1;
                             p:=hp1;
@@ -1359,6 +1509,7 @@ Implementation
                           MatchInstruction(p, A_AND, [C_None], [PF_None]) and
                           MatchInstruction(p, A_AND, [C_None], [PF_None]) and
                           GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                           GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                           MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
                           MatchInstruction(hp1, [A_UXTB,A_UXTH], [C_None], [PF_None]) and
+                          (taicpu(hp1).ops = 2) and
                           RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
                           RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
                           MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
                           MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
                           { reg1 might not be modified inbetween }
                           { reg1 might not be modified inbetween }
@@ -1433,6 +1584,7 @@ Implementation
                               begin
                               begin
                                 DebugMsg('Peephole AndLsl2Lsl done', p);
                                 DebugMsg('Peephole AndLsl2Lsl done', p);
                                 taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[0]^.reg;
                                 taicpu(hp1).oper[1]^.reg:=taicpu(p).oper[0]^.reg;
+                                GetNextInstruction(p, hp1);
                                 asml.Remove(p);
                                 asml.Remove(p);
                                 p.free;
                                 p.free;
                                 p:=hp1;
                                 p:=hp1;
@@ -1499,6 +1651,7 @@ Implementation
                                 asml.remove(p);
                                 asml.remove(p);
                                 p.free;
                                 p.free;
                                 p:=hp1;
                                 p:=hp1;
+                                result:=true;
                                 break;
                                 break;
                               end;
                               end;
                           end;
                           end;
@@ -1510,11 +1663,10 @@ Implementation
                       to
                       to
                       add reg2, ...
                       add reg2, ...
                     }
                     }
-                    if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
-                      begin
-                        if (taicpu(p).ops=3) then
-                          RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
-                      end;
+                    if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                       (taicpu(p).ops>=3) and
+                       RemoveSuperfluousMove(p, hp1, 'DataMov2Data') then
+                      Result:=true;
 
 
                     if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
                     if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
                       LookForPreindexedPattern(taicpu(p)) then
                       LookForPreindexedPattern(taicpu(p)) then
@@ -1524,9 +1676,97 @@ Implementation
                         asml.remove(p);
                         asml.remove(p);
                         p.free;
                         p.free;
                         p:=hp1;
                         p:=hp1;
+                        Result:=true;
                       end;
                       end;
+                    {
+                     Turn
+                     mul reg0, z,w
+                     sub/add x, y, reg0
+                     dealloc reg0
+
+                     into
+
+                     mls/mla x,z,w,y
+                     }
+                    if MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
+                      (taicpu(p).ops=3) and
+                      (taicpu(p).oper[0]^.typ = top_reg) and
+                      (taicpu(p).oper[1]^.typ = top_reg) and
+                      (taicpu(p).oper[2]^.typ = top_reg) and
+                      GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+                      MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
+
+                      (((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype>=cpu_armv4)) or
+                       ((taicpu(hp1).opcode=A_SUB) and (current_settings.cputype in [cpu_armv6t2,cpu_armv7,cpu_armv7a,cpu_armv7r,cpu_armv7m,cpu_armv7em]))) and
+
+                      // CPUs before ARMv6 don't recommend having the same Rd and Rm for MLA.
+                      // TODO: A workaround would be to swap Rm and Rs
+                      (not ((taicpu(hp1).opcode=A_ADD) and (current_settings.cputype<=cpu_armv6) and MatchOperand(taicpu(hp1).oper[0]^, taicpu(p).oper[1]^))) and
+
+                      (((taicpu(hp1).ops=3) and
+                        (taicpu(hp1).oper[2]^.typ=top_reg) and
+                        ((MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) and
+                          (not RegModifiedBetween(taicpu(hp1).oper[1]^.reg, p, hp1))) or
+                         ((MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
+                           (taicpu(hp1).opcode=A_ADD) and
+                           (not RegModifiedBetween(taicpu(hp1).oper[2]^.reg, p, hp1)))))) or
+                       ((taicpu(hp1).ops=2) and
+                        (taicpu(hp1).oper[1]^.typ=top_reg) and
+                        MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
+                      (RegEndOfLife(taicpu(p).oper[0]^.reg,taicpu(hp1))) then
+                      begin
+                        if taicpu(hp1).opcode=A_ADD then
+                          begin
+                            taicpu(hp1).opcode:=A_MLA;
+
+                            if taicpu(hp1).ops=3 then
+                              begin
+                                if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
+                                  oldreg:=taicpu(hp1).oper[2]^.reg
+                                else
+                                  oldreg:=taicpu(hp1).oper[1]^.reg;
+                              end
+                            else
+                              oldreg:=taicpu(hp1).oper[0]^.reg;
+
+                            taicpu(hp1).loadreg(1,taicpu(p).oper[1]^.reg);
+                            taicpu(hp1).loadreg(2,taicpu(p).oper[2]^.reg);
+                            taicpu(hp1).loadreg(3,oldreg);
+
+                            DebugMsg('MulAdd2MLA done', p);
+
+                            taicpu(hp1).ops:=4;
+
+                            asml.remove(p);
+                            p.free;
+                            p:=hp1;
+                          end
+                        else
+                          begin
+                            taicpu(hp1).opcode:=A_MLS;
+
+                            taicpu(hp1).loadreg(3,taicpu(hp1).oper[1]^.reg);
+
+                            if taicpu(hp1).ops=2 then
+                              taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg)
+                            else
+                              taicpu(hp1).loadreg(1,taicpu(p).oper[2]^.reg);
+
+                            taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
+
+                            DebugMsg('MulSub2MLS done', p);
+
+                            taicpu(hp1).ops:=4;
+
+                            asml.remove(p);
+                            p.free;
+                            p:=hp1;
+                          end;
+
+                        result:=true;
+                      end
                   end;
                   end;
-{$ifdef dummy}                  
+{$ifdef dummy}
                 A_MVN:
                 A_MVN:
                   begin
                   begin
                     {
                     {
@@ -1563,12 +1803,13 @@ Implementation
                           end
                           end
                         else
                         else
                           taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                           taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                        GetNextInstruction(p, hp1);
                         asml.remove(p);
                         asml.remove(p);
                         p.free;
                         p.free;
                         p:=hp1;
                         p:=hp1;
                       end;
                       end;
                   end;
                   end;
-{$endif dummy}                                    
+{$endif dummy}
                 A_UXTB:
                 A_UXTB:
                   begin
                   begin
                     {
                     {
@@ -1607,18 +1848,16 @@ Implementation
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
+                      (taicpu(hp1).ops = 2) and
                       MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
                       MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
                       { reg1 might not be modified inbetween }
                       { reg1 might not be modified inbetween }
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       begin
                       begin
                         DebugMsg('Peephole UxtbUxth2Uxtb done', p);
                         DebugMsg('Peephole UxtbUxth2Uxtb done', p);
-                        taicpu(hp1).opcode:=A_UXTB;
-                        taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
-                        GetNextInstruction(p,hp2);
-                        asml.remove(p);
-                        p.free;
-                        p:=hp2;
+                        taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+                        asml.remove(hp1);
+                        hp1.free;
                         result:=true;
                         result:=true;
                       end
                       end
                     {
                     {
@@ -1632,18 +1871,16 @@ Implementation
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
                       MatchInstruction(hp1, A_UXTB, [C_None], [PF_None]) and
+                      (taicpu(hp1).ops = 2) and
                       MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
                       MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
                       { reg1 might not be modified inbetween }
                       { reg1 might not be modified inbetween }
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) then
                       begin
                       begin
                         DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
                         DebugMsg('Peephole UxtbUxtb2Uxtb done', p);
-                        taicpu(hp1).opcode:=A_UXTB;
-                        taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
-                        GetNextInstruction(p,hp2);
-                        asml.remove(p);
-                        p.free;
-                        p:=hp2;
+                        taicpu(p).loadReg(0,taicpu(hp1).oper[0]^.reg);
+                        asml.remove(hp1);
+                        hp1.free;
                         result:=true;
                         result:=true;
                       end
                       end
                     {
                     {
@@ -1656,6 +1893,7 @@ Implementation
                     }
                     }
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
                     else if MatchInstruction(p, A_UXTB, [C_None], [PF_None]) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+                      (taicpu(p).ops=2) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       MatchInstruction(hp1, A_AND, [C_None], [PF_None]) and
                       (taicpu(hp1).ops=3) and
                       (taicpu(hp1).ops=3) and
                       (taicpu(hp1).oper[2]^.typ=top_const) and
                       (taicpu(hp1).oper[2]^.typ=top_const) and
@@ -1675,11 +1913,9 @@ Implementation
                         p:=hp2;
                         p:=hp2;
                         result:=true;
                         result:=true;
                       end
                       end
-                    else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
-                      begin
-                        //if (taicpu(p).ops=3) then
-                          RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data');
-                      end;
+                    else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                         RemoveSuperfluousMove(p, hp1, 'UxtbMov2Data') then
+                      Result:=true;
                   end;
                   end;
                 A_UXTH:
                 A_UXTH:
                   begin
                   begin
@@ -1702,6 +1938,7 @@ Implementation
                       begin
                       begin
                         DebugMsg('Peephole UXTHStrh2Strh done', p);
                         DebugMsg('Peephole UXTHStrh2Strh done', p);
                         taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
                         taicpu(hp1).loadReg(0,taicpu(p).oper[1]^.reg);
+                        GetNextInstruction(p, hp1);
                         asml.remove(p);
                         asml.remove(p);
                         p.free;
                         p.free;
                         p:=hp1;
                         p:=hp1;
@@ -1718,6 +1955,7 @@ Implementation
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
                     else if MatchInstruction(p, A_UXTH, [C_None], [PF_None]) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
                       MatchInstruction(hp1, A_UXTH, [C_None], [PF_None]) and
+                      (taicpu(hp1).ops=2) and
                       MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
                       MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
                       RegEndofLife(taicpu(p).oper[0]^.reg,taicpu(hp1)) and
                       { reg1 might not be modified inbetween }
                       { reg1 might not be modified inbetween }
@@ -1726,6 +1964,7 @@ Implementation
                         DebugMsg('Peephole UxthUxth2Uxth done', p);
                         DebugMsg('Peephole UxthUxth2Uxth done', p);
                         taicpu(hp1).opcode:=A_UXTH;
                         taicpu(hp1).opcode:=A_UXTH;
                         taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                         taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                        GetNextInstruction(p, hp1);
                         asml.remove(p);
                         asml.remove(p);
                         p.free;
                         p.free;
                         p:=hp1;
                         p:=hp1;
@@ -1754,16 +1993,15 @@ Implementation
                         taicpu(hp1).opcode:=A_UXTH;
                         taicpu(hp1).opcode:=A_UXTH;
                         taicpu(hp1).ops:=2;
                         taicpu(hp1).ops:=2;
                         taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
                         taicpu(hp1).loadReg(1,taicpu(p).oper[1]^.reg);
+                        GetNextInstruction(p, hp1);
                         asml.remove(p);
                         asml.remove(p);
                         p.free;
                         p.free;
                         p:=hp1;
                         p:=hp1;
                         result:=true;
                         result:=true;
                       end
                       end
-                    else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) then
-                      begin
-                        //if (taicpu(p).ops=3) then
-                          RemoveSuperfluousMove(p, hp1, 'UxthMov2Data');
-                      end;
+                    else if GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                         RemoveSuperfluousMove(p, hp1, 'UxthMov2Data') then
+                      Result:=true;
                   end;
                   end;
                 A_CMP:
                 A_CMP:
                   begin
                   begin
@@ -1784,8 +2022,8 @@ Implementation
                        MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
                        MatchInstruction(hp2, A_MOV, [C_EQ, C_NE], [PF_NONE]) and
                        (taicpu(hp1).oper[1]^.typ = top_const) then
                        (taicpu(hp1).oper[1]^.typ = top_const) then
                       begin
                       begin
-                        RemoveRedundantMove(p, hp1, asml);
-                        RemoveRedundantMove(p, hp2, asml);
+                        Result:=RemoveRedundantMove(p, hp1, asml) or Result;
+                        Result:=RemoveRedundantMove(p, hp2, asml) or Result;
                       end;
                       end;
                   end;
                   end;
                 A_STM:
                 A_STM:
@@ -2039,6 +2277,9 @@ Implementation
     begin
     begin
       If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
       If (p1.typ = ait_instruction) and (taicpu(p1).opcode=A_BL) then
         Result:=true
         Result:=true
+      else If MatchInstruction(p1, [A_LDR, A_STR], [], [PF_D]) and
+              (getsupreg(taicpu(p1).oper[0]^.reg)+1=getsupreg(reg)) then
+        Result:=true
       else
       else
         Result:=inherited RegInInstruction(Reg, p1);
         Result:=inherited RegInInstruction(Reg, p1);
     end;
     end;
@@ -2147,6 +2388,7 @@ Implementation
             GetNextInstruction(p,hp1) and
             GetNextInstruction(p,hp1) and
             (hp1.typ=ait_instruction) and
             (hp1.typ=ait_instruction) and
             (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
             (taicpu(hp1).opcode in [A_LDR,A_LDRB,A_LDRH,A_LDRSB,A_LDRSH]) and
+            (taicpu(hp1).oppostfix in [PF_NONE, PF_B, PF_H, PF_SB, PF_SH]) and
             { for now we don't reschedule if the previous instruction changes potentially a memory location }
             { for now we don't reschedule if the previous instruction changes potentially a memory location }
             ( (not(taicpu(p).opcode in opcode_could_mem_write) and
             ( (not(taicpu(p).opcode in opcode_could_mem_write) and
                not(RegModifiedByInstruction(NR_PC,p))
                not(RegModifiedByInstruction(NR_PC,p))
@@ -2319,6 +2561,7 @@ Implementation
     var
     var
       hp : taicpu;
       hp : taicpu;
       hp1,hp2 : tai;
       hp1,hp2 : tai;
+      oldreg : TRegister;
     begin
     begin
       result:=false;
       result:=false;
       if inherited PeepHoleOptPass1Cpu(p) then
       if inherited PeepHoleOptPass1Cpu(p) then
@@ -2336,7 +2579,7 @@ Implementation
           p:=hp;
           p:=hp;
           result:=true;
           result:=true;
         end
         end
-      else if (p.typ=ait_instruction) and
+      {else if (p.typ=ait_instruction) and
         MatchInstruction(p, A_STR, [C_None], [PF_None]) and
         MatchInstruction(p, A_STR, [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
         (taicpu(p).oper[1]^.ref^.addressmode=AM_PREINDEXED) and
         (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
         (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
@@ -2350,7 +2593,7 @@ Implementation
           p.Free;
           p.Free;
           p:=hp;
           p:=hp;
           result:=true;
           result:=true;
-        end
+        end}
       else if (p.typ=ait_instruction) and
       else if (p.typ=ait_instruction) and
         MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
         MatchInstruction(p, A_LDM, [C_None], [PF_FD,PF_IA]) and
         (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
         (taicpu(p).oper[0]^.ref^.addressmode=AM_PREINDEXED) and
@@ -2365,7 +2608,7 @@ Implementation
           p:=hp;
           p:=hp;
           result:=true;
           result:=true;
         end
         end
-      else if (p.typ=ait_instruction) and
+      {else if (p.typ=ait_instruction) and
         MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
         MatchInstruction(p, A_LDR, [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
         (taicpu(p).oper[1]^.ref^.addressmode=AM_POSTINDEXED) and
         (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
         (taicpu(p).oper[1]^.ref^.index=NR_STACK_POINTER_REG) and
@@ -2379,132 +2622,7 @@ Implementation
           p.Free;
           p.Free;
           p:=hp;
           p:=hp;
           result:=true;
           result:=true;
-        end
-      else if (p.typ=ait_instruction) and
-        MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
-        (taicpu(p).oper[1]^.typ=top_const) and
-        (taicpu(p).oper[1]^.val >= 0) and
-        (taicpu(p).oper[1]^.val < 256) and
-        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
-        begin
-          DebugMsg('Peephole Mov2Movs done', p);
-          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
-          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
-          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
-          taicpu(p).oppostfix:=PF_S;
-          result:=true;
-        end
-      else if (p.typ=ait_instruction) and
-        MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
-        (taicpu(p).oper[1]^.typ=top_reg) and
-        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
-        begin
-          DebugMsg('Peephole Mvn2Mvns done', p);
-          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
-          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
-          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
-          taicpu(p).oppostfix:=PF_S;
-          result:=true;
-        end
-      else if (p.typ=ait_instruction) and
-        MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
-        (taicpu(p).ops = 3) and
-        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
-        (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
-        (taicpu(p).oper[2]^.typ=top_const) and
-        (taicpu(p).oper[2]^.val >= 0) and
-        (taicpu(p).oper[2]^.val < 256) and
-        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
-        begin
-          DebugMsg('Peephole AddSub2*s done', p);
-          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
-          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
-          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
-          taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
-          taicpu(p).oppostfix:=PF_S;
-          taicpu(p).ops := 2;
-          result:=true;
-        end
-      else if (p.typ=ait_instruction) and
-        MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
-        (taicpu(p).ops = 3) and
-        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
-        (taicpu(p).oper[2]^.typ=top_reg) then
-        begin
-          DebugMsg('Peephole AddRRR2AddRR done', p);
-          taicpu(p).ops := 2;
-          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
-          result:=true;
-        end
-      else if (p.typ=ait_instruction) and
-        MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
-        (taicpu(p).ops = 3) and
-        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
-        (taicpu(p).oper[2]^.typ=top_reg) and
-        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
-        begin
-          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
-          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
-          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
-          taicpu(p).ops := 2;
-          taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
-          taicpu(p).oppostfix:=PF_S;
-          result:=true;
-        end
-      else if (p.typ=ait_instruction) and
-        MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
-        (taicpu(p).ops = 3) and
-        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
-        (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then
-        begin
-          taicpu(p).ops := 2;
-          if taicpu(p).oper[2]^.typ=top_reg then
-            taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg)
-          else
-            taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
-          result:=true;
-        end
-      else if (p.typ=ait_instruction) and
-        MatchInstruction(p, [A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
-        (taicpu(p).ops = 3) and
-        MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
-        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
-        begin
-          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
-          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
-          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
-          taicpu(p).oppostfix:=PF_S;
-          taicpu(p).ops := 2;
-          result:=true;
-        end
-      else if (p.typ=ait_instruction) and
-        MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
-        (taicpu(p).ops=3) and
-        (taicpu(p).oper[2]^.typ=top_shifterop) and
-        (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
-        //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
-        (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
-        begin
-          DebugMsg('Peephole Mov2Shift done', p);
-          asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
-          asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
-          IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
-          taicpu(p).oppostfix:=PF_S;
-          //taicpu(p).ops := 2;
-
-          case taicpu(p).oper[2]^.shifterop^.shiftmode of
-            SM_LSL: taicpu(p).opcode:=A_LSL;
-            SM_LSR: taicpu(p).opcode:=A_LSR;
-            SM_ASR: taicpu(p).opcode:=A_ASR;
-            SM_ROR: taicpu(p).opcode:=A_ROR;
-          end;
-
-          if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
-            taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs)
-          else
-            taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm);
-          result:=true;
-        end
+        end}
       else if (p.typ=ait_instruction) and
       else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_AND], [], [PF_None]) and
         MatchInstruction(p, [A_AND], [], [PF_None]) and
         (taicpu(p).ops = 2) and
         (taicpu(p).ops = 2) and
@@ -2539,80 +2657,6 @@ Implementation
 
 
           result := true;
           result := true;
         end
         end
-      {
-       Turn
-       mul reg0, z,w
-       sub/add x, y, reg0
-       dealloc reg0
-
-       into
-
-       mls/mla x,y,z,w
-       }
-      {
-      According to Jeppe Johansen this currently uses operands in the wrong order.
-
-      else if (p.typ=ait_instruction) and
-        MatchInstruction(p, [A_MUL], [C_None], [PF_None]) and
-        (taicpu(p).ops=3) and
-        (taicpu(p).oper[0]^.typ = top_reg) and
-        (taicpu(p).oper[1]^.typ = top_reg) and
-        (taicpu(p).oper[2]^.typ = top_reg) and
-        GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
-        MatchInstruction(hp1,[A_ADD,A_SUB],[C_None],[PF_None]) and
-        (((taicpu(hp1).ops=3) and
-          (taicpu(hp1).oper[2]^.typ=top_reg) and
-          (MatchOperand(taicpu(hp1).oper[2]^, taicpu(p).oper[0]^.reg) or
-           (MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg) and
-            (taicpu(hp1).opcode=A_ADD)))) or
-         ((taicpu(hp1).ops=2) and
-          (taicpu(hp1).oper[1]^.typ=top_reg) and
-          MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^.reg))) and
-        assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) and
-        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
-        not(RegModifiedBetween(taicpu(p).oper[2]^.reg,p,hp1)) then
-        begin
-          if taicpu(hp1).opcode=A_ADD then
-            begin
-              taicpu(hp1).opcode:=A_MLA;
-
-              if taicpu(hp1).ops=3 then
-                if MatchOperand(taicpu(hp1).oper[1]^, taicpu(p).oper[0]^) then
-                  taicpu(hp1).loadreg(1,taicpu(hp1).oper[2]^.reg);
-
-              taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
-              taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);
-
-              DebugMsg('MulAdd2MLA done', p);
-
-              taicpu(hp1).ops:=4;
-
-              asml.remove(p);
-              p.free;
-              p:=hp1;
-            end
-          else
-            begin
-              taicpu(hp1).opcode:=A_MLS;
-
-              if taicpu(hp1).ops=2 then
-                taicpu(hp1).loadreg(1,taicpu(hp1).oper[0]^.reg);
-
-              taicpu(hp1).loadreg(2,taicpu(p).oper[1]^.reg);
-              taicpu(hp1).loadreg(3,taicpu(p).oper[2]^.reg);
-
-              DebugMsg('MulSub2MLS done', p);
-
-              taicpu(hp1).ops:=4;
-
-              asml.remove(p);
-              p.free;
-              p:=hp1;
-            end;
-
-          result:=true;
-        end
-      }
       {else if (p.typ=ait_instruction) and
       {else if (p.typ=ait_instruction) and
         MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
         MatchInstruction(p, [A_CMP], [C_None], [PF_None]) and
         (taicpu(p).oper[1]^.typ=top_const) and
         (taicpu(p).oper[1]^.typ=top_const) and
@@ -2739,6 +2783,163 @@ Implementation
         end;
         end;
     end;
     end;
 
 
+
+  function TCpuThumb2AsmOptimizer.PostPeepHoleOptsCpu(var p: tai): boolean;
+    begin
+      result:=false;
+
+      if p.typ = ait_instruction then
+        begin
+          if MatchInstruction(p, A_MOV, [C_None], [PF_None]) and
+            (taicpu(p).oper[1]^.typ=top_const) and
+            (taicpu(p).oper[1]^.val >= 0) and
+            (taicpu(p).oper[1]^.val < 256) and
+            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
+            begin
+              DebugMsg('Peephole Mov2Movs done', p);
+              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
+              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
+              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
+              taicpu(p).oppostfix:=PF_S;
+              result:=true;
+            end
+          else if MatchInstruction(p, A_MVN, [C_None], [PF_None]) and
+            (taicpu(p).oper[1]^.typ=top_reg) and
+            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
+            begin
+              DebugMsg('Peephole Mvn2Mvns done', p);
+              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
+              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
+              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
+              taicpu(p).oppostfix:=PF_S;
+              result:=true;
+            end
+          else if MatchInstruction(p, A_RSB, [C_None], [PF_None]) and
+            (taicpu(p).ops = 3) and
+            (taicpu(p).oper[2]^.typ=top_const) and
+            (taicpu(p).oper[2]^.val=0) and
+            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
+            begin
+              DebugMsg('Peephole Rsb2Rsbs done', p);
+              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
+              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
+              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
+              taicpu(p).oppostfix:=PF_S;
+              result:=true;
+            end
+          else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
+            (taicpu(p).ops = 3) and
+            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
+            (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
+            (taicpu(p).oper[2]^.typ=top_const) and
+            (taicpu(p).oper[2]^.val >= 0) and
+            (taicpu(p).oper[2]^.val < 256) and
+            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
+            begin
+              DebugMsg('Peephole AddSub2*s done', p);
+              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
+              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
+              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
+              taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
+              taicpu(p).oppostfix:=PF_S;
+              taicpu(p).ops := 2;
+              result:=true;
+            end
+          else if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
+            (taicpu(p).ops = 2) and
+            (taicpu(p).oper[1]^.typ=top_reg) and
+            (not MatchOperand(taicpu(p).oper[0]^, NR_STACK_POINTER_REG)) and
+            (not MatchOperand(taicpu(p).oper[1]^, NR_STACK_POINTER_REG)) and
+            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
+            begin
+              DebugMsg('Peephole AddSub2*s done', p);
+              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
+              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
+              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
+              taicpu(p).oppostfix:=PF_S;
+              result:=true;
+            end
+          else if MatchInstruction(p, [A_ADD], [C_None], [PF_None]) and
+            (taicpu(p).ops = 3) and
+            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
+            (taicpu(p).oper[2]^.typ=top_reg) then
+            begin
+              DebugMsg('Peephole AddRRR2AddRR done', p);
+              taicpu(p).ops := 2;
+              taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
+              result:=true;
+            end
+          else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_None]) and
+            (taicpu(p).ops = 3) and
+            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
+            (taicpu(p).oper[2]^.typ=top_reg) and
+            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
+            begin
+              DebugMsg('Peephole opXXY2opsXY done', p);
+              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
+              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
+              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
+              taicpu(p).ops := 2;
+              taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg);
+              taicpu(p).oppostfix:=PF_S;
+              result:=true;
+            end
+          else if MatchInstruction(p, [A_AND,A_ORR,A_EOR,A_BIC,A_LSL,A_LSR,A_ASR,A_ROR], [C_None], [PF_S]) and
+            (taicpu(p).ops = 3) and
+            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
+            (taicpu(p).oper[2]^.typ in [top_reg,top_const]) then
+            begin
+              DebugMsg('Peephole opXXY2opXY done', p);
+              taicpu(p).ops := 2;
+              if taicpu(p).oper[2]^.typ=top_reg then
+                taicpu(p).loadreg(1,taicpu(p).oper[2]^.reg)
+              else
+                taicpu(p).loadconst(1,taicpu(p).oper[2]^.val);
+              result:=true;
+            end
+          else if MatchInstruction(p, [A_ADD,A_AND,A_ORR,A_EOR], [C_None], [PF_None,PF_S]) and
+            (taicpu(p).ops = 3) and
+            MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[2]^) and
+            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
+            begin
+              DebugMsg('Peephole opXYX2opsXY done', p);
+              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
+              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
+              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
+              taicpu(p).oppostfix:=PF_S;
+              taicpu(p).ops := 2;
+              result:=true;
+            end
+          else if MatchInstruction(p, [A_MOV], [C_None], [PF_None]) and
+            (taicpu(p).ops=3) and
+            (taicpu(p).oper[2]^.typ=top_shifterop) and
+            (taicpu(p).oper[2]^.shifterop^.shiftmode in [SM_LSL,SM_LSR,SM_ASR,SM_ROR]) and
+            //MatchOperand(taicpu(p).oper[0]^, taicpu(p).oper[1]^) and
+            (not RegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs)) then
+            begin
+              DebugMsg('Peephole Mov2Shift done', p);
+              asml.InsertBefore(tai_regalloc.alloc(NR_DEFAULTFLAGS,p), p);
+              asml.InsertAfter(tai_regalloc.dealloc(NR_DEFAULTFLAGS,p), p);
+              IncludeRegInUsedRegs(NR_DEFAULTFLAGS,UsedRegs);
+              taicpu(p).oppostfix:=PF_S;
+
+              case taicpu(p).oper[2]^.shifterop^.shiftmode of
+                SM_LSL: taicpu(p).opcode:=A_LSL;
+                SM_LSR: taicpu(p).opcode:=A_LSR;
+                SM_ASR: taicpu(p).opcode:=A_ASR;
+                SM_ROR: taicpu(p).opcode:=A_ROR;
+              end;
+
+              if taicpu(p).oper[2]^.shifterop^.rs<>NR_NO then
+                taicpu(p).loadreg(2, taicpu(p).oper[2]^.shifterop^.rs)
+              else
+                taicpu(p).loadconst(2, taicpu(p).oper[2]^.shifterop^.shiftimm);
+              result:=true;
+            end
+        end;
+    end;
+
+
 begin
 begin
   casmoptimizer:=TCpuAsmOptimizer;
   casmoptimizer:=TCpuAsmOptimizer;
   cpreregallocscheduler:=TCpuPreRegallocScheduler;
   cpreregallocscheduler:=TCpuPreRegallocScheduler;

+ 69 - 44
compiler/arm/cgcpu.pas

@@ -47,7 +47,6 @@ unit cgcpu;
 
 
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_reg(list : TAsmList;reg: tregister);override;
         procedure a_call_reg(list : TAsmList;reg: tregister);override;
-        procedure a_call_ref(list : TAsmList;ref: treference);override;
 
 
         { move instructions }
         { move instructions }
         procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
         procedure a_load_reg_ref(list : TAsmList; fromsize, tosize: tcgsize; reg : tregister;const ref : treference);override;
@@ -681,16 +680,6 @@ unit cgcpu;
       end;
       end;
 
 
 
 
-    procedure tbasecgarm.a_call_ref(list : TAsmList;ref: treference);
-      begin
-        a_reg_alloc(list,NR_R12);
-        a_load_ref_reg(list,OS_ADDR,OS_ADDR,ref,NR_R12);
-        a_call_reg(list,NR_R12);
-        a_reg_dealloc(list,NR_R12);
-        include(current_procinfo.flags,pi_do_call);
-      end;
-
-
      procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
      procedure tcgarm.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
        begin
        begin
           a_op_const_reg_reg(list,op,size,a,reg,reg);
           a_op_const_reg_reg(list,op,size,a,reg,reg);
@@ -894,12 +883,26 @@ unit cgcpu;
 
 
     procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
     procedure tcgarm.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: TOpCg; size: tcgsize; a: tcgint; src, dst: tregister;setflags : boolean;var ovloc : tlocation);
       var
       var
-        shift : byte;
+        shift, lsb, width : byte;
         tmpreg : tregister;
         tmpreg : tregister;
         so : tshifterop;
         so : tshifterop;
         l1 : longint;
         l1 : longint;
         imm1, imm2: DWord;
         imm1, imm2: DWord;
       begin
       begin
+        optimize_op_const(size, op, a);
+        case op of
+          OP_NONE:
+            begin
+              if src <> dst then
+                a_load_reg_reg(list, size, size, src, dst);
+              exit;
+            end;
+          OP_MOVE:
+            begin
+              a_load_const_reg(list, size, a, dst);
+              exit;
+            end;
+        end;
         ovloc.loc:=LOC_VOID;
         ovloc.loc:=LOC_VOID;
         if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
         if {$ifopt R+}(a<>-2147483648) and{$endif} not setflags and is_shifter_const(-a,shift) then
           case op of
           case op of
@@ -927,18 +930,13 @@ unit cgcpu;
               begin
               begin
                 if a>32 then
                 if a>32 then
                   internalerror(200308294);
                   internalerror(200308294);
-                if a<>0 then
-                  begin
-                    shifterop_reset(so);
-                    so.shiftmode:=opshift2shiftmode(op);
-                    if op = OP_ROL then
-                      so.shiftimm:=32-a
-                    else
-                      so.shiftimm:=a;
-                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
-                  end
+                shifterop_reset(so);
+                so.shiftmode:=opshift2shiftmode(op);
+                if op = OP_ROL then
+                  so.shiftimm:=32-a
                 else
                 else
-                 list.concat(taicpu.op_reg_reg(A_MOV,dst,src));
+                  so.shiftimm:=a;
+                list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
               end;
               end;
             else
             else
               {if (op in [OP_SUB, OP_ADD]) and
               {if (op in [OP_SUB, OP_ADD]) and
@@ -972,11 +970,7 @@ unit cgcpu;
         else
         else
           begin
           begin
             { there could be added some more sophisticated optimizations }
             { there could be added some more sophisticated optimizations }
-            if (op in [OP_MUL,OP_IMUL,OP_DIV,OP_IDIV]) and (a=1) then
-              a_load_reg_reg(list,size,size,src,dst)
-            else if (op in [OP_MUL,OP_IMUL]) and (a=0) then
-              a_load_const_reg(list,size,0,dst)
-            else if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
+            if (op in [OP_IMUL,OP_IDIV]) and (a=-1) then
               a_op_reg_reg(list,OP_NEG,size,src,dst)
               a_op_reg_reg(list,OP_NEG,size,src,dst)
             { we do this here instead in the peephole optimizer because
             { we do this here instead in the peephole optimizer because
               it saves us a register }
               it saves us a register }
@@ -1006,23 +1000,48 @@ unit cgcpu;
               begin
               begin
                 { nothing to do on success }
                 { nothing to do on success }
               end
               end
-            { x := y and 0; just clears a register, this sometimes gets generated on 64bit ops.
-              Just using mov x, #0 might allow some easier optimizations down the line. }
-            else if (op = OP_AND) and (dword(a)=0) then
-              list.concat(taicpu.op_reg_const(A_MOV,dst,0))
-            { x := y AND $FFFFFFFF just copies the register, so use mov for better optimizations }
-            else if (op = OP_AND) and (not(dword(a))=0) then
-              list.concat(taicpu.op_reg_reg(A_MOV,dst,src))
             { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
             { BIC clears the specified bits, while AND keeps them, using BIC allows to use a
               broader range of shifterconstants.}
               broader range of shifterconstants.}
             else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
             else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
               list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
               list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
+            { Doing two shifts instead of two bics might allow the peephole optimizer to fold the second shift
+              into the following instruction}
+            else if (op = OP_AND) and
+                    is_continuous_mask(a, lsb, width) and
+                    ((lsb = 0) or ((lsb + width) = 32)) then
+              begin
+                shifterop_reset(so);
+                if (width = 16) and
+                   (lsb = 0) and
+                   (current_settings.cputype >= cpu_armv6) then
+                  list.concat(taicpu.op_reg_reg(A_UXTH,dst,src))
+                else if (width = 8) and
+                   (lsb = 0) and
+                   (current_settings.cputype >= cpu_armv6) then
+                  list.concat(taicpu.op_reg_reg(A_UXTB,dst,src))
+                else if lsb = 0 then
+                  begin
+                    so.shiftmode:=SM_LSL;
+                    so.shiftimm:=32-width;
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
+                    so.shiftmode:=SM_LSR;
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
+                  end
+                else
+                  begin
+                    so.shiftmode:=SM_LSR;
+                    so.shiftimm:=lsb;
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,src,so));
+                    so.shiftmode:=SM_LSL;
+                    list.concat(taicpu.op_reg_reg_shifterop(A_MOV,dst,dst,so));
+                  end;
+              end
             else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
             else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
               begin
               begin
                 list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
                 list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
                 list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
                 list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
               end
               end
-            else if (op in [OP_ADD, OP_SUB, OP_OR]) and
+            else if (op in [OP_ADD, OP_SUB, OP_OR, OP_XOR]) and
                     not(cgsetflags or setflags) and
                     not(cgsetflags or setflags) and
                     split_into_shifter_const(a, imm1, imm2) then
                     split_into_shifter_const(a, imm1, imm2) then
               begin
               begin
@@ -1155,7 +1174,7 @@ unit cgcpu;
         if (ref.base=NR_NO) then
         if (ref.base=NR_NO) then
           begin
           begin
             if ref.shiftmode<>SM_None then
             if ref.shiftmode<>SM_None then
-              internalerror(200308294);
+              internalerror(2014020701);
             ref.base:=ref.index;
             ref.base:=ref.index;
             ref.index:=NR_NO;
             ref.index:=NR_NO;
           end;
           end;
@@ -1752,6 +1771,7 @@ unit cgcpu;
         { call instruction does not put anything on the stack }
         { call instruction does not put anything on the stack }
         stackmisalignment:=0;
         stackmisalignment:=0;
         tarmprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
         tarmprocinfo(current_procinfo).stackpaddingreg:=High(TSuperRegister);
+        lastfloatreg:=RS_NO;
         if not(nostackframe) then
         if not(nostackframe) then
           begin
           begin
             firstfloatreg:=RS_NO;
             firstfloatreg:=RS_NO;
@@ -1978,7 +1998,9 @@ unit cgcpu;
           begin
           begin
             stackmisalignment:=0;
             stackmisalignment:=0;
             firstfloatreg:=RS_NO;
             firstfloatreg:=RS_NO;
+            lastfloatreg:=RS_NO;
             mmregs:=[];
             mmregs:=[];
+            saveregs:=[];
             case current_settings.fputype of
             case current_settings.fputype of
               fpu_fpa,
               fpu_fpa,
               fpu_fpa10,
               fpu_fpa10,
@@ -2203,7 +2225,7 @@ unit cgcpu;
         if (tmpref.base=NR_NO) then
         if (tmpref.base=NR_NO) then
           begin
           begin
             if tmpref.shiftmode<>SM_None then
             if tmpref.shiftmode<>SM_None then
-              internalerror(200308294);
+              internalerror(2014020702);
             if tmpref.signindex<0 then
             if tmpref.signindex<0 then
               internalerror(200312023);
               internalerror(200312023);
             tmpref.base:=tmpref.index;
             tmpref.base:=tmpref.index;
@@ -2271,6 +2293,7 @@ unit cgcpu;
         cg.a_label(current_procinfo.aktlocaldata,l);
         cg.a_label(current_procinfo.aktlocaldata,l);
         tmpref.symboldata:=current_procinfo.aktlocaldata.last;
         tmpref.symboldata:=current_procinfo.aktlocaldata.last;
         piclabel:=nil;
         piclabel:=nil;
+        tmpreg:=NR_NO;
 
 
         indirection_done:=false;
         indirection_done:=false;
         if assigned(ref.symbol) then
         if assigned(ref.symbol) then
@@ -3773,7 +3796,7 @@ unit cgcpu;
            OS_S32:
            OS_S32:
              oppostfix:=PF_None;
              oppostfix:=PF_None;
            else
            else
-             InternalError(200308297);
+             InternalError(200308298);
          end;
          end;
          if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
          if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
            begin
            begin
@@ -4255,7 +4278,7 @@ unit cgcpu;
            OS_S32:
            OS_S32:
              oppostfix:=PF_None;
              oppostfix:=PF_None;
            else
            else
-             InternalError(200308297);
+             InternalError(200308299);
          end;
          end;
          if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
          if (ref.alignment in [1,2]) and (ref.alignment<tcgsize2size[fromsize]) then
            begin
            begin
@@ -4403,7 +4426,7 @@ unit cgcpu;
             OP_SHL:
             OP_SHL:
               begin
               begin
                 if a>32 then
                 if a>32 then
-                  internalerror(200308294);
+                  internalerror(2014020703);
                 if a<>0 then
                 if a<>0 then
                   begin
                   begin
                     shifterop_reset(so);
                     shifterop_reset(so);
@@ -4417,7 +4440,7 @@ unit cgcpu;
             OP_ROL:
             OP_ROL:
               begin
               begin
                 if a>32 then
                 if a>32 then
-                  internalerror(200308294);
+                  internalerror(2014020704);
                 if a<>0 then
                 if a<>0 then
                   begin
                   begin
                     shifterop_reset(so);
                     shifterop_reset(so);
@@ -4431,7 +4454,7 @@ unit cgcpu;
             OP_ROR:
             OP_ROR:
               begin
               begin
                 if a>32 then
                 if a>32 then
-                  internalerror(200308294);
+                  internalerror(2014020705);
                 if a<>0 then
                 if a<>0 then
                   begin
                   begin
                     shifterop_reset(so);
                     shifterop_reset(so);
@@ -4712,6 +4735,7 @@ unit cgcpu;
         if not(nostackframe) then
         if not(nostackframe) then
           begin
           begin
             firstfloatreg:=RS_NO;
             firstfloatreg:=RS_NO;
+            lastfloatreg:=RS_NO;
             { save floating point registers? }
             { save floating point registers? }
             for r:=RS_F0 to RS_F7 do
             for r:=RS_F0 to RS_F7 do
               if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
               if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
@@ -4816,6 +4840,7 @@ unit cgcpu;
             stackmisalignment:=0;
             stackmisalignment:=0;
             { restore floating point register }
             { restore floating point register }
             firstfloatreg:=RS_NO;
             firstfloatreg:=RS_NO;
+            lastfloatreg:=RS_NO;
             { save floating point registers? }
             { save floating point registers? }
             for r:=RS_F0 to RS_F7 do
             for r:=RS_F0 to RS_F7 do
               if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
               if r in rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall) then
@@ -4910,7 +4935,7 @@ unit cgcpu;
         if (ref.base=NR_NO) then
         if (ref.base=NR_NO) then
           begin
           begin
             if ref.shiftmode<>SM_None then
             if ref.shiftmode<>SM_None then
-              internalerror(200308294);
+              internalerror(2014020706);
             ref.base:=ref.index;
             ref.base:=ref.index;
             ref.index:=NR_NO;
             ref.index:=NR_NO;
           end;
           end;

+ 16 - 28
compiler/arm/cpubase.pas

@@ -571,35 +571,23 @@ unit cpubase;
         i : longint;
         i : longint;
         imm : byte;
         imm : byte;
       begin
       begin
-        result:=false;
+        {Loading 0-255 is simple}
         if (d and $FF) = d then
         if (d and $FF) = d then
-          begin
-            result:=true;
-            exit;
-          end;
-        if ((d and $FF00FF00) = 0) and
-           ((d shr 16)=(d and $FFFF)) then
-          begin
-            result:=true;
-            exit;
-          end;
-        if ((d and $00FF00FF) = 0) and
-           ((d shr 16)=(d and $FFFF)) then
-          begin
-            result:=true;
-            exit;
-          end;
-        if ((d shr 16)=(d and $FFFF)) and
-           ((d shr 8)=(d and $FF)) then
-          begin
-            result:=true;
-            exit;
-          end;
-        if is_shifter_const(d,imm) then
-          begin
-            result:=true;
-            exit;
-          end;
+          result:=true
+        { If top and bottom are equal, check if either all 4 bytes are equal
+          or byte 0 and 2 or byte 1 and 3 are equal }
+        else if ((d shr 16)=(d and $FFFF)) and
+                (
+                  ((d and $FF00FF00) = 0) or
+                  ((d and $00FF00FF) = 0) or
+                  ((d shr 8)=(d and $FF))
+                ) then
+          result:=true
+        {Can an 8-bit value be shifted accordingly?}
+        else if is_shifter_const(d,imm) then
+          result:=true
+        else
+          result:=false;
       end;
       end;
     
     
     function is_continuous_mask(d : aint;var lsb, width: byte) : boolean;
     function is_continuous_mask(d : aint;var lsb, width: byte) : boolean;

+ 10 - 0
compiler/arm/cpuelf.pas

@@ -168,6 +168,16 @@ implementation
 
 
     TCB_SIZE = 8;
     TCB_SIZE = 8;
 
 
+    { ELF header e_flags }
+    EF_ARM_BE8          = $00800000;
+    EF_ARM_EABIMASK     = $FF000000;
+    EF_ARM_EABI_UNKNOWN = $00000000;
+    EF_ARM_EABI_VER1    = $01000000;
+    EF_ARM_EABI_VER2    = $02000000;
+    EF_ARM_EABI_VER3    = $03000000;
+    EF_ARM_EABI_VER4    = $04000000;
+    EF_ARM_EABI_VER5    = $05000000;
+
   { Using short identifiers to save typing. This ARM thing has more relocations
   { Using short identifiers to save typing. This ARM thing has more relocations
     than it has instructions... }
     than it has instructions... }
   const
   const

+ 15 - 14
compiler/arm/cpuinfo.pas

@@ -664,28 +664,29 @@ Const
        CPUARM_HAS_LDREX,
        CPUARM_HAS_LDREX,
        CPUARM_HAS_IDIV,
        CPUARM_HAS_IDIV,
        CPUARM_HAS_THUMB_IDIV,
        CPUARM_HAS_THUMB_IDIV,
-       CPUARM_HAS_THUMB2
+       CPUARM_HAS_THUMB2,
+       CPUARM_HAS_UMULL
       );
       );
 
 
  const
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags =
    cpu_capabilities : array[tcputype] of set of tcpuflags =
      ( { cpu_none     } [],
      ( { cpu_none     } [],
        { cpu_armv3    } [],
        { cpu_armv3    } [],
-       { cpu_armv4    } [],
-       { cpu_armv4t   } [CPUARM_HAS_BX],
-       { cpu_armv5    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ],
-       { cpu_armv5t   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ],
-       { cpu_armv5te  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP],
-       { cpu_armv5tej } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP],
-       { cpu_armv6    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
-       { cpu_armv6k   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
-       { cpu_armv6t2  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB2],
-       { cpu_armv6z   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX],
+       { cpu_armv4    } [CPUARM_HAS_UMULL],
+       { cpu_armv4t   } [CPUARM_HAS_BX,CPUARM_HAS_UMULL],
+       { cpu_armv5    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_UMULL],
+       { cpu_armv5t   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_UMULL],
+       { cpu_armv5te  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_UMULL],
+       { cpu_armv5tej } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_UMULL],
+       { cpu_armv6    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX,CPUARM_HAS_UMULL],
+       { cpu_armv6k   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX,CPUARM_HAS_UMULL],
+       { cpu_armv6t2  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB2,CPUARM_HAS_UMULL],
+       { cpu_armv6z   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_LDREX,CPUARM_HAS_UMULL],
        { cpu_armv6m   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_REV],
        { cpu_armv6m   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_REV],
        { the identifier armv7 is should not be used, it is considered being equal to armv7a }
        { the identifier armv7 is should not be used, it is considered being equal to armv7a }
-       { cpu_armv7    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2],
-       { cpu_armv7a   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2],
-       { cpu_armv7r   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB_IDIV,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2],
+       { cpu_armv7    } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2,CPUARM_HAS_UMULL],
+       { cpu_armv7a   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2,CPUARM_HAS_UMULL],
+       { cpu_armv7r   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_BLX_LABEL,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB_IDIV,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2,CPUARM_HAS_UMULL],
        { cpu_armv7m   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB_IDIV,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2],
        { cpu_armv7m   } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB_IDIV,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2],
        { cpu_armv7em  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB_IDIV,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2]
        { cpu_armv7em  } [CPUARM_HAS_BX,CPUARM_HAS_BLX,CPUARM_HAS_CLZ,CPUARM_HAS_EDSP,CPUARM_HAS_REV,CPUARM_HAS_RBIT,CPUARM_HAS_LDREX,CPUARM_HAS_THUMB_IDIV,CPUARM_HAS_DMB,CPUARM_HAS_THUMB2]
      );
      );

+ 19 - 10
compiler/arm/cpupara.pas

@@ -714,16 +714,25 @@ unit cpupara;
               begin
               begin
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_FUNCTION_RETURN_REG;
                 paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                if (result.intsize<>3) then
-                  begin
-                    paraloc^.size:=retcgsize;
-                    paraloc^.def:=result.def;
-                  end
-                else
-                  begin
-                    paraloc^.size:=OS_32;
-                    paraloc^.def:=u32inttype;
-                  end;
+                case result.IntSize of
+                  0:
+                    begin
+                      paraloc^.loc:=LOC_VOID;
+                      paraloc^.register:=NR_NO;
+                      paraloc^.size:=OS_NO;
+                      paraloc^.def:=voidpointertype;
+                    end;
+                  3:
+                    begin
+                      paraloc^.size:=OS_32;
+                      paraloc^.def:=u32inttype;
+                    end;
+                  else
+                    begin
+                      paraloc^.size:=retcgsize;
+                      paraloc^.def:=result.def;
+                    end;
+                end;
               end;
               end;
           end;
           end;
       end;
       end;

+ 9 - 4
compiler/arm/cpupi.pas

@@ -111,10 +111,14 @@ unit cpupi;
             localsize:=0;
             localsize:=0;
             for i:=0 to procdef.parast.SymList.Count-1 do
             for i:=0 to procdef.parast.SymList.Count-1 do
               if tsym(procdef.parast.SymList[i]).typ=paravarsym then
               if tsym(procdef.parast.SymList[i]).typ=paravarsym then
-                if is_open_string(tabstractnormalvarsym(procdef.parast.SymList[i]).vardef) then
-                  inc(localsize,256)
-                else
-                  inc(localsize,tabstractnormalvarsym(procdef.parast.SymList[i]).getsize);
+                begin
+                  if tabstractnormalvarsym(procdef.parast.SymList[i]).varspez in [vs_var,vs_out,vs_constref] then
+                    inc(localsize,4)
+                  else if is_open_string(tabstractnormalvarsym(procdef.parast.SymList[i]).vardef) then
+                    inc(localsize,256)
+                  else
+                    inc(localsize,tabstractnormalvarsym(procdef.parast.SymList[i]).getsize);
+                end;
 
 
             inc(stackframesize,localsize);
             inc(stackframesize,localsize);
 
 
@@ -154,6 +158,7 @@ unit cpupi;
                 begin
                 begin
                   { save floating point registers? }
                   { save floating point registers? }
                   firstfloatreg:=RS_NO;
                   firstfloatreg:=RS_NO;
+                  lastfloatreg:=RS_NO;
                   regs:=cg.rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                   regs:=cg.rg[R_FPUREGISTER].used_in_proc-paramanager.get_volatile_registers_fpu(pocall_stdcall);
                   for r:=RS_F0 to RS_F7 do
                   for r:=RS_F0 to RS_F7 do
                     if r in regs then
                     if r in regs then

+ 42 - 18
compiler/arm/narmadd.pas

@@ -34,8 +34,11 @@ interface
           function  GetResFlags(unsigned:Boolean):TResFlags;
           function  GetResFlags(unsigned:Boolean):TResFlags;
        public
        public
           function pass_1 : tnode;override;
           function pass_1 : tnode;override;
+          function use_generic_mul32to64: boolean; override;
+          function use_generic_mul64bit: boolean; override;
        protected
        protected
           function first_addfloat: tnode; override;
           function first_addfloat: tnode; override;
+          procedure second_addordinal;override;
           procedure second_addfloat;override;
           procedure second_addfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpfloat;override;
           procedure second_cmpordinal;override;
           procedure second_cmpordinal;override;
@@ -481,11 +484,7 @@ interface
           begin
           begin
             asmList := current_asmdata.CurrAsmList;
             asmList := current_asmdata.CurrAsmList;
             pass_left_right;
             pass_left_right;
-
-            if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
-              hlcg.location_force_reg(asmList,left.location,left.resultdef,left.resultdef,true);
-            if not(right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
-              hlcg.location_force_reg(asmList,right.location,right.resultdef,right.resultdef,true);
+            force_reg_left_right(true, (left.location.loc<>LOC_CONSTANT) and (right.location.loc<>LOC_CONSTANT));
             set_result_location_reg;
             set_result_location_reg;
 
 
             { shortcuts to register64s }
             { shortcuts to register64s }
@@ -507,19 +506,7 @@ interface
       var
       var
         unsigned : boolean;
         unsigned : boolean;
       begin
       begin
-        { prepare for MUL64 inlining }
-        if (not(cs_check_overflow in current_settings.localswitches)) and
-           (nodetype in [muln]) and
-           (is_64bitint(left.resultdef)) and
-           (not (GenerateThumbCode)) then
-          begin
-            result := nil;
-            firstpass(left);
-            firstpass(right);
-            expectloc := LOC_REGISTER;
-          end
-        else
-          result:=inherited pass_1;
+        result:=inherited pass_1;
 
 
         if not(assigned(result)) then
         if not(assigned(result)) then
           begin
           begin
@@ -644,6 +631,43 @@ interface
         location.resflags:=getresflags(unsigned);
         location.resflags:=getresflags(unsigned);
       end;
       end;
 
 
+    const
+      multops: array[boolean] of TAsmOp = (A_SMULL, A_UMULL);
+
+    procedure tarmaddnode.second_addordinal;
+      var
+        unsigned: boolean;
+      begin
+        if (nodetype=muln) and
+           is_64bit(resultdef) and
+           not(GenerateThumbCode) and
+           (CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) then
+          begin
+            pass_left_right;
+            force_reg_left_right(true, false);
+            set_result_location_reg;
+            unsigned:=not(is_signed(left.resultdef)) or
+                      not(is_signed(right.resultdef));
+            current_asmdata.CurrAsmList.Concat(
+              taicpu.op_reg_reg_reg_reg(multops[unsigned], location.register64.reglo, location.register64.reghi,
+                                        left.location.register,right.location.register));
+          end
+        else
+          inherited second_addordinal;
+      end;
+
+    function tarmaddnode.use_generic_mul32to64: boolean;
+      begin
+        result:=GenerateThumbCode or not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]);
+      end;
+
+    function tarmaddnode.use_generic_mul64bit: boolean;
+      begin
+        result:=GenerateThumbCode or
+          not(CPUARM_HAS_UMULL in cpu_capabilities[current_settings.cputype]) or
+          (cs_check_overflow in current_settings.localswitches);
+      end;
+
 begin
 begin
   caddnode:=tarmaddnode;
   caddnode:=tarmaddnode;
 end.
 end.

+ 91 - 77
compiler/arm/narmmat.pas

@@ -71,19 +71,20 @@ implementation
       var
       var
         power  : longint;
         power  : longint;
       begin
       begin
-        if (right.nodetype=ordconstn) and
-          (nodetype=divn) and
-          (ispowerof2(tordconstnode(right).value,power) or
-           (tordconstnode(right).value=1) or
-           (tordconstnode(right).value=int64(-1))
-          ) and
-          not(is_64bitint(resultdef)) then
+        if not(cs_check_overflow in current_settings.localswitches) and
+           (right.nodetype=ordconstn) and
+           (nodetype=divn) and
+           (ispowerof2(tordconstnode(right).value,power) or
+            (tordconstnode(right).value=1) or
+            (tordconstnode(right).value=int64(-1))
+           ) and
+           not(is_64bitint(resultdef)) then
           result:=nil
           result:=nil
-        else if ((GenerateThumbCode) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
+        else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
           (nodetype=divn) and
           (nodetype=divn) and
           not(is_64bitint(resultdef)) then
           not(is_64bitint(resultdef)) then
           result:=nil
           result:=nil
-        else if ((GenerateThumbCode) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
+        else if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
           (nodetype=modn) and
           (nodetype=modn) and
           not(is_64bitint(resultdef)) then
           not(is_64bitint(resultdef)) then
           begin
           begin
@@ -220,7 +221,7 @@ implementation
         secondpass(left);
         secondpass(left);
         secondpass(right);
         secondpass(right);
 
 
-        if ((GenerateThumbCode) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
+        if ((GenerateThumbCode or GenerateThumb2Code) and (CPUARM_HAS_THUMB_IDIV in cpu_capabilities[current_settings.cputype])) and
            (nodetype=divn) and
            (nodetype=divn) and
            not(is_64bitint(resultdef)) then
            not(is_64bitint(resultdef)) then
           begin
           begin
@@ -445,70 +446,80 @@ implementation
 
 
     procedure tarmshlshrnode.second_64bit;
     procedure tarmshlshrnode.second_64bit;
       var
       var
-        hreg64hi,hreg64lo,shiftreg:Tregister;
         v : TConstExprInt;
         v : TConstExprInt;
-        l1,l2,l3:Tasmlabel;
         so: tshifterop;
         so: tshifterop;
+        lreg, resreg: TRegister64;
 
 
       procedure emit_instr(p: tai);
       procedure emit_instr(p: tai);
         begin
         begin
           current_asmdata.CurrAsmList.concat(p);
           current_asmdata.CurrAsmList.concat(p);
         end;
         end;
 
 
-      {Reg1 gets shifted and moved into reg2, and is set to zero afterwards}
-      procedure shift_more_than_32(reg1, reg2: TRegister; shiftval: Byte ; sm: TShiftMode);
+      {This code is build like it gets called with sm=SM_LSR all the time, for SM_LSL dst* and src* have to be reversed}
+      procedure shift_less_than_32(srchi, srclo, dsthi, dstlo: TRegister; shiftval: Byte; sm: TShiftMode);
         begin
         begin
-          shifterop_reset(so); so.shiftimm:=shiftval - 32; so.shiftmode:=sm;
-          emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, reg2, reg1, so));
-          emit_instr(taicpu.op_reg_const(A_MOV, reg1, 0));
-        end;
+          shifterop_reset(so);
 
 
-      procedure shift_less_than_32(reg1, reg2: TRegister; shiftval: Byte; shiftright: boolean);
-        begin
-          shifterop_reset(so); so.shiftimm:=shiftval;
-          if shiftright then so.shiftmode:=SM_LSR else so.shiftmode:=SM_LSL;
-          emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, reg1, reg1, so));
+          so.shiftimm:=shiftval;
+          so.shiftmode:=sm;
+          emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, dstlo, srclo, so));
+          emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, dsthi, srchi, so));
 
 
-          if shiftright then so.shiftmode:=SM_LSL else so.shiftmode:=SM_LSR;
+          if sm = SM_LSR then so.shiftmode:=SM_LSL else so.shiftmode:=SM_LSR;
           so.shiftimm:=32-shiftval;
           so.shiftimm:=32-shiftval;
-          emit_instr(taicpu.op_reg_reg_reg_shifterop(A_ORR, reg1, reg1, reg2, so));
+          emit_instr(taicpu.op_reg_reg_reg_shifterop(A_ORR, dstlo, dstlo, srchi, so));
 
 
-          if shiftright then so.shiftmode:=SM_LSR else so.shiftmode:=SM_LSL;
-          so.shiftimm:=shiftval;
-          emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, reg2, reg2, so));
         end;
         end;
 
 
-      procedure shift_by_variable(reg1, reg2, shiftval: TRegister; shiftright: boolean);
+      {This code is build like it gets called with sm=SM_LSR all the time, for SM_LSL dst* and src* have to be reversed
+       This will generate
+         mov   shiftval1, shiftval
+         cmp   shiftval1, #64
+         movcs shiftval1, #64
+         rsb   shiftval2, shiftval1, #32
+         mov   dstlo, srclo, lsr shiftval1
+         mov   dsthi, srchi, lsr shiftval1
+         orr   dstlo, srchi, lsl shiftval2
+         subs  shiftval2, shiftval1, #32
+         movpl dstlo, srchi, lsr shiftval2
+      }
+      procedure shift_by_variable(srchi, srclo, dsthi, dstlo, shiftval: TRegister; sm: TShiftMode);
         var
         var
-          shiftval2:TRegister;
+          shiftval1,shiftval2:TRegister;
         begin
         begin
           shifterop_reset(so);
           shifterop_reset(so);
+          shiftval1:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
           shiftval2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
           shiftval2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
 
 
+          cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, shiftval, shiftval1);
+
+          {The ARM barrel shifter only considers the lower 8 bits of a register for the shift}
           cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
           cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+          emit_instr(taicpu.op_reg_const(A_CMP, shiftval1, 64));
+          emit_instr(setcondition(taicpu.op_reg_const(A_MOV, shiftval1, 64), C_CS));
+          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
 
 
-          {Do we shift more than 32 bits?}
-          emit_instr(setoppostfix(taicpu.op_reg_reg_const(A_RSB, shiftval2, shiftval, 32), PF_S));
+          {Calculate how much the upper register needs to be shifted left}
+          emit_instr(taicpu.op_reg_reg_const(A_RSB, shiftval2, shiftval1, 32));
 
 
-          {This part cares for 32 bits and more}
-          emit_instr(setcondition(taicpu.op_reg_reg_const(A_SUB, shiftval2, shiftval, 32), C_MI));
-          if shiftright then so.shiftmode:=SM_LSR else so.shiftmode:=SM_LSL;
-          so.rs:=shiftval2;
-          emit_instr(setcondition(taicpu.op_reg_reg_shifterop(A_MOV, reg2, reg1, so), C_MI));
+          so.shiftmode:=sm;
+          so.rs:=shiftval1;
+
+          {Shift and zerofill the hi+lo register}
+          emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, dstlo, srclo, so));
+          emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, dsthi, srchi, so));
 
 
-          {Less than 32 bits}
-          so.rs:=shiftval;
-          emit_instr(setcondition(taicpu.op_reg_reg_shifterop(A_MOV, reg2, reg2, so), C_PL));
-          if shiftright then so.shiftmode:=SM_LSL else so.shiftmode:=SM_LSR;
+          {Fold in the lower 32-shiftval bits}
+          if sm = SM_LSR then so.shiftmode:=SM_LSL else so.shiftmode:=SM_LSR;
           so.rs:=shiftval2;
           so.rs:=shiftval2;
-          emit_instr(setcondition(taicpu.op_reg_reg_reg_shifterop(A_ORR, reg2, reg2, reg1, so), C_PL));
+          emit_instr(taicpu.op_reg_reg_reg_shifterop(A_ORR, dstlo, dstlo, srchi, so));
 
 
-          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+          emit_instr(setoppostfix(taicpu.op_reg_reg_const(A_SUB, shiftval2, shiftval1, 32), PF_S));
 
 
-          {Final adjustments}
-          if shiftright then so.shiftmode:=SM_LSR else so.shiftmode:=SM_LSL;
-          so.rs:=shiftval;
-          emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, reg1, reg1, so));
+          so.shiftmode:=sm;
+          emit_instr(setcondition(taicpu.op_reg_reg_shifterop(A_MOV, dstlo, srchi, so), C_PL));
+          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
         end;
         end;
 
 
       begin
       begin
@@ -519,13 +530,17 @@ implementation
         end;
         end;
 
 
         location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
         location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+        location.register64.reglo:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
 
 
         { load left operator in a register }
         { load left operator in a register }
-        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
-        hreg64hi:=left.location.register64.reghi;
-        hreg64lo:=left.location.register64.reglo;
-        location.register64.reghi:=hreg64hi;
-        location.register64.reglo:=hreg64lo;
+        if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or
+           (left.location.size<>OS_64) then
+          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
+
+        lreg := left.location.register64;
+        resreg := location.register64;
+        shifterop_reset(so);
 
 
         { shifting by a constant directly coded: }
         { shifting by a constant directly coded: }
         if (right.nodetype=ordconstn) then
         if (right.nodetype=ordconstn) then
@@ -537,8 +552,8 @@ implementation
                 begin
                 begin
                   {Shift left by one by 2 simple 32bit additions}
                   {Shift left by one by 2 simple 32bit additions}
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
-                  emit_instr(setoppostfix(taicpu.op_reg_reg_reg(A_ADD, hreg64lo, hreg64lo, hreg64lo), PF_S));
-                  emit_instr(taicpu.op_reg_reg_reg(A_ADC, hreg64hi, hreg64hi, hreg64hi));
+                  emit_instr(setoppostfix(taicpu.op_reg_reg_reg(A_ADD, resreg.reglo, lreg.reglo, lreg.reglo), PF_S));
+                  emit_instr(taicpu.op_reg_reg_reg(A_ADC, resreg.reghi, lreg.reghi, lreg.reghi));
                   cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 end
                 end
               else
               else
@@ -546,42 +561,41 @@ implementation
                   {Shift right by first shifting hi by one and then using RRX (rotate right extended), which rotates through the carry}
                   {Shift right by first shifting hi by one and then using RRX (rotate right extended), which rotates through the carry}
                   shifterop_reset(so); so.shiftmode:=SM_LSR; so.shiftimm:=1;
                   shifterop_reset(so); so.shiftmode:=SM_LSR; so.shiftimm:=1;
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
-                  emit_instr(setoppostfix(taicpu.op_reg_reg_shifterop(A_MOV, hreg64hi, hreg64hi, so), PF_S));
+                  emit_instr(setoppostfix(taicpu.op_reg_reg_shifterop(A_MOV, resreg.reghi, lreg.reghi, so), PF_S));
                   so.shiftmode:=SM_RRX; so.shiftimm:=0; {RRX does NOT have a shift amount}
                   so.shiftmode:=SM_RRX; so.shiftimm:=0; {RRX does NOT have a shift amount}
-                  emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, hreg64lo, hreg64lo, so));
+                  emit_instr(taicpu.op_reg_reg_shifterop(A_MOV, resreg.reglo, lreg.reglo, so));
                   cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                   cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                 end
                 end
-            {A 32bit shift just replaces a register and clears the other}
-            else if v = 32 then
+            {Clear one register and use the cg to generate a normal 32-bit shift}
+            else if v >= 32 then
+              if nodetype=shln then
               begin
               begin
-                if nodetype=shln then
-                  emit_instr(taicpu.op_reg_const(A_MOV, hreg64hi, 0))
-                else
-                  emit_instr(taicpu.op_reg_const(A_MOV, hreg64lo, 0));
-                location.register64.reghi:=hreg64lo;
-                location.register64.reglo:=hreg64hi;
+                emit_instr(taicpu.op_reg_const(A_MOV, resreg.reglo, 0));
+                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHL,OS_32,v.uvalue-32,lreg.reglo,resreg.reghi);
               end
               end
-            {Shift LESS than 32}
-            else if (v < 32) and (v > 1) then
-              if nodetype=shln then
-                shift_less_than_32(hreg64hi, hreg64lo, v.uvalue, false)
               else
               else
-                shift_less_than_32(hreg64lo, hreg64hi, v.uvalue, true)
-            {More than 32}
-            else if v > 32 then
+              begin
+                emit_instr(taicpu.op_reg_const(A_MOV, resreg.reghi, 0));
+                cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,OS_32,v.uvalue-32,lreg.reghi,resreg.reglo);
+              end
+            {Shift LESS than 32, thats the tricky one}
+            else if (v < 32) and (v > 1) then
               if nodetype=shln then
               if nodetype=shln then
-                shift_more_than_32(hreg64lo, hreg64hi, v.uvalue, SM_LSL)
+                shift_less_than_32(lreg.reglo, lreg.reghi, resreg.reglo, resreg.reghi, v.uvalue, SM_LSL)
               else
               else
-                shift_more_than_32(hreg64hi, hreg64lo, v.uvalue, SM_LSR);
+                shift_less_than_32(lreg.reghi, lreg.reglo, resreg.reghi, resreg.reglo, v.uvalue, SM_LSR);
           end
           end
         else
         else
           begin
           begin
-            { force right operators in a register }
-            hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,resultdef,false);
+            { force right operator into a register }
+            if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or
+               (right.location.size<>OS_32) then
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,u32inttype,true);
+
             if nodetype = shln then
             if nodetype = shln then
-              shift_by_variable(hreg64lo,hreg64hi,right.location.register, false)
+              shift_by_variable(lreg.reglo, lreg.reghi, resreg.reglo, resreg.reghi, right.location.register, SM_LSL)
             else
             else
-              shift_by_variable(hreg64hi,hreg64lo,right.location.register, true);
+              shift_by_variable(lreg.reghi, lreg.reglo, resreg.reghi, resreg.reglo, right.location.register, SM_LSR);
           end;
           end;
       end;
       end;
 
 

+ 48 - 3
compiler/arm/rgcpu.pas

@@ -28,7 +28,7 @@ unit rgcpu;
   interface
   interface
 
 
      uses
      uses
-       aasmbase,aasmtai,aasmdata,aasmcpu,
+       aasmbase,aasmtai,aasmsym,aasmdata,aasmcpu,
        cgbase,cgutils,
        cgbase,cgutils,
        cpubase,
        cpubase,
        {$ifdef DEBUG_SPILLING}
        {$ifdef DEBUG_SPILLING}
@@ -43,6 +43,8 @@ unit rgcpu;
        public
        public
          procedure do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
          procedure do_spill_read(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
          procedure do_spill_written(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
          procedure do_spill_written(list: TAsmList; pos: tai; const spilltemp: treference; tempreg: tregister; orgsupreg: tsuperregister); override;
+         function do_spill_replace(list : TAsmList;instr : tai_cpu_abstract_sym;
+           orgreg : tsuperregister;const spilltemp : treference) : boolean;override;
          procedure add_constraints(reg:tregister);override;
          procedure add_constraints(reg:tregister);override;
          function  get_spill_subreg(r:tregister) : tsubregister;override;
          function  get_spill_subreg(r:tregister) : tsubregister;override;
        end;
        end;
@@ -126,9 +128,10 @@ unit rgcpu;
                     end;
                     end;
                 end;
                 end;
               A_MLA,
               A_MLA,
+              A_MLS,
               A_MUL:
               A_MUL:
                 begin
                 begin
-                  if current_settings.cputype<cpu_armv6 then
+                  if (current_settings.cputype<cpu_armv6) and (taicpu(p).opcode<>A_MLS) then
                     add_edge(getsupreg(taicpu(p).oper[0]^.reg),getsupreg(taicpu(p).oper[1]^.reg));
                     add_edge(getsupreg(taicpu(p).oper[0]^.reg),getsupreg(taicpu(p).oper[1]^.reg));
                    add_edge(getsupreg(taicpu(p).oper[0]^.reg),RS_R13);
                    add_edge(getsupreg(taicpu(p).oper[0]^.reg),RS_R13);
                    add_edge(getsupreg(taicpu(p).oper[0]^.reg),RS_R15);
                    add_edge(getsupreg(taicpu(p).oper[0]^.reg),RS_R15);
@@ -136,7 +139,7 @@ unit rgcpu;
                    add_edge(getsupreg(taicpu(p).oper[1]^.reg),RS_R15);
                    add_edge(getsupreg(taicpu(p).oper[1]^.reg),RS_R15);
                    add_edge(getsupreg(taicpu(p).oper[2]^.reg),RS_R13);
                    add_edge(getsupreg(taicpu(p).oper[2]^.reg),RS_R13);
                    add_edge(getsupreg(taicpu(p).oper[2]^.reg),RS_R15);
                    add_edge(getsupreg(taicpu(p).oper[2]^.reg),RS_R15);
-                   if taicpu(p).opcode=A_MLA then
+                   if taicpu(p).opcode<>A_MUL then
                      begin
                      begin
                        add_edge(getsupreg(taicpu(p).oper[3]^.reg),RS_R13);
                        add_edge(getsupreg(taicpu(p).oper[3]^.reg),RS_R13);
                        add_edge(getsupreg(taicpu(p).oper[3]^.reg),RS_R15);
                        add_edge(getsupreg(taicpu(p).oper[3]^.reg),RS_R15);
@@ -279,6 +282,48 @@ unit rgcpu;
       end;
       end;
 
 
 
 
+    function trgcpu.do_spill_replace(list:TAsmList;instr:tai_cpu_abstract_sym;orgreg:tsuperregister;const spilltemp:treference):boolean;
+      var
+        b : byte;
+      begin
+        result:=false;
+        if abs(spilltemp.offset)>4095 then
+          exit;
+
+        { Replace 'mov  dst,orgreg' with 'ldr  dst,spilltemp'
+          and     'mov  orgreg,src' with 'str  dst,spilltemp' }
+        with instr do
+          begin
+            if (opcode=A_MOV) and (ops=2) and (oper[1]^.typ=top_reg) and (oper[0]^.typ=top_reg) then
+              begin
+                if (getregtype(oper[0]^.reg)=regtype) and
+                   (get_alias(getsupreg(oper[0]^.reg))=orgreg) and
+                   (get_alias(getsupreg(oper[1]^.reg))<>orgreg) then
+                  begin
+                    { str expects the register in oper[0] }
+                    oper[0]^.typ:=top_reg;
+                    oper[0]^.reg:=oper[1]^.reg;
+                    oper[1]^.typ:=top_ref;
+                    new(oper[1]^.ref);
+                    oper[1]^.ref^:=spilltemp;
+                    opcode:=A_STR;
+                    result:=true;
+                  end
+                else if (getregtype(oper[1]^.reg)=regtype) and
+                   (get_alias(getsupreg(oper[1]^.reg))=orgreg) and
+                   (get_alias(getsupreg(oper[0]^.reg))<>orgreg) then
+                  begin
+                    oper[1]^.typ:=top_ref;
+                    new(oper[1]^.ref);
+                    oper[1]^.ref^:=spilltemp;
+                    opcode:=A_LDR;
+                    result:=true;
+                  end;
+              end;
+          end;
+      end;
+
+
     procedure trgcpu.add_constraints(reg:tregister);
     procedure trgcpu.add_constraints(reg:tregister);
       var
       var
         supreg,i : Tsuperregister;
         supreg,i : Tsuperregister;

+ 31 - 1
compiler/assemble.pas

@@ -32,6 +32,9 @@ interface
 
 
 
 
     uses
     uses
+{$ifdef hasamiga}
+      exec,
+{$endif}
       SysUtils,
       SysUtils,
       systems,globtype,globals,aasmbase,aasmtai,aasmdata,ogbase,finput;
       systems,globtype,globals,aasmbase,aasmtai,aasmdata,ogbase,finput;
 
 
@@ -297,7 +300,7 @@ Implementation
         var
         var
           dir : TRawByteSearchRec;
           dir : TRawByteSearchRec;
         begin
         begin
-          if findfirst(s+source_info.dirsep+'*'+AExt,faAnyFile,dir) = 0 then
+          if findfirst(FixPath(s,false)+'*'+AExt,faAnyFile,dir) = 0 then
             begin
             begin
               repeat
               repeat
                 DeleteFile(s+source_info.dirsep+dir.name);
                 DeleteFile(s+source_info.dirsep+dir.name);
@@ -606,13 +609,36 @@ Implementation
            Replace(result,'$NOWARN','')
            Replace(result,'$NOWARN','')
          else
          else
            Replace(result,'$NOWARN','-W');
            Replace(result,'$NOWARN','-W');
+         Replace(result,'$EXTRAOPT',asmextraopt);
       end;
       end;
 
 
 
 
     procedure TExternalAssembler.AsmCreate(Aplace:tcutplace);
     procedure TExternalAssembler.AsmCreate(Aplace:tcutplace);
+{$ifdef hasamiga}
+      var
+        tempFileName: TPathStr;
+{$endif}
       begin
       begin
         if SmartAsm then
         if SmartAsm then
          NextSmartName(Aplace);
          NextSmartName(Aplace);
+{$ifdef hasamiga}
+        { on Amiga/MorphOS try to redirect .s files to the T: assign, which is
+          for temp files, and usually (default setting) located in the RAM: drive. 
+          This highly improves assembling speed for complex projects like the 
+          compiler itself, especially on hardware with slow disk I/O.
+          Consider this as a poor man's pipe on Amiga, because real pipe handling 
+          would be much more complex and error prone to implement. (KB) }
+        if (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) then
+         begin
+          { try to have an unique name for the .s file }
+          tempFileName:=HexStr(FindTask(nil))+ExtractFileName(AsmFileName);
+{$ifndef morphos}
+          { old Amiga RAM: handler only allows filenames up to 30 char }
+          if Length(tempFileName) < 30 then
+{$endif}
+          AsmFileName:='T:'+tempFileName;
+         end;
+{$endif}
 {$ifdef hasunix}
 {$ifdef hasunix}
         if DoPipe then
         if DoPipe then
          begin
          begin
@@ -1357,6 +1383,8 @@ Implementation
         relative_reloc: boolean;
         relative_reloc: boolean;
       begin
       begin
         fillchar(zerobuf,sizeof(zerobuf),0);
         fillchar(zerobuf,sizeof(zerobuf),0);
+        fillchar(objsym,sizeof(objsym),0);
+        fillchar(objsymend,sizeof(objsymend),0);
         { main loop }
         { main loop }
         while assigned(hp) do
         while assigned(hp) do
          begin
          begin
@@ -1460,6 +1488,8 @@ Implementation
                        { Required for DWARF2 support under Windows }
                        { Required for DWARF2 support under Windows }
                        ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_SECREL32);
                        ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_SECREL32);
                      end;
                      end;
+                   aitconst_gotoff_symbol:
+                     ObjData.writereloc(Tai_const(hp).symofs,sizeof(longint),Objdata.SymbolRef(tai_const(hp).sym),RELOC_GOTOFF);
                    aitconst_uleb128bit,
                    aitconst_uleb128bit,
                    aitconst_sleb128bit :
                    aitconst_sleb128bit :
                      begin
                      begin

+ 7 - 1
compiler/avr/aasmcpu.pas

@@ -246,12 +246,16 @@ implementation
       begin
       begin
         result:=operand_read;
         result:=operand_read;
         case opcode of
         case opcode of
+          A_CLR,
+          A_MOV, A_MOVW:
+           if opnr=0 then
+             result:=operand_write;
           A_CP,A_CPC,A_CPI,A_PUSH :
           A_CP,A_CPC,A_CPI,A_PUSH :
             ;
             ;
           else
           else
             begin
             begin
               if opnr=0 then
               if opnr=0 then
-                result:=operand_write;
+                result:=operand_readwrite;
             end;
             end;
         end;
         end;
       end;
       end;
@@ -416,6 +420,8 @@ implementation
                       end;
                       end;
                     ait_align:
                     ait_align:
                       inc(CurrOffset,tai_align(curtai).aligntype);
                       inc(CurrOffset,tai_align(curtai).aligntype);
+                    ait_weak,
+                    ait_set,
                     ait_marker:
                     ait_marker:
                       ;
                       ;
                     ait_label:
                     ait_label:

+ 1 - 1
compiler/avr/agavrgas.pas

@@ -203,7 +203,7 @@ unit agavrgas;
 
 
             idtxt  : 'AS';
             idtxt  : 'AS';
             asmbin : 'as';
             asmbin : 'as';
-            asmcmd : '-o $OBJ $ASM';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_avr_embedded];
             supported_targets : [system_avr_embedded];
             flags : [af_needar,af_smartlink_sections];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
             labelprefix : '.L';

+ 233 - 14
compiler/avr/aoptcpu.pas

@@ -28,10 +28,12 @@ Unit aoptcpu;
 
 
 Interface
 Interface
 
 
-uses cpubase, aasmtai, aopt, aoptcpub;
+uses cpubase, cgbase, aasmtai, aopt, aoptcpub;
 
 
 Type
 Type
   TCpuAsmOptimizer = class(TAsmOptimizer)
   TCpuAsmOptimizer = class(TAsmOptimizer)
+    Function GetNextInstructionUsingReg(Current: tai; Var Next: tai;reg : TRegister): Boolean;
+
     { uses the same constructor as TAopObj }
     { uses the same constructor as TAopObj }
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
     procedure PeepHoleOptPass2;override;
     procedure PeepHoleOptPass2;override;
@@ -40,7 +42,9 @@ Type
 Implementation
 Implementation
 
 
   uses
   uses
-    aasmbase,aasmcpu,cgbase;
+    cpuinfo,
+    aasmbase,aasmcpu,
+    globals,globtype;
 
 
   function CanBeCond(p : tai) : boolean;
   function CanBeCond(p : tai) : boolean;
     begin
     begin
@@ -48,40 +52,255 @@ Implementation
     end;
     end;
 
 
 
 
+  function TCpuAsmOptimizer.GetNextInstructionUsingReg(Current: tai;
+    var Next: tai; reg: TRegister): Boolean;
+    begin
+      Next:=Current;
+      repeat
+        Result:=GetNextInstruction(Next,Next);
+      until not(cs_opt_level3 in current_settings.optimizerswitches) or not(Result) or (Next.typ<>ait_instruction) or (RegInInstruction(reg,Next)) or
+        (is_calljmp(taicpu(Next).opcode));
+    end;
+
+
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
   function TCpuAsmOptimizer.PeepHoleOptPass1Cpu(var p: tai): boolean;
     var
     var
-      next1: tai;
+      hp1,hp2,hp3: tai;
+      alloc, dealloc: tai_regalloc;
+      i: integer;
     begin
     begin
       result := false;
       result := false;
       case p.typ of
       case p.typ of
         ait_instruction:
         ait_instruction:
           begin
           begin
             case taicpu(p).opcode of
             case taicpu(p).opcode of
+              A_LDI:
+                begin
+                  { turn
+                    ldi reg0, imm
+                    cp reg1, reg0
+                    dealloc reg0
+                    into
+                    cpi reg1, imm
+                  }
+                  if (taicpu(p).ops=2) and
+                     (taicpu(p).oper[0]^.typ=top_reg) and
+                     (taicpu(p).oper[1]^.typ=top_const) and
+                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                     (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
+                     (taicpu(hp1).opcode=A_CP) and
+                     (taicpu(hp1).ops=2) and
+                     (taicpu(hp1).oper[1]^.typ=top_reg) and
+                     (getsupreg(taicpu(hp1).oper[0]^.reg) in [16..31]) and
+                     (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
+                     assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
+                    begin
+                      taicpu(hp1).opcode:=A_CPI;
+                      taicpu(hp1).loadconst(1, taicpu(p).oper[1]^.val);
+
+                      alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
+                      dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
+
+                      if assigned(alloc) and assigned(dealloc) then
+                        begin
+                          asml.Remove(alloc);
+                          alloc.Free;
+                          asml.Remove(dealloc);
+                          dealloc.Free;
+                        end;
+
+                      GetNextInstruction(p,hp1);
+                      asml.Remove(p);
+                      p.Free;
+                      p:=hp1;
+
+                      result:=true;
+                    end;
+                end;
+              A_CLR:
+                begin
+                  { turn the common
+                    clr rX
+                    mov/ld rX, rY
+                    into
+                    mov/ld rX, rY
+                  }
+                  if (taicpu(p).ops=1) and
+                     (taicpu(p).oper[0]^.typ=top_reg) and
+                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                     (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
+                     (hp1.typ=ait_instruction) and
+                     (taicpu(hp1).opcode in [A_MOV,A_LD]) and
+                     (taicpu(hp1).ops>0) and
+                     (taicpu(hp1).oper[0]^.typ=top_reg) and
+                     (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) then
+                    begin
+                      asml.Remove(p);
+                      p.Free;
+                      p:=hp1;
+                      result:=true;
+                    end
+                  { turn
+                    clr rX
+                    ...
+                    adc rY, rX
+                    into
+                    ...
+                    adc rY, r1
+                  }
+                  else if (taicpu(p).ops=1) and
+                     (taicpu(p).oper[0]^.typ=top_reg) and
+                     GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                     (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
+                     (hp1.typ=ait_instruction) and
+                     (taicpu(hp1).opcode in [A_ADC,A_SBC]) and
+                     (taicpu(hp1).ops=2) and
+                     (taicpu(hp1).oper[1]^.typ=top_reg) and
+                     (taicpu(hp1).oper[1]^.reg=taicpu(p).oper[0]^.reg) and
+                     (taicpu(hp1).oper[0]^.reg<>taicpu(p).oper[0]^.reg) and
+                     assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
+                    begin
+                      taicpu(hp1).oper[1]^.reg:=NR_R1;
+
+                      alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
+                      dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
+
+                      if assigned(alloc) and assigned(dealloc) then
+                        begin
+                          asml.Remove(alloc);
+                          alloc.Free;
+                          asml.Remove(dealloc);
+                          dealloc.Free;
+                        end;
+
+                      GetNextInstruction(p,hp1);
+                      asml.Remove(p);
+                      p.free;
+                      p:=hp1;
+
+                      result:=true;
+                    end;
+                end;
+              A_PUSH:
+                begin
+                  { turn
+                    push reg0
+                    push reg1
+                    pop reg3
+                    pop reg2
+                    into
+                    movw reg2,reg0
+                  }
+                  if (taicpu(p).ops=1) and
+                     (taicpu(p).oper[0]^.typ=top_reg) and
+                     GetNextInstruction(p,hp1) and
+                     (hp1.typ=ait_instruction) and
+                     (taicpu(hp1).opcode=A_PUSH) and
+                     (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
+                     ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
+
+                     GetNextInstruction(hp1,hp2) and
+                     (hp2.typ=ait_instruction) and
+                     (taicpu(hp2).opcode=A_POP) and
+
+                     GetNextInstruction(hp2,hp3) and
+                     (hp3.typ=ait_instruction) and
+                     (taicpu(hp3).opcode=A_POP) and
+                     (getsupreg(taicpu(hp2).oper[0]^.reg)=getsupreg(taicpu(hp3).oper[0]^.reg)+1) and
+                     ((getsupreg(taicpu(hp3).oper[0]^.reg) mod 2)=0) then
+                    begin
+                      taicpu(p).ops:=2;
+                      taicpu(p).opcode:=A_MOVW;
+
+                      taicpu(p).loadreg(1, taicpu(p).oper[0]^.reg);
+                      taicpu(p).loadreg(0, taicpu(hp3).oper[0]^.reg);
+
+                      asml.Remove(hp1);
+                      hp1.Free;
+                      asml.Remove(hp2);
+                      hp2.Free;
+                      asml.Remove(hp3);
+                      hp3.Free;
+
+                      result:=true;
+                    end;
+                end;
               A_MOV:
               A_MOV:
                 begin
                 begin
+                  { turn
+                    mov reg0, reg1
+                    push reg0
+                    dealloc reg0
+                    into
+                    push reg1
+                  }
+                  if (taicpu(p).ops=2) and
+                     (taicpu(p).oper[0]^.typ = top_reg) and
+                     (taicpu(p).oper[1]^.typ = top_reg) and
+                     GetNextInstructionUsingReg(p,hp1,taicpu(p).oper[0]^.reg) and
+                     (not RegModifiedBetween(taicpu(p).oper[0]^.reg, p, hp1)) and
+                     (hp1.typ = ait_instruction) and
+                     (taicpu(hp1).opcode in [A_PUSH,A_MOV,A_CP,A_CPC,A_ADD,A_SUB,A_EOR,A_AND,A_OR]) and
+                     RegInInstruction(taicpu(p).oper[0]^.reg, hp1) and
+                     (not RegModifiedByInstruction(taicpu(p).oper[0]^.reg, hp1)) and
+                     {(taicpu(hp1).ops=1) and
+                     (taicpu(hp1).oper[0]^.typ = top_reg) and
+                     (taicpu(hp1).oper[0]^.reg=taicpu(p).oper[0]^.reg) and  }
+                     assigned(FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) then
+                    begin
+                      for i := 0 to taicpu(hp1).ops-1 do
+                        if taicpu(hp1).oper[i]^.typ=top_reg then
+                          if taicpu(hp1).oper[i]^.reg=taicpu(p).oper[0]^.reg then
+                            taicpu(hp1).oper[i]^.reg:=taicpu(p).oper[1]^.reg;
+
+                      alloc:=FindRegAllocBackward(taicpu(p).oper[0]^.reg,tai(p.Previous));
+                      dealloc:=FindRegDeAlloc(taicpu(p).oper[0]^.reg,tai(hp1.Next));
+
+                      if assigned(alloc) and assigned(dealloc) then
+                        begin
+                          asml.Remove(alloc);
+                          alloc.Free;
+                          asml.Remove(dealloc);
+                          dealloc.Free;
+                        end;
+
+                      GetNextInstruction(p,hp1);
+                      asml.Remove(p);
+                      p.free;
+                      p:=hp1;
+                      result:=true;
+                    end
                   { fold
                   { fold
                     mov reg2,reg0
                     mov reg2,reg0
                     mov reg3,reg1
                     mov reg3,reg1
                     to
                     to
                     movw reg2,reg0
                     movw reg2,reg0
                   }
                   }
-                  if (taicpu(p).ops=2) and
+                  else if (CPUAVR_HAS_MOVW in cpu_capabilities[current_settings.cputype]) and
+                     (taicpu(p).ops=2) and
                      (taicpu(p).oper[0]^.typ = top_reg) and
                      (taicpu(p).oper[0]^.typ = top_reg) and
                      (taicpu(p).oper[1]^.typ = top_reg) and
                      (taicpu(p).oper[1]^.typ = top_reg) and
-                     getnextinstruction(p,next1) and
-                     (next1.typ = ait_instruction) and
-                     (taicpu(next1).opcode = A_MOV) and
-                     (taicpu(next1).ops=2) and
-                     (taicpu(next1).oper[0]^.typ = top_reg) and
-                     (taicpu(next1).oper[1]^.typ = top_reg) and
-                     (getsupreg(taicpu(next1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
+                     getnextinstruction(p,hp1) and
+                     (hp1.typ = ait_instruction) and
+                     (taicpu(hp1).opcode = A_MOV) and
+                     (taicpu(hp1).ops=2) and
+                     (taicpu(hp1).oper[0]^.typ = top_reg) and
+                     (taicpu(hp1).oper[1]^.typ = top_reg) and
+                     (getsupreg(taicpu(hp1).oper[0]^.reg)=getsupreg(taicpu(p).oper[0]^.reg)+1) and
                      ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
                      ((getsupreg(taicpu(p).oper[0]^.reg) mod 2)=0) and
                      ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
                      ((getsupreg(taicpu(p).oper[1]^.reg) mod 2)=0) and
-                     (getsupreg(taicpu(next1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
+                     (getsupreg(taicpu(hp1).oper[1]^.reg)=getsupreg(taicpu(p).oper[1]^.reg)+1) then
                     begin
                     begin
+                      alloc:=FindRegAllocBackward(taicpu(hp1).oper[0]^.reg,tai(hp1.Previous));
+                      if assigned(alloc) then
+                        begin
+                          asml.Remove(alloc);
+                          asml.InsertBefore(alloc,p);
+                        end;
+
                       taicpu(p).opcode:=A_MOVW;
                       taicpu(p).opcode:=A_MOVW;
-                      asml.remove(next1);
-                      next1.free;
+                      asml.remove(hp1);
+                      hp1.free;
                       result := true;
                       result := true;
                     end;
                     end;
                 end;
                 end;

+ 26 - 8
compiler/avr/aoptcpub.pas

@@ -36,7 +36,10 @@ Unit aoptcpub; { Assembler OPTimizer CPU specific Base }
 Interface
 Interface
 
 
 Uses
 Uses
-  cpubase,aasmcpu,AOptBase;
+  cpubase,
+  cgbase,
+  aasmcpu,aasmtai,
+  AOptBase;
 
 
 Type
 Type
 
 
@@ -58,6 +61,7 @@ Type
 { ************************************************************************* }
 { ************************************************************************* }
 
 
   TAoptBaseCpu = class(TAoptBase)
   TAoptBaseCpu = class(TAoptBase)
+    function RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean; override;
   End;
   End;
 
 
 
 
@@ -103,12 +107,26 @@ Implementation
 { ************************************************************************* }
 { ************************************************************************* }
 { **************************** TCondRegs ********************************** }
 { **************************** TCondRegs ********************************** }
 { ************************************************************************* }
 { ************************************************************************* }
-Constructor TCondRegs.init;
-Begin
-End;
-
-Destructor TCondRegs.Done; {$ifdef inl} inline; {$endif inl}
-Begin
-End;
+  Constructor TCondRegs.init;
+    Begin
+    End;
+
+  Destructor TCondRegs.Done; {$ifdef inl} inline; {$endif inl}
+    Begin
+    End;
+
+
+  function TAoptBaseCpu.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
+    var
+      i : Longint;
+    begin
+      result:=false;
+      for i:=0 to taicpu(p1).ops-1 do
+        if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
+          begin
+            result:=true;
+            exit;
+          end;
+    end;
 
 
 End.
 End.

+ 173 - 62
compiler/avr/cgcpu.pas

@@ -50,10 +50,10 @@ unit cgcpu;
         procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
         procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);override;
         procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
         procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const paraloc : TCGPara);override;
         procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
         procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);override;
+        procedure a_load_reg_cgpara(list : TAsmList; size : tcgsize;r : tregister; const cgpara : tcgpara);override;
 
 
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_reg(list : TAsmList;reg: tregister);override;
         procedure a_call_reg(list : TAsmList;reg: tregister);override;
-        procedure a_call_ref(list : TAsmList;ref: treference);override;
 
 
         procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
         procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
         procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst : TRegister); override;
         procedure a_op_reg_reg(list: TAsmList; Op: TOpCG; size: TCGSize; src, dst : TRegister); override;
@@ -205,25 +205,138 @@ unit cgcpu;
       end;
       end;
 
 
 
 
+    procedure tcgavr.a_load_reg_cgpara(list : TAsmList;size : tcgsize;r : tregister;const cgpara : tcgpara);
+
+      procedure load_para_loc(r : TRegister;paraloc : PCGParaLocation);
+        var
+          ref : treference;
+        begin
+          paramanager.allocparaloc(list,paraloc);
+          case paraloc^.loc of
+             LOC_REGISTER,LOC_CREGISTER:
+               a_load_reg_reg(list,paraloc^.size,paraloc^.size,r,paraloc^.register);
+             LOC_REFERENCE,LOC_CREFERENCE:
+               begin
+                  reference_reset_base(ref,paraloc^.reference.index,paraloc^.reference.offset,2);
+                  a_load_reg_ref(list,paraloc^.size,paraloc^.size,r,ref);
+               end;
+             else
+               internalerror(2002071004);
+          end;
+        end;
+
+      var
+        i, i2 : longint;
+        hp : PCGParaLocation;
+
+      begin
+{        if use_push(cgpara) then
+          begin
+            if tcgsize2size[cgpara.Size] > 2 then
+              begin
+                if tcgsize2size[cgpara.Size] <> 4 then
+                  internalerror(2013031101);
+                if cgpara.location^.Next = nil then
+                  begin
+                    if tcgsize2size[cgpara.location^.size] <> 4 then
+                      internalerror(2013031101);
+                  end
+                else
+                  begin
+                    if tcgsize2size[cgpara.location^.size] <> 2 then
+                      internalerror(2013031101);
+                    if tcgsize2size[cgpara.location^.Next^.size] <> 2 then
+                      internalerror(2013031101);
+                    if cgpara.location^.Next^.Next <> nil then
+                      internalerror(2013031101);
+                  end;
+
+                if tcgsize2size[cgpara.size]>cgpara.alignment then
+                  pushsize:=cgpara.size
+                else
+                  pushsize:=int_cgsize(cgpara.alignment);
+                pushsize2 := int_cgsize(tcgsize2size[pushsize] - 2);
+                list.concat(taicpu.op_reg(A_PUSH,TCgsize2opsize[pushsize2],makeregsize(list,GetNextReg(r),pushsize2)));
+                list.concat(taicpu.op_reg(A_PUSH,S_W,makeregsize(list,r,OS_16)));
+              end
+            else
+              begin
+                cgpara.check_simple_location;
+                if tcgsize2size[cgpara.location^.size]>cgpara.alignment then
+                  pushsize:=cgpara.location^.size
+                else
+                  pushsize:=int_cgsize(cgpara.alignment);
+                list.concat(taicpu.op_reg(A_PUSH,TCgsize2opsize[pushsize],makeregsize(list,r,pushsize)));
+              end;
+
+          end
+        else }
+          begin
+            if not(tcgsize2size[cgpara.Size] in [1..4]) then
+              internalerror(2014011101);
+
+            hp:=cgpara.location;
+
+            i:=0;
+            while i<tcgsize2size[cgpara.Size] do
+              begin
+                if not(assigned(hp)) then
+                  internalerror(2014011102);
+
+                inc(i, tcgsize2size[hp^.Size]);
+
+                if hp^.Loc=LOC_REGISTER then
+                  begin
+                    load_para_loc(r,hp);
+                    hp:=hp^.Next;
+                    r:=GetNextReg(r);
+                  end
+                else
+                  begin
+                    load_para_loc(r,hp);
+
+                    for i2:=1 to tcgsize2size[hp^.Size] do
+                      r:=GetNextReg(r);
+
+                    hp:=hp^.Next;
+                  end;
+              end;
+            if assigned(hp) then
+              internalerror(2014011103);
+          end;
+      end;
+
+
     procedure tcgavr.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
     procedure tcgavr.a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const paraloc : TCGPara);
       var
       var
-        ref: treference;
+        i : longint;
+        hp : PCGParaLocation;
       begin
       begin
-        paraloc.check_simple_location;
-        paramanager.allocparaloc(list,paraloc.location);
-        case paraloc.location^.loc of
-          LOC_REGISTER,LOC_CREGISTER:
-            a_load_const_reg(list,size,a,paraloc.location^.register);
-          LOC_REFERENCE:
-            begin
-               reference_reset(ref,paraloc.alignment);
-               ref.base:=paraloc.location^.reference.index;
-               ref.offset:=paraloc.location^.reference.offset;
-               a_load_const_ref(list,size,a,ref);
+        if not(tcgsize2size[paraloc.Size] in [1..4]) then
+          internalerror(2014011101);
+
+        hp:=paraloc.location;
+
+        for i:=1 to tcgsize2size[paraloc.Size] do
+          begin
+            if not(assigned(hp)) or
+              (tcgsize2size[hp^.size]<>1) or
+              (hp^.shiftval<>0) then
+              internalerror(2014011105);
+             case hp^.loc of
+               LOC_REGISTER,LOC_CREGISTER:
+                 a_load_const_reg(list,hp^.size,(a shr (i-1)) and $ff,hp^.register);
+               LOC_REFERENCE,LOC_CREFERENCE:
+                 begin
+                   list.concat(taicpu.op_const(A_PUSH,(a shr (i-1)) and $ff));
+                 end;
+               else
+                 internalerror(2002071004);
             end;
             end;
-          else
-            internalerror(2002081101);
-        end;
+            hp:=hp^.Next;
+          end;
+        if assigned(hp) then
+          internalerror(2014011104);
       end;
       end;
 
 
 
 
@@ -271,26 +384,11 @@ unit cgcpu;
 
 
     procedure tcgavr.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
     procedure tcgavr.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const paraloc : TCGPara);
       var
       var
-        ref: treference;
         tmpreg: tregister;
         tmpreg: tregister;
       begin
       begin
-        paraloc.check_simple_location;
-        paramanager.allocparaloc(list,paraloc.location);
-        case paraloc.location^.loc of
-          LOC_REGISTER,LOC_CREGISTER:
-            a_loadaddr_ref_reg(list,r,paraloc.location^.register);
-          LOC_REFERENCE:
-            begin
-              reference_reset(ref,paraloc.alignment);
-              ref.base := paraloc.location^.reference.index;
-              ref.offset := paraloc.location^.reference.offset;
-              tmpreg := getintregister(list,OS_ADDR);
-              a_loadaddr_ref_reg(list,r,tmpreg);
-              a_load_reg_ref(list,OS_ADDR,OS_ADDR,tmpreg,ref);
-            end;
-          else
-            internalerror(2002080701);
-        end;
+        tmpreg:=getaddressregister(list);
+        a_loadaddr_ref_reg(list,r,tmpreg);
+        a_load_reg_cgpara(list,OS_ADDR,tmpreg,paraloc);
       end;
       end;
 
 
 
 
@@ -321,19 +419,6 @@ unit cgcpu;
       end;
       end;
 
 
 
 
-    procedure tcgavr.a_call_ref(list : TAsmList;ref: treference);
-      begin
-        a_reg_alloc(list,NR_ZLO);
-        a_reg_alloc(list,NR_ZHI);
-        a_load_ref_reg(list,OS_ADDR,OS_ADDR,ref,NR_ZLO);
-        list.concat(taicpu.op_none(A_ICALL));
-        a_reg_dealloc(list,NR_ZLO);
-        a_reg_dealloc(list,NR_ZHI);
-
-        include(current_procinfo.flags,pi_do_call);
-      end;
-
-
      procedure tcgavr.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
      procedure tcgavr.a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister);
        begin
        begin
          if not(size in [OS_S8,OS_8,OS_S16,OS_16,OS_S32,OS_32]) then
          if not(size in [OS_S8,OS_8,OS_S16,OS_16,OS_S32,OS_32]) then
@@ -414,7 +499,15 @@ unit cgcpu;
            OP_NEG:
            OP_NEG:
              begin
              begin
                if src<>dst then
                if src<>dst then
-                 a_load_reg_reg(list,size,size,src,dst);
+                 begin
+                   if size in [OS_S64,OS_64] then
+                     begin
+                       a_load_reg_reg(list,OS_32,OS_32,src,dst);
+                       a_load_reg_reg(list,OS_32,OS_32,srchi,dsthi);
+                     end
+                   else
+                     a_load_reg_reg(list,size,size,src,dst);
+                 end;
 
 
                if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
                if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
                  begin
                  begin
@@ -428,7 +521,7 @@ unit cgcpu;
                    tmpreg:=GetNextReg(dst);
                    tmpreg:=GetNextReg(dst);
                    for i:=2 to tcgsize2size[size] do
                    for i:=2 to tcgsize2size[size] do
                      begin
                      begin
-                       list.concat(taicpu.op_reg_const(A_SBCI,dst,-1));
+                       list.concat(taicpu.op_reg_const(A_SBCI,tmpreg,-1));
                        NextTmp;
                        NextTmp;
                    end;
                    end;
                  end;
                  end;
@@ -488,7 +581,7 @@ unit cgcpu;
                current_asmdata.getjumplabel(l2);
                current_asmdata.getjumplabel(l2);
                countreg:=getintregister(list,OS_8);
                countreg:=getintregister(list,OS_8);
                a_load_reg_reg(list,size,OS_8,src,countreg);
                a_load_reg_reg(list,size,OS_8,src,countreg);
-               list.concat(taicpu.op_reg_const(A_CP,countreg,0));
+               list.concat(taicpu.op_reg_const(A_CPI,countreg,0));
                a_jmp_flags(list,F_EQ,l2);
                a_jmp_flags(list,F_EQ,l2);
                cg.a_label(list,l1);
                cg.a_label(list,l1);
                case op of
                case op of
@@ -606,7 +699,7 @@ unit cgcpu;
              end;
              end;
            OP_SUB:
            OP_SUB:
              begin
              begin
-               list.concat(taicpu.op_reg_const(A_SUBI,reg,a));
+               list.concat(taicpu.op_reg_const(A_SUBI,reg,a and mask));
                if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
                if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
                  begin
                  begin
                    for i:=2 to tcgsize2size[size] do
                    for i:=2 to tcgsize2size[size] do
@@ -618,6 +711,20 @@ unit cgcpu;
                      end;
                      end;
                  end;
                  end;
              end;
              end;
+           {OP_ADD:
+             begin
+               list.concat(taicpu.op_reg_const(A_SUBI,reg,(-a) and mask));
+               if size in [OS_S16,OS_16,OS_S32,OS_32,OS_S64,OS_64] then
+                 begin
+                   for i:=2 to tcgsize2size[size] do
+                     begin
+                       NextReg;
+                       mask:=mask shl 8;
+                       inc(shift,8);
+                       list.concat(taicpu.op_reg_const(A_ADC,reg,(a and mask) shr shift));
+                     end;
+                 end;
+             end; }
          else
          else
            begin
            begin
              if size in [OS_64,OS_S64] then
              if size in [OS_64,OS_S64] then
@@ -716,11 +823,11 @@ unit cgcpu;
         else if (ref.base<>NR_NO) and (ref.index<>NR_NO) then
         else if (ref.base<>NR_NO) and (ref.index<>NR_NO) then
           begin
           begin
             maybegetcpuregister(list,tmpreg);
             maybegetcpuregister(list,tmpreg);
-            emit_mov(list,tmpreg,ref.index);
+            emit_mov(list,tmpreg,ref.base);
             maybegetcpuregister(list,GetNextReg(tmpreg));
             maybegetcpuregister(list,GetNextReg(tmpreg));
-            emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.index));
-            list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.base));
-            list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.base)));
+            emit_mov(list,GetNextReg(tmpreg),GetNextReg(ref.base));
+            list.concat(taicpu.op_reg_reg(A_ADD,tmpreg,ref.index));
+            list.concat(taicpu.op_reg_reg(A_ADC,GetNextReg(tmpreg),GetNextReg(ref.index)));
             ref.base:=tmpreg;
             ref.base:=tmpreg;
             ref.index:=NR_NO;
             ref.index:=NR_NO;
           end
           end
@@ -1175,6 +1282,7 @@ unit cgcpu;
       begin
       begin
         if a=0 then
         if a=0 then
           begin
           begin
+            swapped:=false;
             { swap parameters? }
             { swap parameters? }
             case cmp_op of
             case cmp_op of
               OC_GT:
               OC_GT:
@@ -1195,7 +1303,7 @@ unit cgcpu;
               OC_A:
               OC_A:
                 begin
                 begin
                   swapped:=true;
                   swapped:=true;
-                  cmp_op:=OC_A;
+                  cmp_op:=OC_B;
                 end;
                 end;
             end;
             end;
 
 
@@ -1227,6 +1335,7 @@ unit cgcpu;
         tmpreg : tregister;
         tmpreg : tregister;
         i : byte;
         i : byte;
       begin
       begin
+        swapped:=false;
         { swap parameters? }
         { swap parameters? }
         case cmp_op of
         case cmp_op of
           OC_GT:
           OC_GT:
@@ -1247,7 +1356,7 @@ unit cgcpu;
           OC_A:
           OC_A:
             begin
             begin
               swapped:=true;
               swapped:=true;
-              cmp_op:=OC_A;
+              cmp_op:=OC_B;
             end;
             end;
         end;
         end;
         if swapped then
         if swapped then
@@ -1256,13 +1365,13 @@ unit cgcpu;
             reg1:=reg2;
             reg1:=reg2;
             reg2:=tmpreg;
             reg2:=tmpreg;
           end;
           end;
-        list.concat(taicpu.op_reg_reg(A_CP,reg1,reg2));
+        list.concat(taicpu.op_reg_reg(A_CP,reg2,reg1));
 
 
         for i:=2 to tcgsize2size[size] do
         for i:=2 to tcgsize2size[size] do
           begin
           begin
             reg1:=GetNextReg(reg1);
             reg1:=GetNextReg(reg1);
             reg2:=GetNextReg(reg2);
             reg2:=GetNextReg(reg2);
-            list.concat(taicpu.op_reg_reg(A_CPC,reg1,reg2));
+            list.concat(taicpu.op_reg_reg(A_CPC,reg2,reg1));
           end;
           end;
 
 
         a_jmp_cond(list,cmp_op,l);
         a_jmp_cond(list,cmp_op,l);
@@ -1338,7 +1447,7 @@ unit cgcpu;
         case value of
         case value of
           0:
           0:
             ;
             ;
-          -14..-1:
+          {-14..-1:
             begin
             begin
               if ((-value) mod 2)<>0 then
               if ((-value) mod 2)<>0 then
                 list.concat(taicpu.op_reg(A_PUSH,NR_R0));
                 list.concat(taicpu.op_reg(A_PUSH,NR_R0));
@@ -1349,7 +1458,7 @@ unit cgcpu;
             begin
             begin
               for i:=1 to value do
               for i:=1 to value do
                 list.concat(taicpu.op_reg(A_POP,NR_R0));
                 list.concat(taicpu.op_reg(A_POP,NR_R0));
-            end;
+            end;}
           else
           else
             begin
             begin
               list.concat(taicpu.op_reg_const(A_SUBI,NR_R28,lo(word(-value))));
               list.concat(taicpu.op_reg_const(A_SUBI,NR_R28,lo(word(-value))));
@@ -1440,6 +1549,8 @@ unit cgcpu;
                 LocalSize:=current_procinfo.calc_stackframe_size;
                 LocalSize:=current_procinfo.calc_stackframe_size;
                 a_adjust_sp(list,LocalSize);
                 a_adjust_sp(list,LocalSize);
                 regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
                 regs:=rg[R_INTREGISTER].used_in_proc-paramanager.get_volatile_registers_int(pocall_stdcall);
+                if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
+                  regs:=regs+[RS_R28,RS_R29];
 
 
                 for reg:=RS_R0 to RS_R31 do
                 for reg:=RS_R0 to RS_R31 do
                   if reg in regs then
                   if reg in regs then
@@ -1780,7 +1891,7 @@ unit cgcpu;
 
 
     procedure tcgavr.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
     procedure tcgavr.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
       begin
       begin
-        internalerror(2011021324);
+        //internalerror(2011021324);
       end;
       end;
 
 
 
 

+ 12 - 4
compiler/avr/cpubase.pas

@@ -235,11 +235,11 @@ unit cpubase;
       }
       }
       NR_PIC_OFFSET_REG = NR_R9;
       NR_PIC_OFFSET_REG = NR_R9;
       { Results are returned in this register (32-bit values) }
       { Results are returned in this register (32-bit values) }
-      NR_FUNCTION_RETURN_REG = NR_R0;
-      RS_FUNCTION_RETURN_REG = RS_R0;
+      NR_FUNCTION_RETURN_REG = NR_R24;
+      RS_FUNCTION_RETURN_REG = RS_R24;
       { Low part of 64bit return value }
       { Low part of 64bit return value }
-      NR_FUNCTION_RETURN64_LOW_REG = NR_R0;
-      RS_FUNCTION_RETURN64_LOW_REG = RS_R0;
+      NR_FUNCTION_RETURN64_LOW_REG = NR_R22;
+      RS_FUNCTION_RETURN64_LOW_REG = RS_R22;
       { High part of 64bit return value }
       { High part of 64bit return value }
       NR_FUNCTION_RETURN64_HIGH_REG = NR_R1;
       NR_FUNCTION_RETURN64_HIGH_REG = NR_R1;
       RS_FUNCTION_RETURN64_HIGH_REG = RS_R1;
       RS_FUNCTION_RETURN64_HIGH_REG = RS_R1;
@@ -323,6 +323,8 @@ unit cpubase;
     { returns the register with the offset of ofs of a continuous set of register starting with r and being continued with rhi }
     { returns the register with the offset of ofs of a continuous set of register starting with r and being continued with rhi }
     function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
     function GetOffsetReg64(const r,rhi: TRegister;ofs : shortint): TRegister;
 
 
+    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
+
   implementation
   implementation
 
 
     uses
     uses
@@ -476,4 +478,10 @@ unit cpubase;
       end;
       end;
 
 
 
 
+    function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
+      begin
+        is_calljmp:= o in jmp_instructions;
+      end;
+
+
 end.
 end.

+ 10 - 10
compiler/avr/cpuinfo.pas

@@ -194,16 +194,16 @@ Const
  const
  const
    cpu_capabilities : array[tcputype] of set of tcpuflags =
    cpu_capabilities : array[tcputype] of set of tcpuflags =
      ( { cpu_none } [],
      ( { cpu_none } [],
-       { cpu_avr1 } [],
-       { cpu_avr2 } [],
-       { cpu_avr25 } [],
-       { cpu_avr3 } [],
-       { cpu_avr31 } [],
-       { cpu_avr35 } [],
-       { cpu_avr4 } [],
-       { cpu_avr5 } [],
-       { cpu_avr51 } [],
-       { cpu_avr6 } []
+       { cpu_avr1 } [CPUAVR_2_BYTE_PC],
+       { cpu_avr2 } [CPUAVR_2_BYTE_PC],
+       { cpu_avr25 } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr3 } [CPUAVR_HAS_JMP_CALL,CPUAVR_2_BYTE_PC],
+       { cpu_avr31 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_2_BYTE_PC],
+       { cpu_avr35 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr4 } [CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr5 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_2_BYTE_PC],
+       { cpu_avr51 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_2_BYTE_PC],
+       { cpu_avr6 } [CPUAVR_HAS_JMP_CALL,CPUAVR_HAS_MOVW,CPUAVR_HAS_LPMX,CPUAVR_HAS_MUL,CPUAVR_HAS_RAMPZ,CPUAVR_HAS_ELPM,CPUAVR_HAS_ELPMX,CPUAVR_3_BYTE_PC]
      );
      );
 
 
 Implementation
 Implementation

+ 60 - 45
compiler/avr/cpupara.pas

@@ -38,7 +38,6 @@ unit cpupara;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
           function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
           function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
@@ -68,45 +67,6 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    procedure tcpuparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
-      var
-        paraloc : pcgparalocation;
-        psym: tparavarsym;
-        pdef: tdef;
-      begin
-        if nr<1 then
-          internalerror(2002070801);
-        psym:=tparavarsym(pd.paras[nr-1]);
-        pdef:=psym.vardef;
-        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
-          pdef:=getpointerdef(pdef);
-        cgpara.reset;
-        cgpara.size:=def_cgsize(pdef);
-        cgpara.intsize:=tcgsize2size[cgpara.size];
-        cgpara.alignment:=std_param_align;
-        cgpara.def:=pdef;
-        paraloc:=cgpara.add_location;
-        with paraloc^ do
-          begin
-            size:=def_cgsize(pdef);
-            def:=pdef;
-            { the four first parameters are passed into registers }
-            if nr<=9 then
-              begin
-                loc:=LOC_REGISTER;
-                register:=newreg(R_INTREGISTER,RS_R25-(nr-1)*2,R_SUBWHOLE);
-              end
-            else
-              begin
-                { the other parameters are passed on the stack }
-                loc:=LOC_REFERENCE;
-                reference.index:=NR_STACK_POINTER_REG;
-                reference.offset:=(nr-10)*2;
-              end;
-          end;
-      end;
-
-
     function getparaloc(calloption : tproccalloption; p : tdef) : tcgloc;
     function getparaloc(calloption : tproccalloption; p : tdef) : tcgloc;
       begin
       begin
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
@@ -181,6 +141,8 @@ unit cpupara;
             result:=not is_smallset(def);
             result:=not is_smallset(def);
           stringdef :
           stringdef :
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
             result:=tstringdef(def).stringtype in [st_shortstring,st_longstring];
+        else
+          result:=def.size>4;
         end;
         end;
       end;
       end;
 
 
@@ -202,7 +164,10 @@ unit cpupara;
             result:=not(def.size in [1,2,4]);
             result:=not(def.size in [1,2,4]);
           }
           }
           else
           else
-            result:=inherited ret_in_param(def,pd);
+            if (def.size > 4) then
+              result:=true
+            else
+              result:=inherited ret_in_param(def,pd);
         end;
         end;
       end;
       end;
 
 
@@ -481,7 +446,57 @@ unit cpupara;
           { Return in register }
           { Return in register }
         else
         else
           begin
           begin
-            if retcgsize in [OS_64,OS_S64] then
+            case retcgsize of
+              OS_32,OS_S32:
+                begin
+                  paraloc^.loc:=LOC_REGISTER;
+                  paraloc^.register:=NR_R22;
+                  paraloc^.size:=OS_8;
+                  paraloc^.def:=u8inttype;
+
+                  paraloc:=result.add_location;
+                  paraloc^.loc:=LOC_REGISTER;
+                  paraloc^.register:=NR_R23;
+                  paraloc^.size:=OS_8;
+                  paraloc^.def:=u8inttype;
+
+                  paraloc:=result.add_location;
+                  paraloc^.loc:=LOC_REGISTER;
+                  paraloc^.register:=NR_R24;
+                  paraloc^.size:=OS_8;
+                  paraloc^.def:=u8inttype;
+
+                  paraloc:=result.add_location;
+                  paraloc^.loc:=LOC_REGISTER;
+                  paraloc^.register:=NR_R25;
+                  paraloc^.size:=OS_8;
+                  paraloc^.def:=u8inttype;
+                end;
+              OS_16,OS_S16:
+                begin
+                  paraloc^.loc:=LOC_REGISTER;
+                  paraloc^.register:=NR_R24;
+                  paraloc^.size:=OS_8;
+                  paraloc^.def:=u8inttype;
+
+                  paraloc:=result.add_location;
+                  paraloc^.loc:=LOC_REGISTER;
+                  paraloc^.register:=NR_R25;
+                  paraloc^.size:=OS_8;
+                  paraloc^.def:=u8inttype;
+                end;
+              OS_8,OS_S8:
+                begin
+                  paraloc^.loc:=LOC_REGISTER;
+                  paraloc^.register:=NR_R24;
+                  paraloc^.size:=OS_8;
+                  paraloc^.def:=u8inttype;
+                end;
+              else
+                internalerror(2014030101);
+            end;
+
+            {if retcgsize in [OS_64,OS_S64] then
               begin
               begin
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
@@ -497,9 +512,9 @@ unit cpupara;
               begin
               begin
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_FUNCTION_RETURN_REG;
                 paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                paraloc^.size:=OS_32;
-                paraloc^.def:=u32inttype;
-              end;
+                paraloc^.size:=OS_INT;
+                paraloc^.def:=u16inttype;
+              end;}
           end;
           end;
       end;
       end;
 
 

+ 1 - 1
compiler/avr/cpupi.pas

@@ -63,7 +63,7 @@ unit cpupi;
     function tavrprocinfo.calc_stackframe_size:longint;
     function tavrprocinfo.calc_stackframe_size:longint;
       begin
       begin
         maxpushedparasize:=align(maxpushedparasize,max(current_settings.alignment.localalignmin,4));
         maxpushedparasize:=align(maxpushedparasize,max(current_settings.alignment.localalignmin,4));
-        result:=0;
+        result:=Align(tg.direction*tg.lasttemp,max(current_settings.alignment.localalignmin,4))+maxpushedparasize;
       end;
       end;
 
 
 
 

+ 24 - 10
compiler/avr/raavrgas.pas

@@ -69,14 +69,20 @@ Unit raavrgas;
           name : string[2];
           name : string[2];
           reg : tregister;
           reg : tregister;
         end;
         end;
-{
+
       const
       const
-        extraregs : array[0..19] of treg2str = (
-          (name: 'X'; reg : NR_Z),
-          (name: 'Y'; reg : NR_R1),
-          (name: 'Z'; reg : NR_R2),
+        extraregs : array[0..8] of treg2str = (
+          (name: 'X'; reg : NR_R26),
+          (name: 'XL'; reg : NR_R26),
+          (name: 'XH'; reg : NR_R27),
+          (name: 'Y'; reg : NR_R28),
+          (name: 'YL'; reg : NR_R28),
+          (name: 'YH'; reg : NR_R29),
+          (name: 'Z'; reg : NR_R30),
+          (name: 'ZL'; reg : NR_R30),
+          (name: 'ZH'; reg : NR_R31)
         );
         );
-}
+
       var
       var
         i : longint;
         i : longint;
 
 
@@ -85,9 +91,9 @@ Unit raavrgas;
         { reg found?
         { reg found?
           possible aliases are always 2 char
           possible aliases are always 2 char
         }
         }
-        if result or (length(s)<>2) then
+        if result or (not (length(s) in [1,2])) then
           exit;
           exit;
-{
+
         for i:=low(extraregs) to high(extraregs) do
         for i:=low(extraregs) to high(extraregs) do
           begin
           begin
             if s=extraregs[i].name then
             if s=extraregs[i].name then
@@ -98,7 +104,6 @@ Unit raavrgas;
                 exit;
                 exit;
               end;
               end;
           end;
           end;
-}
       end;
       end;
 
 
 
 
@@ -480,7 +485,16 @@ Unit raavrgas;
               { save the type of register used. }
               { save the type of register used. }
               tempreg:=actasmregister;
               tempreg:=actasmregister;
               Consume(AS_REGISTER);
               Consume(AS_REGISTER);
-              if (actasmtoken in [AS_END,AS_SEPARATOR,AS_COMMA]) then
+              if (actasmtoken=AS_PLUS) then
+                begin
+                  oper.opr.typ:=OPR_REFERENCE;
+
+                  reference_reset_base(oper.opr.ref,tempreg,0,1);
+                  oper.opr.ref.addressmode:=AM_POSTINCREMENT;
+
+                  consume(AS_PLUS);
+                end
+              else if (actasmtoken in [AS_END,AS_SEPARATOR,AS_COMMA]) then
                 Begin
                 Begin
                   if not (oper.opr.typ in [OPR_NONE,OPR_REGISTER]) then
                   if not (oper.opr.typ in [OPR_NONE,OPR_REGISTER]) then
                     Message(asmr_e_invalid_operand_type);
                     Message(asmr_e_invalid_operand_type);

+ 6 - 6
compiler/cclasses.pas

@@ -87,13 +87,13 @@ type
     procedure Put(Index: Integer; Item: Pointer);
     procedure Put(Index: Integer; Item: Pointer);
     procedure SetCapacity(NewCapacity: Integer);
     procedure SetCapacity(NewCapacity: Integer);
     procedure SetCount(NewCount: Integer);
     procedure SetCount(NewCount: Integer);
-    Procedure RaiseIndexError(Index : Integer);
+    Procedure RaiseIndexError(Index : Integer);{$ifndef VER2_6}noreturn;{$endif VER2_6}
   public
   public
     destructor Destroy; override;
     destructor Destroy; override;
     function Add(Item: Pointer): Integer;
     function Add(Item: Pointer): Integer;
     procedure Clear;
     procedure Clear;
     procedure Delete(Index: Integer);
     procedure Delete(Index: Integer);
-    class procedure Error(const Msg: string; Data: PtrInt);
+    class procedure Error(const Msg: string; Data: PtrInt);{$ifndef VER2_6}noreturn;{$endif VER2_6}
     procedure Exchange(Index1, Index2: Integer);
     procedure Exchange(Index1, Index2: Integer);
     function Expand: TFPList;
     function Expand: TFPList;
     function Extract(item: Pointer): Pointer;
     function Extract(item: Pointer): Pointer;
@@ -224,7 +224,7 @@ type
     function HashOfIndex(Index: Integer): LongWord;
     function HashOfIndex(Index: Integer): LongWord;
     function GetNextCollision(Index: Integer): Integer;
     function GetNextCollision(Index: Integer): Integer;
     procedure Delete(Index: Integer);
     procedure Delete(Index: Integer);
-    class procedure Error(const Msg: string; Data: PtrInt);
+    class procedure Error(const Msg: string; Data: PtrInt);{$ifndef VER2_6}noreturn;{$endif VER2_6}
     function Expand: TFPHashList;
     function Expand: TFPHashList;
     function Extract(item: Pointer): Pointer;
     function Extract(item: Pointer): Pointer;
     function IndexOf(Item: Pointer): Integer;
     function IndexOf(Item: Pointer): Integer;
@@ -649,7 +649,7 @@ implementation
                TFPObjectList (Copied from rtl/objpas/classes/lists.inc)
                TFPObjectList (Copied from rtl/objpas/classes/lists.inc)
 *****************************************************************************}
 *****************************************************************************}
 
 
-procedure TFPList.RaiseIndexError(Index : Integer);
+procedure TFPList.RaiseIndexError(Index : Integer);{$ifndef VER2_6}noreturn;{$endif VER2_6}
 begin
 begin
   Error(SListIndexError, Index);
   Error(SListIndexError, Index);
 end;
 end;
@@ -745,7 +745,7 @@ begin
   end;
   end;
 end;
 end;
 
 
-class procedure TFPList.Error(const Msg: string; Data: PtrInt);
+class procedure TFPList.Error(const Msg: string; Data: PtrInt);{$ifndef VER2_6}noreturn;{$endif VER2_6}
 begin
 begin
   Raise EListError.CreateFmt(Msg,[Data]) at get_caller_addr(get_frame), get_caller_frame(get_frame);
   Raise EListError.CreateFmt(Msg,[Data]) at get_caller_addr(get_frame), get_caller_frame(get_frame);
 end;
 end;
@@ -1443,7 +1443,7 @@ begin
     Self.Delete(Result);
     Self.Delete(Result);
 end;
 end;
 
 
-class procedure TFPHashList.Error(const Msg: string; Data: PtrInt);
+class procedure TFPHashList.Error(const Msg: string; Data: PtrInt);{$ifndef VER2_6}noreturn;{$endif VER2_6}
 begin
 begin
   Raise EListError.CreateFmt(Msg,[Data])  at get_caller_addr(get_frame), get_caller_frame(get_frame);
   Raise EListError.CreateFmt(Msg,[Data])  at get_caller_addr(get_frame), get_caller_frame(get_frame);
 end;
 end;

+ 30 - 3
compiler/cfileutl.pas

@@ -764,6 +764,7 @@ end;
   begin
   begin
     oldpos := 1;
     oldpos := 1;
     slashPos := Pos('/', path);
     slashPos := Pos('/', path);
+    TranslatePathToMac:='';
     if (slashPos <> 0) then   {its a unix path}
     if (slashPos <> 0) then   {its a unix path}
       begin
       begin
         if slashPos = 1 then
         if slashPos = 1 then
@@ -1479,6 +1480,7 @@ end;
         inquotes:=false;
         inquotes:=false;
         result:='';
         result:='';
         i:=1;
         i:=1;
+        temp:='';
         while i<=length(QuotedStr) do
         while i<=length(QuotedStr) do
           begin
           begin
             case QuotedStr[i] of
             case QuotedStr[i] of
@@ -1521,6 +1523,10 @@ end;
       var
       var
         quote_script: tscripttype;
         quote_script: tscripttype;
       begin
       begin
+
+        if do_checkverbosity(V_Executable) then
+          do_comment(V_Executable,'Executing "'+Path+'" with command line "'+
+            ComLine+'"');
         if (cs_link_on_target in current_settings.globalswitches) then
         if (cs_link_on_target in current_settings.globalswitches) then
           quote_script:=target_info.script
           quote_script:=target_info.script
         else
         else
@@ -1533,7 +1539,21 @@ end;
 
 
 
 
     function RequotedExecuteProcess(const Path: AnsiString; const ComLine: array of AnsiString; Flags: TExecuteFlags): Longint;
     function RequotedExecuteProcess(const Path: AnsiString; const ComLine: array of AnsiString; Flags: TExecuteFlags): Longint;
+      var
+        i : longint;
+        st : string;
       begin
       begin
+        if do_checkverbosity(V_Executable) then
+          begin
+            if high(ComLine)=0 then
+              st:=''
+            else
+              st:=ComLine[1];
+            for i:=2 to high(ComLine) do
+              st:=st+' '+ComLine[i];
+            do_comment(V_Executable,'Executing "'+Path+'" with command line "'+
+              st+'"');
+          end;
         result:=sysutils.ExecuteProcess(Path,ComLine,Flags);
         result:=sysutils.ExecuteProcess(Path,ComLine,Flags);
       end;
       end;
 
 
@@ -1543,21 +1563,28 @@ end;
         expansion under linux }
         expansion under linux }
 {$ifdef hasunix}
 {$ifdef hasunix}
       begin
       begin
+        if do_checkverbosity(V_Used) then
+          do_comment(V_Executable,'Executing "'+Command+'" with fpSystem call');
         result := Unix.fpsystem(command);
         result := Unix.fpsystem(command);
       end;
       end;
 {$else hasunix}
 {$else hasunix}
-  {$ifdef amigashell}
+  {$ifdef hasamiga}
       begin
       begin
+        if do_checkverbosity(V_Used) then
+          do_comment(V_Executable,'Executing "'+Command+'" using RequotedExecuteProcess');
         result := RequotedExecuteProcess('',command);
         result := RequotedExecuteProcess('',command);
       end;
       end;
-  {$else amigashell}
+  {$else hasamiga}
       var
       var
         comspec : string;
         comspec : string;
       begin
       begin
         comspec:=GetEnvironmentVariable('COMSPEC');
         comspec:=GetEnvironmentVariable('COMSPEC');
+        if do_checkverbosity(V_Used) then
+          do_comment(V_Executable,'Executing "'+Command+'" using comspec "'
+            +ComSpec+'"');
         result := RequotedExecuteProcess(comspec,' /C '+command);
         result := RequotedExecuteProcess(comspec,' /C '+command);
       end;
       end;
-   {$endif amigashell}
+   {$endif hasamiga}
 {$endif hasunix}
 {$endif hasunix}
 
 
 
 

+ 5 - 1
compiler/cg64f32.pas

@@ -861,7 +861,11 @@ unit cg64f32;
                begin
                begin
                  current_asmdata.getjumplabel(neglabel);
                  current_asmdata.getjumplabel(neglabel);
                  cg.a_cmp_const_reg_label(list,OS_32,OC_EQ,-1,hreg,neglabel);
                  cg.a_cmp_const_reg_label(list,OS_32,OC_EQ,-1,hreg,neglabel);
-               end;
+               end
+             else
+               { we do not have dynamic dfa, so avoid a warning below about the unused
+                 neglabel }
+               neglabel:=nil;
              { For all other values we have a range check error }
              { For all other values we have a range check error }
              cg.a_call_name(list,'fpc_rangeerror',false);
              cg.a_call_name(list,'fpc_rangeerror',false);
 
 

+ 48 - 24
compiler/cgobj.pas

@@ -228,7 +228,6 @@ unit cgobj;
           }
           }
           procedure a_call_name(list : TAsmList;const s : string; weak: boolean);virtual; abstract;
           procedure a_call_name(list : TAsmList;const s : string; weak: boolean);virtual; abstract;
           procedure a_call_reg(list : TAsmList;reg : tregister);virtual; abstract;
           procedure a_call_reg(list : TAsmList;reg : tregister);virtual; abstract;
-          procedure a_call_ref(list : TAsmList;ref : treference);virtual;
           { same as a_call_name, might be overridden on certain architectures to emit
           { same as a_call_name, might be overridden on certain architectures to emit
             static calls without usage of a got trampoline }
             static calls without usage of a got trampoline }
           procedure a_call_name_static(list : TAsmList;const s : string);virtual;
           procedure a_call_name_static(list : TAsmList;const s : string);virtual;
@@ -337,11 +336,12 @@ unit cgobj;
              to emit, and the constant value to emit. This function can opcode OP_NONE to
              to emit, and the constant value to emit. This function can opcode OP_NONE to
              remove the opcode and OP_MOVE to replace it with a simple load
              remove the opcode and OP_MOVE to replace it with a simple load
 
 
+             @param(size Size of the operand in constant)
              @param(op The opcode to emit, returns the opcode which must be emitted)
              @param(op The opcode to emit, returns the opcode which must be emitted)
              @param(a  The constant which should be emitted, returns the constant which must
              @param(a  The constant which should be emitted, returns the constant which must
                     be emitted)
                     be emitted)
           }
           }
-          procedure optimize_op_const(var op: topcg; var a : tcgint);virtual;
+          procedure optimize_op_const(size: TCGSize; var op: topcg; var a : tcgint);virtual;
 
 
          {#
          {#
              This routine is used in exception management nodes. It should
              This routine is used in exception management nodes. It should
@@ -765,7 +765,7 @@ implementation
           No IE can be generated, because the VMT is written
           No IE can be generated, because the VMT is written
           without a valid rg[] }
           without a valid rg[] }
         if assigned(rg[rt]) then
         if assigned(rg[rt]) then
-          rg[rt].add_reg_instruction(instr,r,cg.executionweight);
+          rg[rt].add_reg_instruction(instr,r,executionweight);
       end;
       end;
 
 
 
 
@@ -1137,7 +1137,7 @@ implementation
                end;
                end;
              end;
              end;
            LOC_FPUREGISTER :
            LOC_FPUREGISTER :
-             cg.a_loadfpu_reg_ref(list,paraloc.size,paraloc.size,paraloc.register,ref);
+             a_loadfpu_reg_ref(list,paraloc.size,paraloc.size,paraloc.register,ref);
            LOC_REFERENCE :
            LOC_REFERENCE :
              begin
              begin
                reference_reset_base(href,paraloc.reference.index,paraloc.reference.offset,align);
                reference_reset_base(href,paraloc.reference.index,paraloc.reference.offset,align);
@@ -1293,6 +1293,7 @@ implementation
         tmpreg,
         tmpreg,
         tmpreg2 : tregister;
         tmpreg2 : tregister;
         i : longint;
         i : longint;
+        hisize : tcgsize;
       begin
       begin
         if ref.alignment in [1,2] then
         if ref.alignment in [1,2] then
           begin
           begin
@@ -1305,14 +1306,18 @@ implementation
                   a_load_ref_reg(list,fromsize,tosize,tmpref,register)
                   a_load_ref_reg(list,fromsize,tosize,tmpref,register)
                 else
                 else
                   begin
                   begin
+                    if FromSize=OS_16 then
+                      hisize:=OS_8
+                    else
+                      hisize:=OS_S8;
                     { first load in tmpreg, because the target register }
                     { first load in tmpreg, because the target register }
                     { may be used in ref as well                        }
                     { may be used in ref as well                        }
                     if target_info.endian=endian_little then
                     if target_info.endian=endian_little then
                       inc(tmpref.offset);
                       inc(tmpref.offset);
                     tmpreg:=getintregister(list,OS_8);
                     tmpreg:=getintregister(list,OS_8);
-                    a_load_ref_reg(list,OS_8,OS_8,tmpref,tmpreg);
-                    tmpreg:=makeregsize(list,tmpreg,OS_16);
-                    a_op_const_reg(list,OP_SHL,OS_16,8,tmpreg);
+                    a_load_ref_reg(list,hisize,hisize,tmpref,tmpreg);
+                    tmpreg:=makeregsize(list,tmpreg,FromSize);
+                    a_op_const_reg(list,OP_SHL,FromSize,8,tmpreg);
                     if target_info.endian=endian_little then
                     if target_info.endian=endian_little then
                       dec(tmpref.offset)
                       dec(tmpref.offset)
                     else
                     else
@@ -1444,10 +1449,39 @@ implementation
       end;
       end;
 
 
 
 
-    procedure tcg.optimize_op_const(var op: topcg; var a : tcgint);
+    procedure tcg.optimize_op_const(size: TCGSize; var op: topcg; var a : tcgint);
       var
       var
         powerval : longint;
         powerval : longint;
+        signext_a, zeroext_a: tcgint;
       begin
       begin
+        case size of
+          OS_64,OS_S64:
+            begin
+              signext_a:=int64(a);
+              zeroext_a:=int64(a);
+            end;
+          OS_32,OS_S32:
+            begin
+              signext_a:=longint(a);
+              zeroext_a:=dword(a);
+            end;
+          OS_16,OS_S16:
+            begin
+              signext_a:=smallint(a);
+              zeroext_a:=word(a);
+            end;
+          OS_8,OS_S8:
+            begin
+              signext_a:=shortint(a);
+              zeroext_a:=byte(a);
+            end
+          else
+            begin
+              { Should we internalerror() here instead? }
+              signext_a:=a;
+              zeroext_a:=a;
+            end;
+        end;
         case op of
         case op of
           OP_OR :
           OP_OR :
             begin
             begin
@@ -1456,13 +1490,13 @@ implementation
                 op:=OP_NONE
                 op:=OP_NONE
               else
               else
               { or with max returns max }
               { or with max returns max }
-                if a = -1 then
+                if signext_a = -1 then
                   op:=OP_MOVE;
                   op:=OP_MOVE;
             end;
             end;
           OP_AND :
           OP_AND :
             begin
             begin
               { and with max returns same result }
               { and with max returns same result }
-              if (a = -1) then
+              if (signext_a = -1) then
                 op:=OP_NONE
                 op:=OP_NONE
               else
               else
               { and with 0 returns 0 }
               { and with 0 returns 0 }
@@ -1474,7 +1508,7 @@ implementation
               { division by 1 returns result }
               { division by 1 returns result }
               if a = 1 then
               if a = 1 then
                 op:=OP_NONE
                 op:=OP_NONE
-              else if ispowerof2(int64(a), powerval) and not(cs_check_overflow in current_settings.localswitches) then
+              else if ispowerof2(int64(zeroext_a), powerval) and not(cs_check_overflow in current_settings.localswitches) then
                 begin
                 begin
                   a := powerval;
                   a := powerval;
                   op:= OP_SHR;
                   op:= OP_SHR;
@@ -1492,7 +1526,7 @@ implementation
                else
                else
                  if a=0 then
                  if a=0 then
                    op:=OP_MOVE
                    op:=OP_MOVE
-               else if ispowerof2(int64(a), powerval) and not(cs_check_overflow in current_settings.localswitches)  then
+               else if ispowerof2(int64(zeroext_a), powerval) and not(cs_check_overflow in current_settings.localswitches)  then
                  begin
                  begin
                    a := powerval;
                    a := powerval;
                    op:= OP_SHL;
                    op:= OP_SHL;
@@ -2058,7 +2092,7 @@ implementation
            (tcgsize2size[tosize]<>4) then
            (tcgsize2size[tosize]<>4) then
           internalerror(2009112504);
           internalerror(2009112504);
         tg.gettemp(list,8,8,tt_normal,tmpref);
         tg.gettemp(list,8,8,tt_normal,tmpref);
-        cg.a_loadmm_reg_ref(list,fromsize,fromsize,mmreg,tmpref,shuffle);
+        a_loadmm_reg_ref(list,fromsize,fromsize,mmreg,tmpref,shuffle);
         a_load_ref_reg(list,tosize,tosize,tmpref,intreg);
         a_load_ref_reg(list,tosize,tosize,tmpref,intreg);
         tg.ungettemp(list,tmpref);
         tg.ungettemp(list,tmpref);
       end;
       end;
@@ -2334,7 +2368,7 @@ implementation
 
 
     procedure tcg.g_exception_reason_load(list : TAsmList; const href : treference);
     procedure tcg.g_exception_reason_load(list : TAsmList; const href : treference);
       begin
       begin
-        cg.a_reg_alloc(list,NR_FUNCTION_RESULT_REG);
+        a_reg_alloc(list,NR_FUNCTION_RESULT_REG);
         a_load_ref_reg(list, OS_INT, OS_INT, href, NR_FUNCTION_RESULT_REG);
         a_load_ref_reg(list, OS_INT, OS_INT, href, NR_FUNCTION_RESULT_REG);
       end;
       end;
 
 
@@ -2385,16 +2419,6 @@ implementation
       end;
       end;
 
 
 
 
-    procedure tcg.a_call_ref(list : TAsmList;ref: treference);
-      var
-        tempreg : TRegister;
-      begin
-        tempreg := getintregister(list, OS_ADDR);
-        a_load_ref_reg(list,OS_ADDR,OS_ADDR,ref,tempreg);
-        a_call_reg(list,tempreg);
-      end;
-
-
    function tcg.g_indirect_sym_load(list:TAsmList;const symname: string; const flags: tindsymflags): tregister;
    function tcg.g_indirect_sym_load(list:TAsmList;const symname: string; const flags: tindsymflags): tregister;
       var
       var
         l: tasmsymbol;
         l: tasmsymbol;

+ 62 - 2
compiler/cgutils.pas

@@ -52,9 +52,9 @@ unit cgutils;
          offset      : asizeint;
          offset      : asizeint;
          symbol,
          symbol,
          relsymbol   : tasmsymbol;
          relsymbol   : tasmsymbol;
-{$if defined(x86) or defined(m68k)}
+{$if defined(x86)}
          segment,
          segment,
-{$endif defined(x86) or defined(m68k)}
+{$endif defined(x86)}
          base,
          base,
          index       : tregister;
          index       : tregister;
          refaddr     : trefaddr;
          refaddr     : trefaddr;
@@ -174,6 +174,7 @@ unit cgutils;
     procedure location_reset_ref(var l : tlocation;lt:TCGRefLoc;lsize:TCGSize; alignment: longint);
     procedure location_reset_ref(var l : tlocation;lt:TCGRefLoc;lsize:TCGSize; alignment: longint);
     procedure location_copy(var destloc:tlocation; const sourceloc : tlocation);
     procedure location_copy(var destloc:tlocation; const sourceloc : tlocation);
     procedure location_swap(var destloc,sourceloc : tlocation);
     procedure location_swap(var destloc,sourceloc : tlocation);
+    function location_reg2string(const locreg: tlocation): string;
 
 
     { returns r with the given alignment }
     { returns r with the given alignment }
     function setalignment(const r : treference;b : byte) : treference;
     function setalignment(const r : treference;b : byte) : treference;
@@ -271,5 +272,64 @@ uses
       end;
       end;
 
 
 
 
+    function location_reg2string(const locreg: tlocation): string;
+      begin
+        if not (locreg.loc in [LOC_REGISTER,LOC_CREGISTER,
+            LOC_MMXREGISTER,LOC_CMMXREGISTER,
+            LOC_MMREGISTER,LOC_CMMREGISTER,
+            LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
+          internalerror(2013122301);
+
+        if locreg.loc in [LOC_REGISTER,LOC_CREGISTER] then
+          begin
+            case locreg.size of
+{$if defined(cpu64bitalu)}
+              OS_128,OS_S128:
+                result:=std_regname(locreg.registerhi)+':'+std_regname(locreg.register);
+{$elseif defined(cpu32bitalu)}
+              OS_64,OS_S64:
+                result:=std_regname(locreg.registerhi)+':'+std_regname(locreg.register);
+{$elseif defined(cpu16bitalu)}
+              OS_64,OS_S64:
+                if getsupreg(locreg.register)<first_int_imreg then
+                  result:='??:'+std_regname(locreg.registerhi)+':??:'+std_regname(locreg.register)
+                else
+                  result:=std_regname(GetNextReg(locreg.registerhi))+':'+std_regname(locreg.registerhi)+':'+std_regname(GetNextReg(locreg.register))+':'+std_regname(locreg.register);
+              OS_32,OS_S32:
+                if getsupreg(locreg.register)<first_int_imreg then
+                  result:='??:'+std_regname(locreg.register)
+                else
+                  result:=std_regname(GetNextReg(locreg.register))+':'+std_regname(locreg.register);
+{$elseif defined(cpu8bitalu)}
+              OS_64,OS_S64:
+                if getsupreg(locreg.register)<first_int_imreg then
+                  result:='??:??:??:'+std_regname(locreg.registerhi)+':??:??:??:'+std_regname(locreg.register)
+                else
+                  result:=std_regname(GetNextReg(GetNextReg(GetNextReg(locreg.registerhi))))+':'+std_regname(GetNextReg(GetNextReg(locreg.registerhi)))+':'+std_regname(GetNextReg(locreg.registerhi))+':'+std_regname(locreg.registerhi)+':'+std_regname(GetNextReg(GetNextReg(GetNextReg(locreg.register))))+':'+std_regname(GetNextReg(GetNextReg(locreg.register)))+':'+std_regname(GetNextReg(locreg.register))+':'+std_regname(locreg.register);
+              OS_32,OS_S32:
+                if getsupreg(locreg.register)<first_int_imreg then
+                  result:='??:??:??:'+std_regname(locreg.register)
+                else
+                  result:=std_regname(GetNextReg(GetNextReg(GetNextReg(locreg.register))))+':'+std_regname(GetNextReg(GetNextReg(locreg.register)))+':'+std_regname(GetNextReg(locreg.register))+':'+std_regname(locreg.register);
+              OS_16,OS_S16:
+                if getsupreg(locreg.register)<first_int_imreg then
+                  result:='??:'+std_regname(locreg.register)
+                else
+                  result:=std_regname(GetNextReg(locreg.register))+':'+std_regname(locreg.register);
+{$endif}
+              else
+                result:=std_regname(locreg.register);
+            end;
+          end
+        else
+          begin
+            if locreg.registerhi<>NR_NO then
+              result:=std_regname(locreg.registerhi)+':'+std_regname(locreg.register)
+            else
+              result:=std_regname(locreg.register);
+          end;
+      end;
+
+
 end.
 end.
 
 

+ 1 - 4
compiler/crefs.pas

@@ -50,11 +50,8 @@ begin
 end;
 end;
 
 
 Function TRefItem.GetCopy : TLinkedListItem;
 Function TRefItem.GetCopy : TLinkedListItem;
-var
-  NR : TRefItem;
 begin
 begin
-  NR.Create(RefInfo);
-  GetCopy:=NR;
+  Result:=TRefItem.Create(RefInfo);
 end;
 end;
 
 
 procedure TRefLinkedList.WriteToPPU;
 procedure TRefLinkedList.WriteToPPU;

+ 45 - 61
compiler/cresstr.pas

@@ -32,7 +32,7 @@ implementation
 
 
 uses
 uses
    SysUtils,
    SysUtils,
-   cclasses,
+   cclasses,widestr,
    cutils,globtype,globals,systems,
    cutils,globtype,globals,systems,
    symconst,symtype,symdef,symsym,
    symconst,symtype,symdef,symsym,
    verbose,fmodule,ppu,
    verbose,fmodule,ppu,
@@ -64,7 +64,7 @@ uses
         constructor Create;
         constructor Create;
         destructor  Destroy;override;
         destructor  Destroy;override;
         procedure CreateResourceStringData;
         procedure CreateResourceStringData;
-        Procedure WriteResourceFile;
+        procedure WriteRSJFile;
         procedure RegisterResourceStrings;
         procedure RegisterResourceStrings;
       end;
       end;
 
 
@@ -214,89 +214,73 @@ uses
           current_asmdata.asmlists[al_resourcestrings].concat(Tai_const.create_sym(endsymlab));
           current_asmdata.asmlists[al_resourcestrings].concat(Tai_const.create_sym(endsymlab));
       end;
       end;
 
 
-
-    Procedure Tresourcestrings.WriteResourceFile;
-      Type
-        TMode = (quoted,unquoted);
+    procedure Tresourcestrings.WriteRSJFile;
       Var
       Var
-        F : Text;
-        Mode : TMode;
-        R : TResourceStringItem;
-        C : char;
-        Col,i : longint;
-        ResFileName : string;
-
-        Procedure Add(Const S : String);
-        begin
-          Write(F,S);
-          inc(Col,length(s));
-        end;
-
+        F: Text;
+        R: TResourceStringItem;
+        ResFileName: string;
+        I: Integer;
+        C: tcompilerwidechar;
+        W: pcompilerwidestring;
       begin
       begin
-        ResFileName:=ChangeFileExt(current_module.ppufilename,'.rst');
+        ResFileName:=ChangeFileExt(current_module.ppufilename,'.rsj');
         message1 (general_i_writingresourcefile,ExtractFileName(ResFileName));
         message1 (general_i_writingresourcefile,ExtractFileName(ResFileName));
         Assign(F,ResFileName);
         Assign(F,ResFileName);
         {$push}{$i-}
         {$push}{$i-}
         Rewrite(f);
         Rewrite(f);
         {$pop}
         {$pop}
-        If IOresult<>0 then
+        if IOresult<>0 then
           begin
           begin
             message1(general_e_errorwritingresourcefile,ResFileName);
             message1(general_e_errorwritingresourcefile,ResFileName);
             exit;
             exit;
           end;
           end;
+        writeln(f,'{"version":1,"strings":[');
         R:=TResourceStringItem(List.First);
         R:=TResourceStringItem(List.First);
         while assigned(R) do
         while assigned(R) do
           begin
           begin
-            writeln(f);
-            Writeln(f,'# hash value = ',R.Hash);
-            col:=0;
-            Add(R.Name+'=');
-            Mode:=unquoted;
-            For I:=0 to R.Len-1 do
-             begin
-               C:=R.Value[i];
-               If (ord(C)>31) and (Ord(c)<=128) and (c<>'''') then
-                begin
-                  If mode=Quoted then
-                   Add(c)
+            write(f, '{"hash":',R.Hash,',"name":"',R.Name,'","value":"');
+            initwidestring(W);
+            ascii2unicode(R.Value,R.Len,current_settings.sourcecodepage,W);
+            for I := 0 to W^.len - 1 do
+              begin
+                C := W^.Data[I];
+                case C of
+                  Ord('"'), Ord('\'), Ord('/'):
+                    write(f, '\', Chr(C));
+                  8:
+                    write(f, '\b');
+                  9:
+                    write(f, '\t');
+                  10:
+                    write(f, '\n');
+                  13:
+                    write(f, '\r');
+                  12:
+                    write(f, '\f');
                   else
                   else
-                   begin
-                     Add(''''+c);
-                     mode:=quoted
-                   end;
-                end
-               else
-                begin
-                  If Mode=quoted then
-                   begin
-                     Add('''');
-                     mode:=unquoted;
-                   end;
-                  Add('#'+tostr(ord(c)));
-                end;
-               If Col>72 then
-                begin
-                  if mode=quoted then
-                   Write (F,'''');
-                  Writeln(F,'+');
-                  Col:=0;
-                  Mode:=unQuoted;
+                  if (C < 32) or (C > 127) then
+                    write(f,'\u',hexStr(Longint(C), 4))
+                  else
+                    write(f,Chr(C));
                 end;
                 end;
-             end;
-            if mode=quoted then
-             writeln (f,'''');
-            Writeln(f);
+              end;
+            donewidestring(W);
+            write(f,'"}');
             R:=TResourceStringItem(R.Next);
             R:=TResourceStringItem(R.Next);
+            if assigned(R) then
+              writeln(f,',')
+            else
+              writeln(f);
           end;
           end;
+        writeln(f,']}');
         close(f);
         close(f);
       end;
       end;
 
 
-
     procedure Tresourcestrings.ConstSym_Register(p:TObject;arg:pointer);
     procedure Tresourcestrings.ConstSym_Register(p:TObject;arg:pointer);
       begin
       begin
         if (tsym(p).typ=constsym) and
         if (tsym(p).typ=constsym) and
            (tconstsym(p).consttyp=constresourcestring) then
            (tconstsym(p).consttyp=constresourcestring) then
-          List.Concat(tResourceStringItem.Create(TConstsym(p)));
+          List.Concat(TResourceStringItem.Create(TConstsym(p)));
       end;
       end;
 
 
 
 
@@ -318,7 +302,7 @@ uses
           begin
           begin
             current_module.flags:=current_module.flags or uf_has_resourcestrings;
             current_module.flags:=current_module.flags or uf_has_resourcestrings;
             resstrs.CreateResourceStringData;
             resstrs.CreateResourceStringData;
-            resstrs.WriteResourceFile;
+            resstrs.WriteRSJFile;
           end;
           end;
         resstrs.Free;
         resstrs.Free;
       end;
       end;

+ 9 - 1
compiler/cstreams.pas

@@ -450,6 +450,11 @@ begin
           l:=0;
           l:=0;
          System.Seek(FHandle,l);
          System.Seek(FHandle,l);
        end;
        end;
+     else
+       begin
+         CStreamError:=103;
+         l:=Offset;
+       end;
    end;
    end;
   {$pop}
   {$pop}
   CStreamError:=IOResult;
   CStreamError:=IOResult;
@@ -549,7 +554,10 @@ begin
     Result:=FMemory
     Result:=FMemory
   else
   else
     If NewCapacity=0 then
     If NewCapacity=0 then
-      FreeMem (FMemory,Fcapacity)
+      begin
+        FreeMem (FMemory,Fcapacity);
+        Result:=nil;
+      end
     else
     else
       begin
       begin
       GetMem (Result,NewCapacity);
       GetMem (Result,NewCapacity);

+ 5 - 1
compiler/dbgdwarf.pas

@@ -1806,7 +1806,7 @@ implementation
           append_labelentry_ref(DW_AT_type,def_dwarf_lab(cansichartype));
           append_labelentry_ref(DW_AT_type,def_dwarf_lab(cansichartype));
           finish_entry;
           finish_entry;
           append_entry(DW_TAG_subrange_type,false,[
           append_entry(DW_TAG_subrange_type,false,[
-            DW_AT_lower_bound,DW_FORM_udata,0,
+            DW_AT_lower_bound,DW_FORM_udata,1,
             DW_AT_upper_bound,DW_FORM_udata,qword(slen)
             DW_AT_upper_bound,DW_FORM_udata,qword(slen)
             ]);
             ]);
           append_labelentry_ref(DW_AT_type,def_dwarf_lab(lendef));
           append_labelentry_ref(DW_AT_type,def_dwarf_lab(lendef));
@@ -2330,6 +2330,8 @@ implementation
         has_high_reg : boolean;
         has_high_reg : boolean;
         dreg,dreghigh : byte;
         dreg,dreghigh : byte;
       begin
       begin
+        blocksize:=0;
+        dreghigh:=0;
         { external symbols can't be resolved at link time, so we
         { external symbols can't be resolved at link time, so we
           can't generate stabs for them
           can't generate stabs for them
 
 
@@ -2886,6 +2888,8 @@ implementation
               templist.free;
               templist.free;
               exit;
               exit;
             end;
             end;
+          else
+            internalerror(2013120111);
         end;
         end;
 
 
         append_entry(DW_TAG_variable,false,[
         append_entry(DW_TAG_variable,false,[

+ 1 - 0
compiler/dbgstabs.pas

@@ -241,6 +241,7 @@ implementation
       len:=0;
       len:=0;
       varcounter:=0;
       varcounter:=0;
       varptr:=@varvaluedata[0];
       varptr:=@varvaluedata[0];
+      varvalues[0]:=nil;
       while i<=length(s) do
       while i<=length(s) do
         begin
         begin
           if (s[i]='$') and (i<length(s)) then
           if (s[i]='$') and (i<length(s)) then

+ 1 - 0
compiler/dbgstabx.pas

@@ -274,6 +274,7 @@ implementation
       if vo_is_external in sym.varoptions then
       if vo_is_external in sym.varoptions then
         exit;
         exit;
       ismem:=not(sym.localloc.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_MMREGISTER,LOC_CMMREGISTER,LOC_FPUREGISTER,LOC_CFPUREGISTER]);
       ismem:=not(sym.localloc.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_MMREGISTER,LOC_CMMREGISTER,LOC_FPUREGISTER,LOC_CFPUREGISTER]);
+      isglobal:=false;
       if ismem then
       if ismem then
         isglobal:=current_asmdata.RefAsmSymbol(sym.mangledname).bind=AB_GLOBAL;
         isglobal:=current_asmdata.RefAsmSymbol(sym.mangledname).bind=AB_GLOBAL;
 
 

+ 1 - 0
compiler/dirparse.pas

@@ -255,6 +255,7 @@ implementation
         opt   : ttargetswitch;
         opt   : ttargetswitch;
       begin
       begin
         result:=true;
         result:=true;
+        value:='';
         repeat
         repeat
           tok:=GetToken(s,',');
           tok:=GetToken(s,',');
           if tok='' then
           if tok='' then

+ 3 - 0
compiler/fpcdefs.inc

@@ -74,6 +74,7 @@
   {$define cpurox}
   {$define cpurox}
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_GET_FRAME}
   {$define SUPPORT_GET_FRAME}
+  {$define cpucapabilities}
 {$endif i386}
 {$endif i386}
 
 
 {$ifdef x86_64}
 {$ifdef x86_64}
@@ -89,6 +90,7 @@
   {$define cpurefshaveindexreg}
   {$define cpurefshaveindexreg}
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_SAFECALL}
   {$define SUPPORT_GET_FRAME}
   {$define SUPPORT_GET_FRAME}
+  {$define cpucapabilities}
 {$endif x86_64}
 {$endif x86_64}
 
 
 {$ifdef ia64}
 {$ifdef ia64}
@@ -222,6 +224,7 @@
   {$define cpurefshaveindexreg}
   {$define cpurefshaveindexreg}
   {$define fpc_compiler_has_fixup_jmps}
   {$define fpc_compiler_has_fixup_jmps}
   {$define SUPPORT_GET_FRAME}
   {$define SUPPORT_GET_FRAME}
+  {$define SUPPORT_SAFECALL}
 {$endif mips}
 {$endif mips}
 
 
 {$ifdef jvm}
 {$ifdef jvm}

+ 14 - 8
compiler/globals.pas

@@ -228,6 +228,9 @@ interface
        wpofeedbackinput,
        wpofeedbackinput,
        wpofeedbackoutput : TPathStr;
        wpofeedbackoutput : TPathStr;
 
 
+       { external assembler extra option }
+       asmextraopt       : string;
+
        { things specified with parameters }
        { things specified with parameters }
        paratarget        : tsystem;
        paratarget        : tsystem;
        paratargetdbg     : tdbg;
        paratargetdbg     : tdbg;
@@ -253,8 +256,11 @@ interface
        do_build,
        do_build,
        do_release,
        do_release,
        do_make       : boolean;
        do_make       : boolean;
+       { Path to ppc }
+       exepath       : TPathStr;
+       { Path to unicode charmap/collation binaries }
+       unicodepath   : TPathStr;
        { path for searching units, different paths can be seperated by ; }
        { path for searching units, different paths can be seperated by ; }
-       exepath            : TPathStr;  { Path to ppc }
        librarysearchpath,
        librarysearchpath,
        unitsearchpath,
        unitsearchpath,
        objectsearchpath,
        objectsearchpath,
@@ -324,7 +330,7 @@ interface
      { parameter switches }
      { parameter switches }
        debugstop : boolean;
        debugstop : boolean;
 {$EndIf EXTDEBUG}
 {$EndIf EXTDEBUG}
-       { Application type (platform specific) 
+       { Application type (platform specific)
          see globtype.pas for description }
          see globtype.pas for description }
        apptype : tapptype;
        apptype : tapptype;
 
 
@@ -560,11 +566,7 @@ implementation
       macutils,
       macutils,
 {$endif}
 {$endif}
 {$ifdef mswindows}
 {$ifdef mswindows}
-{$ifdef VER2_4}
-      cwindirs,
-{$else VER2_4}
       windirs,
       windirs,
-{$endif VER2_4}
 {$endif}
 {$endif}
       comphook;
       comphook;
 
 
@@ -916,7 +918,7 @@ implementation
         {$undef GETENVOK}
         {$undef GETENVOK}
       {$else}
       {$else}
         GetEnvPchar:=StrPNew(GetEnvironmentVariable(envname));
         GetEnvPchar:=StrPNew(GetEnvironmentVariable(envname));
-        if (length(GetEnvPChar)=0) then 
+        if (length(GetEnvPChar)=0) then
           begin
           begin
             FreeEnvPChar(GetEnvPChar);
             FreeEnvPChar(GetEnvPChar);
             GetEnvPChar:=nil;
             GetEnvPChar:=nil;
@@ -1107,7 +1109,9 @@ implementation
              (abiinfo[t].name=hs) then
              (abiinfo[t].name=hs) then
             begin
             begin
               a:=t;
               a:=t;
-              result:=true;
+              { abi_old_win32_gnu is a win32 i386 specific "feature" }
+              if (t<>abi_old_win32_gnu) or (target_info.system=system_i386_win32) then
+                result:=true;
               break;
               break;
             end;
             end;
       end;
       end;
@@ -1299,6 +1303,7 @@ implementation
 {$endif need_path_search}
 {$endif need_path_search}
      begin
      begin
        localexepath:=GetEnvironmentVariable('PPC_EXEC_PATH');
        localexepath:=GetEnvironmentVariable('PPC_EXEC_PATH');
+       exeName := '';
        if localexepath='' then
        if localexepath='' then
          begin
          begin
            exeName := FixFileName(system.paramstr(0));
            exeName := FixFileName(system.paramstr(0));
@@ -1364,6 +1369,7 @@ implementation
         sysrootpath:='';
         sysrootpath:='';
 
 
         { Search Paths }
         { Search Paths }
+        unicodepath:='';
         librarysearchpath:=TSearchPathList.Create;
         librarysearchpath:=TSearchPathList.Create;
         unitsearchpath:=TSearchPathList.Create;
         unitsearchpath:=TSearchPathList.Create;
         includesearchpath:=TSearchPathList.Create;
         includesearchpath:=TSearchPathList.Create;

+ 9 - 6
compiler/globtype.pas

@@ -263,7 +263,7 @@ interface
          cs_opt_level1,cs_opt_level2,cs_opt_level3,
          cs_opt_level1,cs_opt_level2,cs_opt_level3,
          cs_opt_regvar,cs_opt_uncertain,cs_opt_size,cs_opt_stackframe,
          cs_opt_regvar,cs_opt_uncertain,cs_opt_size,cs_opt_stackframe,
          cs_opt_peephole,cs_opt_asmcse,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_nodecse,
          cs_opt_peephole,cs_opt_asmcse,cs_opt_loopunroll,cs_opt_tailrecursion,cs_opt_nodecse,
-         cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler,cs_opt_autoinline,cs_useebp,
+         cs_opt_nodedfa,cs_opt_loopstrength,cs_opt_scheduler,cs_opt_autoinline,cs_useebp,cs_userbp,
          cs_opt_reorder_fields,cs_opt_fastmath,
          cs_opt_reorder_fields,cs_opt_fastmath,
          { Allow removing expressions whose result is not used, even when this
          { Allow removing expressions whose result is not used, even when this
            can change program behaviour (range check errors disappear,
            can change program behaviour (range check errors disappear,
@@ -308,7 +308,7 @@ interface
          'LEVEL1','LEVEL2','LEVEL3',
          'LEVEL1','LEVEL2','LEVEL3',
          'REGVAR','UNCERTAIN','SIZE','STACKFRAME',
          'REGVAR','UNCERTAIN','SIZE','STACKFRAME',
          'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE',
          'PEEPHOLE','ASMCSE','LOOPUNROLL','TAILREC','CSE',
-         'DFA','STRENGTH','SCHEDULE','AUTOINLINE','USEEBP',
+         'DFA','STRENGTH','SCHEDULE','AUTOINLINE','USEEBP','USERBP',
          'ORDERFIELDS','FASTMATH','DEADVALUES','REMOVEEMPTYPROCS',
          'ORDERFIELDS','FASTMATH','DEADVALUES','REMOVEEMPTYPROCS',
          'CONSTPROP',
          'CONSTPROP',
          'DEADSTORE'
          'DEADSTORE'
@@ -336,7 +336,7 @@ interface
        { switches being applied to all CPUs at the given level }
        { switches being applied to all CPUs at the given level }
        genericlevel1optimizerswitches = [cs_opt_level1];
        genericlevel1optimizerswitches = [cs_opt_level1];
        genericlevel2optimizerswitches = [cs_opt_level2,cs_opt_remove_emtpy_proc];
        genericlevel2optimizerswitches = [cs_opt_level2,cs_opt_remove_emtpy_proc];
-       genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate];
+       genericlevel3optimizerswitches = [cs_opt_level3,cs_opt_constant_propagate,cs_opt_nodedfa];
        genericlevel4optimizerswitches = [cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
        genericlevel4optimizerswitches = [cs_opt_reorder_fields,cs_opt_dead_values,cs_opt_fastmath];
 
 
        { whole program optimizations whose information generation requires
        { whole program optimizations whose information generation requires
@@ -389,8 +389,10 @@ interface
          m_final_fields,        { allows declaring fields as "final", which means they must be initialised
          m_final_fields,        { allows declaring fields as "final", which means they must be initialised
                                   in the (class) constructor and are constant from then on (same as final
                                   in the (class) constructor and are constant from then on (same as final
                                   fields in Java) }
                                   fields in Java) }
-         m_default_unicodestring { makes the default string type in $h+ mode unicodestring rather than
-                                   ansistring; similarly, char becomes unicodechar rather than ansichar }
+         m_default_unicodestring, { makes the default string type in $h+ mode unicodestring rather than
+                                    ansistring; similarly, char becomes unicodechar rather than ansichar }
+         m_type_helpers         { allows the declaration of "type helper" (non-Delphi) or "record helper"
+                                  (Delphi) for primitive types }
        );
        );
        tmodeswitches = set of tmodeswitch;
        tmodeswitches = set of tmodeswitch;
 
 
@@ -554,7 +556,8 @@ interface
          'ISOUNARYMINUS',
          'ISOUNARYMINUS',
          'SYSTEMCODEPAGE',
          'SYSTEMCODEPAGE',
          'FINALFIELDS',
          'FINALFIELDS',
-         'UNICODESTRINGS');
+         'UNICODESTRINGS',
+         'TYPEHELPERS');
 
 
 
 
      type
      type

+ 15 - 10
compiler/hlcg2ll.pas

@@ -151,7 +151,6 @@ unit hlcg2ll;
 
 
           function a_call_name(list : TAsmList;pd : tprocdef;const s : TSymStr; forceresdef: tdef; weak: boolean): tcgpara;override;
           function a_call_name(list : TAsmList;pd : tprocdef;const s : TSymStr; forceresdef: tdef; weak: boolean): tcgpara;override;
           procedure a_call_reg(list : TAsmList;pd : tabstractprocdef;reg : tregister);override;
           procedure a_call_reg(list : TAsmList;pd : tabstractprocdef;reg : tregister);override;
-          procedure a_call_ref(list : TAsmList;pd : tabstractprocdef;const ref : treference);override;
           { same as a_call_name, might be overridden on certain architectures to emit
           { same as a_call_name, might be overridden on certain architectures to emit
             static calls without usage of a got trampoline }
             static calls without usage of a got trampoline }
           function a_call_name_static(list : TAsmList;pd : tprocdef;const s : TSymStr; forceresdef: tdef): tcgpara;override;
           function a_call_name_static(list : TAsmList;pd : tprocdef;const s : TSymStr; forceresdef: tdef): tcgpara;override;
@@ -462,11 +461,6 @@ implementation
       cg.a_call_reg(list,reg);
       cg.a_call_reg(list,reg);
     end;
     end;
 
 
-  procedure thlcg2ll.a_call_ref(list: TAsmList; pd: tabstractprocdef; const ref: treference);
-    begin
-      cg.a_call_ref(list,ref);
-    end;
-
   function thlcg2ll.a_call_name_static(list: TAsmList; pd: tprocdef; const s: TSymStr; forceresdef: tdef): tcgpara;
   function thlcg2ll.a_call_name_static(list: TAsmList; pd: tprocdef; const s: TSymStr; forceresdef: tdef): tcgpara;
     begin
     begin
       cg.a_call_name_static(list,s);
       cg.a_call_name_static(list,s);
@@ -1021,7 +1015,7 @@ implementation
       hl: tasmlabel;
       hl: tasmlabel;
       oldloc : tlocation;
       oldloc : tlocation;
       const_location: boolean;
       const_location: boolean;
-      dst_cgsize: tcgsize;
+      dst_cgsize,tmpsize: tcgsize;
     begin
     begin
       oldloc:=l;
       oldloc:=l;
       dst_cgsize:=def_cgsize(dst_size);
       dst_cgsize:=def_cgsize(dst_size);
@@ -1051,7 +1045,7 @@ implementation
 {$ifdef cpuflags}
 {$ifdef cpuflags}
               LOC_FLAGS :
               LOC_FLAGS :
                 begin
                 begin
-                  cg.g_flags2reg(list,OS_INT,l.resflags,hregister);
+                  cg.g_flags2reg(list,OS_32,l.resflags,hregister);
                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
                   cg.a_reg_dealloc(list,NR_DEFAULTFLAGS);
                 end;
                 end;
 {$endif cpuflags}
 {$endif cpuflags}
@@ -1064,6 +1058,9 @@ implementation
                   cg.a_label(list,current_procinfo.CurrFalseLabel);
                   cg.a_label(list,current_procinfo.CurrFalseLabel);
                   cg.a_load_const_reg(list,OS_INT,0,hregister);
                   cg.a_load_const_reg(list,OS_INT,0,hregister);
                   cg.a_label(list,hl);
                   cg.a_label(list,hl);
+{$if defined(cpu8bitalu) or defined(cpu16bitalu)}
+                  cg.a_load_reg_reg(list,OS_INT,OS_32,hregister,hregister);
+{$endif}
                 end;
                 end;
               else
               else
                 a_load_loc_reg(list,src_size,u32inttype,l,hregister);
                 a_load_loc_reg(list,src_size,u32inttype,l,hregister);
@@ -1149,13 +1146,21 @@ implementation
 {$endif cpuflags}
 {$endif cpuflags}
             LOC_JUMP :
             LOC_JUMP :
               begin
               begin
+                tmpsize:=dst_cgsize;
+{$if defined(cpu8bitalu) or defined(cpu16bitalu)}
+                if TCGSize2Size[dst_cgsize]>TCGSize2Size[OS_INT] then
+                  tmpsize:=OS_INT;
+{$endif}
                 cg.a_label(list,current_procinfo.CurrTrueLabel);
                 cg.a_label(list,current_procinfo.CurrTrueLabel);
-                cg.a_load_const_reg(list,dst_cgsize,1,hregister);
+                cg.a_load_const_reg(list,tmpsize,1,hregister);
                 current_asmdata.getjumplabel(hl);
                 current_asmdata.getjumplabel(hl);
                 cg.a_jmp_always(list,hl);
                 cg.a_jmp_always(list,hl);
                 cg.a_label(list,current_procinfo.CurrFalseLabel);
                 cg.a_label(list,current_procinfo.CurrFalseLabel);
-                cg.a_load_const_reg(list,dst_cgsize,0,hregister);
+                cg.a_load_const_reg(list,tmpsize,0,hregister);
                 cg.a_label(list,hl);
                 cg.a_label(list,hl);
+{$if defined(cpu8bitalu) or defined(cpu16bitalu)}
+                cg.a_load_reg_reg(list,tmpsize,dst_cgsize,hregister,hregister);
+{$endif}
               end;
               end;
             else
             else
               begin
               begin

+ 16 - 29
compiler/hlcgobj.pas

@@ -195,7 +195,6 @@ unit hlcgobj;
           }
           }
           function a_call_name(list : TAsmList;pd : tprocdef;const s : TSymStr; forceresdef: tdef; weak: boolean): tcgpara;virtual;abstract;
           function a_call_name(list : TAsmList;pd : tprocdef;const s : TSymStr; forceresdef: tdef; weak: boolean): tcgpara;virtual;abstract;
           procedure a_call_reg(list : TAsmList;pd : tabstractprocdef;reg : tregister);virtual;abstract;
           procedure a_call_reg(list : TAsmList;pd : tabstractprocdef;reg : tregister);virtual;abstract;
-          procedure a_call_ref(list : TAsmList;pd : tabstractprocdef;const ref : treference);virtual;
           { same as a_call_name, might be overridden on certain architectures to emit
           { same as a_call_name, might be overridden on certain architectures to emit
             static calls without usage of a got trampoline }
             static calls without usage of a got trampoline }
           function a_call_name_static(list : TAsmList;pd : tprocdef;const s : TSymStr; forceresdef: tdef): tcgpara;virtual;
           function a_call_name_static(list : TAsmList;pd : tprocdef;const s : TSymStr; forceresdef: tdef): tcgpara;virtual;
@@ -987,22 +986,6 @@ implementation
          end;
          end;
     end;
     end;
 
 
-  procedure thlcgobj.a_call_ref(list: TAsmList; pd: tabstractprocdef; const ref: treference);
-    var
-      reg: tregister;
-      size: tdef;
-    begin
-      { the loaded data is always a pointer to a procdef. A procvardef is
-        implicitly a pointer already, but a procdef isn't -> create one }
-      if pd.typ=procvardef then
-        size:=pd
-      else
-        size:=getpointerdef(pd);
-      reg:=getaddressregister(list,size);
-      a_load_ref_reg(list,size,size,ref,reg);
-      a_call_reg(list,pd,reg);
-    end;
-
   function thlcgobj.a_call_name_static(list: TAsmList; pd: tprocdef; const s: TSymStr; forceresdef: tdef): tcgpara;
   function thlcgobj.a_call_name_static(list: TAsmList; pd: tprocdef; const s: TSymStr; forceresdef: tdef): tcgpara;
     begin
     begin
       result:=a_call_name(list,pd,s,forceresdef,false);
       result:=a_call_name(list,pd,s,forceresdef,false);
@@ -1562,6 +1545,8 @@ implementation
                   tmpreg:=getintregister(list,locsize);
                   tmpreg:=getintregister(list,locsize);
                   a_load_const_reg(list,locsize,loc.value,tmpreg);
                   a_load_const_reg(list,locsize,loc.value,tmpreg);
                 end;
                 end;
+              else
+                internalerror(2013112909);
             end;
             end;
             a_bit_test_reg_reg_reg(list,bitnumbersize,locsize,destsize,bitnumber,tmpreg,destreg);
             a_bit_test_reg_reg_reg(list,bitnumbersize,locsize,destsize,bitnumber,tmpreg,destreg);
           end;
           end;
@@ -2229,7 +2214,9 @@ implementation
       tmpreg: tregister;
       tmpreg: tregister;
       subsetregdef: torddef;
       subsetregdef: torddef;
       stopbit: byte;
       stopbit: byte;
+
     begin
     begin
+      tmpreg:=NR_NO;
       subsetregdef:=cgsize_orddef(sreg.subsetregsize);
       subsetregdef:=cgsize_orddef(sreg.subsetregsize);
       stopbit:=sreg.startbit+sreg.bitlen;
       stopbit:=sreg.startbit+sreg.bitlen;
       // on x86(64), 1 shl 32(64) = 1 instead of 0
       // on x86(64), 1 shl 32(64) = 1 instead of 0
@@ -2245,7 +2232,10 @@ implementation
            if (slopt<>SL_REGNOSRCMASK) then
            if (slopt<>SL_REGNOSRCMASK) then
             a_op_const_reg(list,OP_AND,subsetregdef,tcgint(not(bitmask)),tmpreg);
             a_op_const_reg(list,OP_AND,subsetregdef,tcgint(not(bitmask)),tmpreg);
         end;
         end;
-      if (slopt<>SL_SETMAX) then
+      if (slopt<>SL_SETMAX) and
+      { the "and" is not needed if the whole register is modified (except for SL_SETZERO),
+        because later on we do a move in this case instead of an or }
+        ((sreg.bitlen<>AIntBits) or (slopt=SL_SETZERO)) then
         a_op_const_reg(list,OP_AND,subsetregdef,tcgint(bitmask),sreg.subsetreg);
         a_op_const_reg(list,OP_AND,subsetregdef,tcgint(bitmask),sreg.subsetreg);
 
 
       case slopt of
       case slopt of
@@ -2257,8 +2247,11 @@ implementation
               sreg.subsetreg)
               sreg.subsetreg)
           else
           else
             a_load_const_reg(list,subsetregdef,-1,sreg.subsetreg);
             a_load_const_reg(list,subsetregdef,-1,sreg.subsetreg);
+        { if the whole register is modified, no "or" is needed }
+        else if sreg.bitlen=AIntBits then
+          a_load_reg_reg(list,subsetregdef,subsetregdef,tmpreg,sreg.subsetreg)
         else
         else
-          a_op_reg_reg(list,OP_OR,subsetregdef,tmpreg,sreg.subsetreg);
+          a_op_reg_reg(list,OP_OR,subsetregdef,tmpreg,sreg.subsetreg)
        end;
        end;
     end;
     end;
 
 
@@ -3565,6 +3558,8 @@ implementation
           toreg:=getaddressregister(list,regsize);
           toreg:=getaddressregister(list,regsize);
         R_FPUREGISTER:
         R_FPUREGISTER:
           toreg:=getfpuregister(list,regsize);
           toreg:=getfpuregister(list,regsize);
+        else
+          internalerror(2013112910);
       end;
       end;
       a_load_reg_reg(list,regsize,regsize,fromreg,toreg);
       a_load_reg_reg(list,regsize,regsize,fromreg,toreg);
     end;
     end;
@@ -3580,6 +3575,8 @@ implementation
             toreg:=getaddressregister(list,regsize);
             toreg:=getaddressregister(list,regsize);
           R_FPUREGISTER:
           R_FPUREGISTER:
             toreg:=getfpuregister(list,regsize);
             toreg:=getfpuregister(list,regsize);
+        else
+          internalerror(2013112915);
         end;
         end;
         a_load_reg_reg(list,regsize,regsize,fromreg,toreg);
         a_load_reg_reg(list,regsize,regsize,fromreg,toreg);
       end;
       end;
@@ -3878,12 +3875,6 @@ implementation
     end;
     end;
 
 
 
 
-  function use_ent : boolean;
-    begin
-	  use_ent := (target_info.system in [system_mipsel_linux,system_mipseb_linux])
-	             or (target_info.cpu=cpu_alpha);
-    end;
-
   procedure thlcgobj.gen_proc_symbol(list: TAsmList);
   procedure thlcgobj.gen_proc_symbol(list: TAsmList);
     var
     var
       item,
       item,
@@ -3917,15 +3908,11 @@ implementation
           previtem:=item;
           previtem:=item;
           item := TCmdStrListItem(item.next);
           item := TCmdStrListItem(item.next);
         end;
         end;
-      if (use_ent) then
-        list.concat(Tai_directive.create(asd_ent,current_procinfo.procdef.mangledname));
       current_procinfo.procdef.procstarttai:=tai(list.last);
       current_procinfo.procdef.procstarttai:=tai(list.last);
     end;
     end;
 
 
   procedure thlcgobj.gen_proc_symbol_end(list: TAsmList);
   procedure thlcgobj.gen_proc_symbol_end(list: TAsmList);
     begin
     begin
-      if (use_ent) then
-        list.concat(Tai_directive.create(asd_ent_end,current_procinfo.procdef.mangledname));
       list.concat(Tai_symbol_end.Createname(current_procinfo.procdef.mangledname));
       list.concat(Tai_symbol_end.Createname(current_procinfo.procdef.mangledname));
 
 
       current_procinfo.procdef.procendtai:=tai(list.last);
       current_procinfo.procdef.procendtai:=tai(list.last);

+ 9 - 2
compiler/htypechk.pas

@@ -870,6 +870,8 @@ implementation
       begin
       begin
         isbinaryoverloaded:=false;
         isbinaryoverloaded:=false;
         operpd:=nil;
         operpd:=nil;
+        ppn:=nil;
+
         { load easier access variables }
         { load easier access variables }
         ld:=tbinarynode(t).left.resultdef;
         ld:=tbinarynode(t).left.resultdef;
         rd:=tbinarynode(t).right.resultdef;
         rd:=tbinarynode(t).right.resultdef;
@@ -972,6 +974,8 @@ implementation
     { marks an lvalue as "unregable" }
     { marks an lvalue as "unregable" }
     procedure make_not_regable_intern(p : tnode; how: tregableinfoflags; records_only: boolean);
     procedure make_not_regable_intern(p : tnode; how: tregableinfoflags; records_only: boolean);
       begin
       begin
+        if ra_addr_taken in how then
+          include(p.flags,nf_address_taken);
         repeat
         repeat
           case p.nodetype of
           case p.nodetype of
             subscriptn:
             subscriptn:
@@ -1153,6 +1157,9 @@ implementation
              vecn:
              vecn:
                begin
                begin
                  set_varstate(tbinarynode(p).right,vs_read,[vsf_must_be_valid]);
                  set_varstate(tbinarynode(p).right,vs_read,[vsf_must_be_valid]);
+                 { dyn. arrays and dyn. strings are read }
+                 if is_implicit_array_pointer(tunarynode(p).left.resultdef) then
+                   newstate:=vs_read;
                  if (newstate in [vs_read,vs_readwritten]) or
                  if (newstate in [vs_read,vs_readwritten]) or
                     not(tunarynode(p).left.resultdef.typ in [stringdef,arraydef]) then
                     not(tunarynode(p).left.resultdef.typ in [stringdef,arraydef]) then
                    include(varstateflags,vsf_must_be_valid)
                    include(varstateflags,vsf_must_be_valid)
@@ -1510,8 +1517,8 @@ implementation
                      begin
                      begin
                        { pointer -> array conversion is done then we need to see it
                        { pointer -> array conversion is done then we need to see it
                          as a deref, because a ^ is then not required anymore }
                          as a deref, because a ^ is then not required anymore }
-                       if (ttypeconvnode(hp).left.resultdef.typ=pointerdef) then
-                        gotderef:=true;
+                       if ttypeconvnode(hp).convtype=tc_pointer_2_array then
+                         gotderef:=true;
                      end;
                      end;
                  end;
                  end;
                  hp:=ttypeconvnode(hp).left;
                  hp:=ttypeconvnode(hp).left;

+ 16 - 10
compiler/i386/aopt386.pas

@@ -64,13 +64,16 @@ Begin
    { or nil                                                                }
    { or nil                                                                }
      While Assigned(BlockStart) Do
      While Assigned(BlockStart) Do
        Begin
        Begin
-         if pass = 0 then
-           PrePeepHoleOpts(AsmL, BlockStart, BlockEnd);
-        { Peephole optimizations }
-         PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
-        { Only perform them twice in the first pass }
-         if pass = 0 then
-           PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
+         if (cs_opt_peephole in current_settings.optimizerswitches) then
+           begin
+            if (pass = 0) then
+              PrePeepHoleOpts(AsmL, BlockStart, BlockEnd);
+              { Peephole optimizations }
+               PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
+              { Only perform them twice in the first pass }
+               if pass = 0 then
+                 PeepHoleOptPass1(AsmL, BlockStart, BlockEnd);
+           end;
         { Data flow analyzer }
         { Data flow analyzer }
          If (cs_opt_asmcse in current_settings.optimizerswitches) Then
          If (cs_opt_asmcse in current_settings.optimizerswitches) Then
            begin
            begin
@@ -79,9 +82,12 @@ Begin
                changed := CSE(asmL, blockStart, blockEnd, pass) or changed;
                changed := CSE(asmL, blockStart, blockEnd, pass) or changed;
            end;
            end;
         { More peephole optimizations }
         { More peephole optimizations }
-         PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
-         if lastLoop then
-           PostPeepHoleOpts(AsmL, BlockStart, BlockEnd);
+         if (cs_opt_peephole in current_settings.optimizerswitches) then
+           begin
+             PeepHoleOptPass2(AsmL, BlockStart, BlockEnd);
+             if lastLoop then
+               PostPeepHoleOpts(AsmL, BlockStart, BlockEnd);
+           end;
 
 
         { Free memory }
         { Free memory }
         dfa.clear;
         dfa.clear;

+ 17 - 22
compiler/i386/cgcpu.pas

@@ -294,17 +294,15 @@ unit cgcpu;
 
 
     procedure tcg386.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
     procedure tcg386.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
 
 
-      procedure increase_fp(a : tcgint);
+      procedure increase_sp(a : tcgint);
         var
         var
           href : treference;
           href : treference;
         begin
         begin
-          reference_reset_base(href,current_procinfo.framepointer,a,0);
+          reference_reset_base(href,NR_STACK_POINTER_REG,a,0);
           { normally, lea is a better choice than an add }
           { normally, lea is a better choice than an add }
-          list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],href,current_procinfo.framepointer));
+          list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],href,NR_STACK_POINTER_REG));
         end;
         end;
 
 
-      var
-        stacksize : longint;
       begin
       begin
         { MMX needs to call EMMS }
         { MMX needs to call EMMS }
         if assigned(rg[R_MMXREGISTER]) and
         if assigned(rg[R_MMXREGISTER]) and
@@ -314,20 +312,15 @@ unit cgcpu;
         { remove stackframe }
         { remove stackframe }
         if not nostackframe then
         if not nostackframe then
           begin
           begin
-            if current_procinfo.framepointer=NR_STACK_POINTER_REG then
+            if (current_procinfo.framepointer=NR_STACK_POINTER_REG) or
+               (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
               begin
               begin
-                stacksize:=current_procinfo.calc_stackframe_size;
-                if (target_info.stackalign>4) and
-                   ((stacksize <> 0) or
-                    (pi_do_call in current_procinfo.flags) or
-                    { can't detect if a call in this case -> use nostackframe }
-                    { if you (think you) know what you are doing              }
-                    (po_assembler in current_procinfo.procdef.procoptions)) then
-                  stacksize := align(stacksize+sizeof(aint),target_info.stackalign) - sizeof(aint);
-                if stacksize<>0 then
-                  increase_fp(stacksize);
+                if current_procinfo.final_localsize<>0 then
+                  increase_sp(current_procinfo.final_localsize);
                 if (not paramanager.use_fixed_stack) then
                 if (not paramanager.use_fixed_stack) then
                   internal_restore_regs(list,true);
                   internal_restore_regs(list,true);
+                if (current_procinfo.procdef.proctypeoption=potype_exceptfilter) then
+                  list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_ADDR],NR_FRAME_POINTER_REG));
               end
               end
             else
             else
               begin
               begin
@@ -387,7 +380,8 @@ unit cgcpu;
            { but not on win32 }
            { but not on win32 }
            { and not for safecall with hidden exceptions, because the result }
            { and not for safecall with hidden exceptions, because the result }
            { wich contains the exception is passed in EAX }
            { wich contains the exception is passed in EAX }
-           if (target_info.system <> system_i386_win32) and
+           if ((target_info.system <> system_i386_win32) or
+               (target_info.abi=abi_old_win32_gnu)) and
               not ((current_procinfo.procdef.proccalloption = pocall_safecall) and
               not ((current_procinfo.procdef.proccalloption = pocall_safecall) and
                (tf_safecall_exceptions in target_info.flags)) and
                (tf_safecall_exceptions in target_info.flags)) and
               paramanager.ret_in_param(current_procinfo.procdef.returndef,
               paramanager.ret_in_param(current_procinfo.procdef.returndef,
@@ -561,7 +555,7 @@ unit cgcpu;
       begin
       begin
         if not paramanager.use_fixed_stack then
         if not paramanager.use_fixed_stack then
           begin
           begin
-            cg.a_reg_alloc(list,NR_FUNCTION_RESULT_REG);
+            a_reg_alloc(list,NR_FUNCTION_RESULT_REG);
             list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_INT],NR_FUNCTION_RESULT_REG))
             list.concat(Taicpu.op_reg(A_POP,tcgsize2opsize[OS_INT],NR_FUNCTION_RESULT_REG))
           end
           end
         else
         else
@@ -590,7 +584,7 @@ unit cgcpu;
                (current_settings.optimizecputype in [cpu_Pentium2,cpu_Pentium3,cpu_Pentium4]) } then
                (current_settings.optimizecputype in [cpu_Pentium2,cpu_Pentium3,cpu_Pentium4]) } then
               begin
               begin
                 current_module.requires_ebx_pic_helper:=true;
                 current_module.requires_ebx_pic_helper:=true;
-                cg.a_call_name_static(list,'fpc_geteipasebx');
+                a_call_name_static(list,'fpc_geteipasebx');
               end
               end
             else
             else
               begin
               begin
@@ -654,6 +648,7 @@ unit cgcpu;
           for i:=0 to procdef.paras.count-1 do
           for i:=0 to procdef.paras.count-1 do
            begin
            begin
              hp:=tparavarsym(procdef.paras[i]);
              hp:=tparavarsym(procdef.paras[i]);
+             procdef.init_paraloc_info(calleeside);
              paraloc:=hp.paraloc[calleeside].Location;
              paraloc:=hp.paraloc[calleeside].Location;
              while paraloc<>nil do
              while paraloc<>nil do
                begin
                begin
@@ -679,7 +674,7 @@ unit cgcpu;
               else
               else
                 selfoffsetfromsp:=sizeof(aint);
                 selfoffsetfromsp:=sizeof(aint);
               reference_reset_base(href,NR_ESP,selfoffsetfromsp+offs,4);
               reference_reset_base(href,NR_ESP,selfoffsetfromsp+offs,4);
-              cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
+              a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
             end;
             end;
         end;
         end;
 
 
@@ -689,7 +684,7 @@ unit cgcpu;
         begin
         begin
           { mov  0(%eax),%reg ; load vmt}
           { mov  0(%eax),%reg ; load vmt}
           reference_reset_base(href,NR_EAX,0,4);
           reference_reset_base(href,NR_EAX,0,4);
-          cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,reg);
+          a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,reg);
         end;
         end;
 
 
       procedure op_onregmethodaddr(op: TAsmOp; reg: tregister);
       procedure op_onregmethodaddr(op: TAsmOp; reg: tregister);
@@ -712,7 +707,7 @@ unit cgcpu;
             Internalerror(200006139);
             Internalerror(200006139);
           { mov vmtoffs(%eax),%eax ; method offs }
           { mov vmtoffs(%eax),%eax ; method offs }
           reference_reset_base(href,NR_EAX,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),4);
           reference_reset_base(href,NR_EAX,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),4);
-          cg.a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
+          a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_EAX);
         end;
         end;
 
 
 
 

+ 2 - 0
compiler/i386/cpuelf.pas

@@ -110,6 +110,8 @@ implementation
           result:=R_386_GOTPC;
           result:=R_386_GOTPC;
         RELOC_PLT32 :
         RELOC_PLT32 :
           result:=R_386_PLT32;
           result:=R_386_PLT32;
+        RELOC_GOTOFF:
+          result:=R_386_GOTOFF;
       else
       else
         result:=0;
         result:=0;
         InternalError(2012082301);
         InternalError(2012082301);

+ 34 - 2
compiler/i386/cpuinfo.pas

@@ -46,7 +46,10 @@ Type
        cpu_Pentium2,
        cpu_Pentium2,
        cpu_Pentium3,
        cpu_Pentium3,
        cpu_Pentium4,
        cpu_Pentium4,
-       cpu_PentiumM
+       cpu_PentiumM,
+       cpu_core_i,
+       cpu_core_avx,
+       cpu_core_avx2
       );
       );
 
 
    tfputype =
    tfputype =
@@ -85,7 +88,10 @@ Const
      'PENTIUM2',
      'PENTIUM2',
      'PENTIUM3',
      'PENTIUM3',
      'PENTIUM4',
      'PENTIUM4',
-     'PENTIUMM'
+     'PENTIUMM',
+     'COREI',
+     'COREAVX',
+     'COREAVX2'
    );
    );
 
 
    fputypestr : array[tfputype] of string[6] = ('',
    fputypestr : array[tfputype] of string[6] = ('',
@@ -123,6 +129,32 @@ Const
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [cs_useebp];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [cs_useebp];
 
 
+type
+   tcpuflags =
+      (CPUX86_HAS_SSEUNIT,
+       CPUX86_HAS_BMI1,
+       CPUX86_HAS_BMI2,
+       CPUX86_HAS_POPCNT,
+       CPUX86_HAS_AVXUNIT,
+       CPUX86_HAS_LZCNT,
+       CPUX86_HAS_MOVBE
+      );
+
+ const
+   cpu_capabilities : array[tcputype] of set of tcpuflags = (
+     { cpu_none      } [],
+     { cpu_386       } [],
+     { cpu_Pentium   } [],
+     { cpu_Pentium2  } [],
+     { cpu_Pentium3  } [CPUX86_HAS_SSEUNIT],
+     { cpu_Pentium4  } [CPUX86_HAS_SSEUNIT],
+     { cpu_PentiumM  } [CPUX86_HAS_SSEUNIT],
+     { cpu_core_i    } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
+     { cpu_core_avx  } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
+     { cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE]
+   );
+
+
 Implementation
 Implementation
 
 
 end.
 end.

+ 3 - 0
compiler/i386/cpunode.pas

@@ -54,6 +54,9 @@ unit cpunode;
        n386mem,
        n386mem,
        n386set,
        n386set,
        n386inl,
        n386inl,
+{$ifdef TEST_WIN32_SEH}
+       n386flw,
+{$endif TEST_WIN32_SEH}
        n386mat
        n386mat
        ;
        ;
 
 

+ 0 - 53
compiler/i386/cpupara.pas

@@ -40,12 +40,6 @@ unit cpupara;
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
-          { Returns the location for the nr-st 32 Bit int parameter
-            if every parameter before is an 32 Bit int parameter as well
-            and if the calling conventions for the helper routines of the
-            rtl are used.
-          }
-          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;can_use_final_stack_loc : boolean;var cgpara:TCGPara);override;
           procedure createtempparaloc(list: TAsmList;calloption : tproccalloption;parasym : tparavarsym;can_use_final_stack_loc : boolean;var cgpara:TCGPara);override;
@@ -274,53 +268,6 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    procedure tcpuparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
-      var
-        paraloc : pcgparalocation;
-        psym: tparavarsym;
-        pdef: tdef;
-      begin
-        psym:=tparavarsym(pd.paras[nr-1]);
-        pdef:=psym.vardef;
-        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
-          pdef:=getpointerdef(pdef);
-        cgpara.reset;
-        cgpara.size:=def_cgsize(pdef);
-        cgpara.intsize:=tcgsize2size[cgpara.size];
-        cgpara.alignment:=get_para_align(pd.proccalloption);
-        cgpara.def:=pdef;
-        paraloc:=cgpara.add_location;
-        with paraloc^ do
-         begin
-           size:=def_cgsize(pdef);
-           def:=pdef;
-           if pd.proccalloption=pocall_register then
-             begin
-               if (nr<=length(parasupregs)) then
-                 begin
-                   if nr=0 then
-                     internalerror(200309271);
-                   loc:=LOC_REGISTER;
-                   register:=newreg(R_INTREGISTER,parasupregs[nr-1],R_SUBWHOLE);
-                 end
-               else
-                 begin
-                   loc:=LOC_REFERENCE;
-                   reference.index:=NR_STACK_POINTER_REG;
-                   { the previous parameters didn't take up room in memory }
-                   reference.offset:=sizeof(aint)*(nr-length(parasupregs)-1)
-                 end;
-             end
-           else
-             begin
-               loc:=LOC_REFERENCE;
-               reference.index:=NR_STACK_POINTER_REG;
-               reference.offset:=sizeof(aint)*nr;
-             end;
-          end;
-      end;
-
-
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): TCGPara;
     function  tcpuparamanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): TCGPara;
       var
       var
         retcgsize  : tcgsize;
         retcgsize  : tcgsize;

+ 17 - 7
compiler/i386/csopt386.pas

@@ -353,6 +353,7 @@ var
 
 
   function getNextRegToTest(var prev: tai; currentReg: tsuperregister): tsuperregister;
   function getNextRegToTest(var prev: tai; currentReg: tsuperregister): tsuperregister;
   begin
   begin
+    getNextRegToTest := RS_INVALID;
     if not checkingPrevSequences then
     if not checkingPrevSequences then
       begin
       begin
         if (currentreg = RS_INVALID) then
         if (currentreg = RS_INVALID) then
@@ -423,9 +424,13 @@ begin {CheckSequence}
   TmpResult := False;
   TmpResult := False;
   FillChar(OrgRegInfo, Sizeof(OrgRegInfo), 0);
   FillChar(OrgRegInfo, Sizeof(OrgRegInfo), 0);
   FillChar(startRegInfo, sizeof(startRegInfo), 0);
   FillChar(startRegInfo, sizeof(startRegInfo), 0);
+  FillChar(HighRegInfo, sizeof(HighRegInfo), 0);
+  FillChar(prevreginfo, sizeof(prevreginfo), 0);
   OrgRegFound := 0;
   OrgRegFound := 0;
   HighFound := 0;
   HighFound := 0;
   OrgRegResult := False;
   OrgRegResult := False;
+  highPrev := nil;
+  orgPrev := nil;
   with startRegInfo do
   with startRegInfo do
     begin
     begin
       newRegsEncountered := [RS_EBP, RS_ESP];
       newRegsEncountered := [RS_EBP, RS_ESP];
@@ -1074,6 +1079,7 @@ var
   prev: tai;
   prev: tai;
   newOrgRegRState, newOrgRegWState: byte;
   newOrgRegRState, newOrgRegWState: byte;
 begin
 begin
+  newOrgRegwState := 0;
   if getLastInstruction(hp,prev) then
   if getLastInstruction(hp,prev) then
     with ptaiprop(prev.optinfo)^ do
     with ptaiprop(prev.optinfo)^ do
       begin
       begin
@@ -1429,6 +1435,7 @@ var
 
 
 begin
 begin
   replacereg := false;
   replacereg := false;
+  readStateChanged := false;
   if canreplacereg(orgsupreg,newsupreg,p,orgregcanbemodified,newregmodified, orgregread, removelast,endp) then
   if canreplacereg(orgsupreg,newsupreg,p,orgregcanbemodified,newregmodified, orgregread, removelast,endp) then
     begin
     begin
 {$ifdef replaceregdebug}
 {$ifdef replaceregdebug}
@@ -1595,12 +1602,12 @@ var
   regcounter: tsuperregister;
   regcounter: tsuperregister;
   optimizable: boolean;
   optimizable: boolean;
 begin
 begin
+  memtoreg := NR_NO;
+
   if not getlastinstruction(t,hp) or
   if not getlastinstruction(t,hp) or
      not issimplememloc(ref) then
      not issimplememloc(ref) then
-    begin
-      memtoreg := NR_NO;
-      exit;
-    end;
+    exit;
+
   p := ptaiprop(hp.optinfo);
   p := ptaiprop(hp.optinfo);
   optimizable := false;
   optimizable := false;
   for regcounter := RS_EAX to RS_EDI do
   for regcounter := RS_EAX to RS_EDI do
@@ -1832,15 +1839,18 @@ procedure doCSE(asml: TAsmList; First, Last: tai; findPrevSeqs, doSubOpts: boole
  removed immediately because sometimes an instruction needs to be checked in
  removed immediately because sometimes an instruction needs to be checked in
  two different sequences}
  two different sequences}
 var cnt, cnt2, {cnt3,} orgNrofMods: longint;
 var cnt, cnt2, {cnt3,} orgNrofMods: longint;
-    p, hp1, hp2, prevSeq: tai;
-    hp4: tai;
-    hp5 : tai;
+    p, hp1, hp2, hp4, hp5, prevSeq: tai;
     reginfo: toptreginfo;
     reginfo: toptreginfo;
     memreg: tregister;
     memreg: tregister;
     regcounter: tsuperregister;
     regcounter: tsuperregister;
 begin
 begin
   p := First;
   p := First;
   SkipHead(p);
   SkipHead(p);
+  hp1 := nil;
+  hp2 := nil;
+  hp4 := nil;
+  hp5 := nil;
+  cnt := 0;
   while (p <> Last) do
   while (p <> Last) do
     begin
     begin
       case p.typ of
       case p.typ of

+ 1 - 0
compiler/i386/daopt386.pas

@@ -901,6 +901,7 @@ var
   Cnt: Word;
   Cnt: Word;
 begin
 begin
   TmpResult := False;
   TmpResult := False;
+  Result := False;
   if supreg = RS_INVALID then
   if supreg = RS_INVALID then
     exit;
     exit;
   if (p1.typ = ait_instruction) then
   if (p1.typ = ait_instruction) then

+ 1 - 0
compiler/i386/i386att.inc

@@ -946,6 +946,7 @@
 'vzeroupper',
 'vzeroupper',
 'andn',
 'andn',
 'bextr',
 'bextr',
+'tzcnt',
 'rorx',
 'rorx',
 'sarx',
 'sarx',
 'shlx',
 'shlx',

+ 1 - 0
compiler/i386/i386atts.inc

@@ -963,5 +963,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 1 - 0
compiler/i386/i386int.inc

@@ -946,6 +946,7 @@
 'vzeroupper',
 'vzeroupper',
 'andn',
 'andn',
 'bextr',
 'bextr',
+'tzcnt',
 'rorx',
 'rorx',
 'sarx',
 'sarx',
 'shlx',
 'shlx',

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
-1657;
+1658;

+ 1 - 0
compiler/i386/i386op.inc

@@ -946,6 +946,7 @@ A_VZEROALL,
 A_VZEROUPPER,
 A_VZEROUPPER,
 A_ANDN,
 A_ANDN,
 A_BEXTR,
 A_BEXTR,
+A_TZCNT,
 A_RORX,
 A_RORX,
 A_SARX,
 A_SARX,
 A_SHLX,
 A_SHLX,

+ 79 - 78
compiler/i386/i386prop.inc

@@ -224,8 +224,8 @@
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -389,7 +389,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_ROp1, Ch_WOp2, Ch_RFLAGS)),
+(Ch: (Ch_ROp1, Ch_RWOp2, Ch_RFLAGS)),
 (Ch: (Ch_RFLAGS, Ch_None, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_None, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
@@ -489,8 +489,8 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -570,7 +570,7 @@
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -730,6 +730,10 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -753,6 +757,12 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -761,28 +771,20 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
@@ -796,7 +798,15 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -859,6 +869,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -867,6 +885,24 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -876,15 +912,26 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -893,63 +940,17 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_None)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),

+ 7 - 0
compiler/i386/i386tab.inc

@@ -11571,6 +11571,13 @@
     code    : #242#249#1#247#62#72;
     code    : #242#249#1#247#62#72;
     flags   : if_bmi1
     flags   : if_bmi1
   ),
   ),
+  (
+    opcode  : A_TZCNT;
+    ops     : 2;
+    optypes : (ot_reg16 or ot_bits32 or ot_bits64,ot_rm_gpr,ot_none,ot_none);
+    code    : #208#219#2#15#188#72;
+    flags   : if_bmi1 or if_sm
+  ),
   (
   (
     opcode  : A_RORX;
     opcode  : A_RORX;
     ops     : 3;
     ops     : 3;

+ 146 - 21
compiler/i386/n386add.pas

@@ -31,10 +31,14 @@ interface
     type
     type
        ti386addnode = class(tx86addnode)
        ti386addnode = class(tx86addnode)
          function use_generic_mul32to64: boolean; override;
          function use_generic_mul32to64: boolean; override;
+         function use_generic_mul64bit: boolean; override;
          procedure second_addordinal; override;
          procedure second_addordinal; override;
          procedure second_add64bit;override;
          procedure second_add64bit;override;
          procedure second_cmp64bit;override;
          procedure second_cmp64bit;override;
          procedure second_mul(unsigned: boolean);
          procedure second_mul(unsigned: boolean);
+         procedure second_mul64bit;
+       protected
+         procedure set_mul_result_location;
        end;
        end;
 
 
   implementation
   implementation
@@ -58,6 +62,12 @@ interface
       result := False;
       result := False;
     end;
     end;
 
 
+    function ti386addnode.use_generic_mul64bit: boolean;
+    begin
+      result:=(cs_check_overflow in current_settings.localswitches) or
+        (cs_opt_size in current_settings.optimizerswitches);
+    end;
+
     { handles all unsigned multiplications, and 32->64 bit signed ones.
     { handles all unsigned multiplications, and 32->64 bit signed ones.
       32bit-only signed mul is handled by generic codegen }
       32bit-only signed mul is handled by generic codegen }
     procedure ti386addnode.second_addordinal;
     procedure ti386addnode.second_addordinal;
@@ -66,6 +76,11 @@ interface
     begin
     begin
       unsigned:=not(is_signed(left.resultdef)) or
       unsigned:=not(is_signed(left.resultdef)) or
                 not(is_signed(right.resultdef));
                 not(is_signed(right.resultdef));
+      { use IMUL instead of MUL in case overflow checking is off and we're
+        doing a 32->32-bit multiplication }
+      if not (cs_check_overflow in current_settings.localswitches) and
+         not is_64bit(resultdef) then
+        unsigned:=false;
       if (nodetype=muln) and (unsigned or is_64bit(resultdef)) then
       if (nodetype=muln) and (unsigned or is_64bit(resultdef)) then
         second_mul(unsigned)
         second_mul(unsigned)
       else
       else
@@ -117,6 +132,11 @@ interface
             op:=OP_OR;
             op:=OP_OR;
           andn:
           andn:
             op:=OP_AND;
             op:=OP_AND;
+          muln:
+            begin
+              second_mul64bit;
+              exit;
+            end
           else
           else
             begin
             begin
               { everything should be handled in pass_1 (JM) }
               { everything should be handled in pass_1 (JM) }
@@ -366,6 +386,32 @@ interface
                                 x86 MUL
                                 x86 MUL
 *****************************************************************************}
 *****************************************************************************}
 
 
+    procedure ti386addnode.set_mul_result_location;
+    begin
+      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+      {Free EAX,EDX}
+      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
+      if is_64bit(resultdef) then
+      begin
+        {Allocate a couple of registers and store EDX:EAX into it}
+        location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
+        location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
+      end
+      else
+      begin
+        {Allocate a new register and store the result in EAX in it.}
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
+      end;
+      location_freetemp(current_asmdata.CurrAsmList,left.location);
+      location_freetemp(current_asmdata.CurrAsmList,right.location);
+    end;
+
+
     procedure ti386addnode.second_mul(unsigned: boolean);
     procedure ti386addnode.second_mul(unsigned: boolean);
 
 
     var reg:Tregister;
     var reg:Tregister;
@@ -379,8 +425,6 @@ interface
     begin
     begin
       pass_left_right;
       pass_left_right;
 
 
-      {The location.register will be filled in later (JM)}
-      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
       { Mul supports registers and references, so if not register/reference,
       { Mul supports registers and references, so if not register/reference,
         load the location into a register.
         load the location into a register.
         The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
         The variant of IMUL which is capable of doing 32->64 bits has the same restrictions. }
@@ -418,26 +462,107 @@ interface
           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
           cg.a_label(current_asmdata.CurrAsmList,hl4);
           cg.a_label(current_asmdata.CurrAsmList,hl4);
         end;
         end;
-      {Free EAX,EDX}
-      cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EDX);
-      if is_64bit(resultdef) then
-      begin
-        {Allocate a couple of registers and store EDX:EAX into it}
-        location.register64.reghi := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EDX, location.register64.reghi);
-        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
-        location.register64.reglo := cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-        cg.a_load_reg_reg(current_asmdata.CurrAsmList, OS_INT, OS_INT, NR_EAX, location.register64.reglo);
-      end
+      set_mul_result_location;
+    end;
+
+
+    procedure ti386addnode.second_mul64bit;
+    var
+      list: TAsmList;
+      hreg1,hreg2: tregister;
+    begin
+      { 64x64 multiplication yields 128-bit result, but we're only
+        interested in its lower 64 bits. This lower part is independent
+        of operand signs, and so is the generated code. }
+      { pass_left_right already called from second_add64bit }
+      list:=current_asmdata.CurrAsmList;
+      if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
+        tcgx86(cg).make_simple_ref(list,left.location.reference);
+      if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
+        tcgx86(cg).make_simple_ref(list,right.location.reference);
+
+      { calculate 32-bit terms lo(right)*hi(left) and hi(left)*lo(right) }
+      if (right.location.loc=LOC_CONSTANT) then
+        begin
+          { Omit zero terms, if any }
+          hreg1:=NR_NO;
+          hreg2:=NR_NO;
+          if lo(right.location.value64)<>0 then
+            hreg1:=cg.getintregister(list,OS_INT);
+          if hi(right.location.value64)<>0 then
+            hreg2:=cg.getintregister(list,OS_INT);
+
+          { Take advantage of 3-operand form of IMUL }
+          case left.location.loc of
+            LOC_REGISTER,LOC_CREGISTER:
+              begin
+                if hreg1<>NR_NO then
+                  emit_const_reg_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.register64.reghi,hreg1);
+                if hreg2<>NR_NO then
+                  emit_const_reg_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.register64.reglo,hreg2);
+              end;
+            LOC_REFERENCE,LOC_CREFERENCE:
+              begin
+                if hreg2<>NR_NO then
+                  list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(hi(right.location.value64)),left.location.reference,hreg2));
+                inc(left.location.reference.offset,4);
+                if hreg1<>NR_NO then
+                  list.concat(taicpu.op_const_ref_reg(A_IMUL,S_L,longint(lo(right.location.value64)),left.location.reference,hreg1));
+                dec(left.location.reference.offset,4);
+              end;
+          else
+            InternalError(2014011602);
+          end;
+        end
       else
       else
-      begin
-        {Allocate a new register and store the result in EAX in it.}
-        location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_EAX);
-        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_EAX,location.register);
-      end;
-      location_freetemp(current_asmdata.CurrAsmList,left.location);
-      location_freetemp(current_asmdata.CurrAsmList,right.location);
+        begin
+          hreg1:=cg.getintregister(list,OS_INT);
+          hreg2:=cg.getintregister(list,OS_INT);
+          cg64.a_load64low_loc_reg(list,left.location,hreg1);
+          cg64.a_load64high_loc_reg(list,left.location,hreg2);
+          case right.location.loc of
+            LOC_REGISTER,LOC_CREGISTER:
+              begin
+                emit_reg_reg(A_IMUL,S_L,right.location.register64.reghi,hreg1);
+                emit_reg_reg(A_IMUL,S_L,right.location.register64.reglo,hreg2);
+              end;
+            LOC_REFERENCE,LOC_CREFERENCE:
+              begin
+                emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg2);
+                inc(right.location.reference.offset,4);
+                emit_ref_reg(A_IMUL,S_L,right.location.reference,hreg1);
+                dec(right.location.reference.offset,4);
+              end;
+          else
+            InternalError(2014011603);
+          end;
+        end;
+      { add hi*lo and lo*hi terms together }
+      if (hreg1<>NR_NO) and (hreg2<>NR_NO) then
+        emit_reg_reg(A_ADD,S_L,hreg2,hreg1);
+
+      { load lo(right) into EAX }
+      cg.getcpuregister(list,NR_EAX);
+      cg64.a_load64low_loc_reg(list,right.location,NR_EAX);
+
+      { multiply EAX by lo(left), producing 64-bit value in EDX:EAX }
+      cg.getcpuregister(list,NR_EDX);
+      if (left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
+        emit_reg(A_MUL,S_L,left.location.register64.reglo)
+      else if (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
+        emit_ref(A_MUL,S_L,left.location.reference)
+      else
+        InternalError(2014011604);
+      { add previously calculated terms to the high half }
+      if (hreg1<>NR_NO) then
+        emit_reg_reg(A_ADD,S_L,hreg1,NR_EDX)
+      else if (hreg2<>NR_NO) then
+        emit_reg_reg(A_ADD,S_L,hreg2,NR_EDX)
+      else
+        InternalError(2014011604);
+
+      { Result is now in EDX:EAX. Copy it to virtual registers. }
+      set_mul_result_location;
     end;
     end;
 
 
 
 

+ 2 - 1
compiler/i386/n386cal.pas

@@ -93,7 +93,8 @@ implementation
           it is always the first parameter (apart from hidden parentfp,
           it is always the first parameter (apart from hidden parentfp,
           but this one is never put into a register (vs_nonregable set)
           but this one is never put into a register (vs_nonregable set)
           so funcret is always in EAX for register calling }
           so funcret is always in EAX for register calling }
-        if (target_info.system = system_i386_win32) and
+        if ((target_info.system = system_i386_win32) and
+            not (target_info.abi=abi_old_win32_gnu)) and
             paramanager.ret_in_param(procdefinition.returndef,procdefinition) and
             paramanager.ret_in_param(procdefinition.returndef,procdefinition) and
             not ((procdefinition.proccalloption=pocall_register) or
             not ((procdefinition.proccalloption=pocall_register) or
                  ((procdefinition.proccalloption=pocall_internproc) and
                  ((procdefinition.proccalloption=pocall_internproc) and

+ 685 - 0
compiler/i386/n386flw.pas

@@ -0,0 +1,685 @@
+{
+    Copyright (c) 2011 by Free Pascal development team
+
+    Generate Win32-specific exception handling code
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit n386flw;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    node,nflw,ncgflw,psub;
+
+  type
+    ti386raisenode=class(tcgraisenode)
+      function pass_1 : tnode;override;
+    end;
+
+    ti386onnode=class(tcgonnode)
+      procedure pass_generate_code;override;
+    end;
+
+    ti386tryexceptnode=class(tcgtryexceptnode)
+      procedure pass_generate_code;override;
+    end;
+
+    ti386tryfinallynode=class(tcgtryfinallynode)
+      finalizepi: tcgprocinfo;
+      constructor create(l,r:TNode);override;
+      constructor create_implicit(l,r,_t1:TNode);override;
+      function pass_1: tnode;override;
+      function simplify(forinline: boolean): tnode;override;
+      procedure pass_generate_code;override;
+    end;
+
+implementation
+
+  uses
+    cutils,globtype,globals,verbose,systems,
+    nbas,ncal,nmem,nutils,
+    symconst,symbase,symtable,symsym,symdef,
+    cgbase,cgobj,cgcpu,cgutils,tgobj,
+    cpubase,htypechk,
+    parabase,paramgr,pdecsub,pass_1,pass_2,ncgutil,cga,
+    aasmbase,aasmtai,aasmdata,aasmcpu,procinfo,cpupi;
+
+  var
+    endexceptlabel: tasmlabel;
+
+
+{ ti386raisenode }
+
+function ti386raisenode.pass_1 : tnode;
+  var
+    statements : tstatementnode;
+    raisenode : tcallnode;
+  begin
+    { difference from generic code is that address stack is not popped on reraise }
+    if (target_info.system<>system_i386_win32) or assigned(left) then
+      result:=inherited pass_1
+    else
+      begin
+        result:=internalstatements(statements);
+        raisenode:=ccallnode.createintern('fpc_reraise',nil);
+        include(raisenode.callnodeflags,cnf_call_never_returns);
+        addstatement(statements,raisenode);
+      end;
+end;
+
+{ ti386onnode }
+
+procedure ti386onnode.pass_generate_code;
+  var
+    oldflowcontrol : tflowcontrol;
+    exceptvarsym : tlocalvarsym;
+  begin
+    if (target_info.system<>system_i386_win32) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+
+    location_reset(location,LOC_VOID,OS_NO);
+
+    oldflowcontrol:=flowcontrol;
+    flowcontrol:=flowcontrol*[fc_unwind]+[fc_inflowcontrol];
+
+    { RTL will put exceptobject into EAX when jumping here }
+    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
+
+    { Retrieve exception variable }
+    if assigned(excepTSymtable) then
+      exceptvarsym:=tlocalvarsym(excepTSymtable.SymList[0])
+    else
+      exceptvarsym:=nil;
+
+    if assigned(exceptvarsym) then
+      begin
+        exceptvarsym.localloc.loc:=LOC_REFERENCE;
+        exceptvarsym.localloc.size:=OS_ADDR;
+        tg.GetLocal(current_asmdata.CurrAsmList,sizeof(pint),voidpointertype,exceptvarsym.localloc.reference);
+        cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_FUNCTION_RESULT_REG,exceptvarsym.localloc.reference);
+      end;
+    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_FUNCTION_RESULT_REG);
+
+    if assigned(right) then
+      secondpass(right);
+
+    { deallocate exception symbol }
+    if assigned(exceptvarsym) then
+      begin
+        tg.UngetLocal(current_asmdata.CurrAsmList,exceptvarsym.localloc.reference);
+        exceptvarsym.localloc.loc:=LOC_INVALID;
+      end;
+    cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+
+    flowcontrol:=oldflowcontrol+(flowcontrol-[fc_inflowcontrol]);
+  end;
+
+{ ti386tryfinallynode }
+
+function reset_regvars(var n: tnode; arg: pointer): foreachnoderesult;
+  begin
+    case n.nodetype of
+      temprefn:
+        make_not_regable(n,[]);
+      calln:
+        include(tprocinfo(arg).flags,pi_do_call);
+    end;
+    result:=fen_true;
+  end;
+
+function copy_parasize(var n: tnode; arg: pointer): foreachnoderesult;
+  begin
+    case n.nodetype of
+      calln:
+        tcgprocinfo(arg).allocate_push_parasize(tcallnode(n).pushed_parasize);
+    end;
+    result:=fen_true;
+  end;
+
+constructor ti386tryfinallynode.create(l, r: TNode);
+  begin
+    inherited create(l,r);
+    if (target_info.system<>system_i386_win32) or (
+      { Don't create child procedures for generic methods, their nested-like
+        behavior causes compilation errors because real nested procedures
+        aren't allowed for generics. Not creating them doesn't harm because
+        generic node tree is discarded without generating code. }
+        assigned(current_procinfo.procdef.struct) and
+        (df_generic in current_procinfo.procdef.struct.defoptions)
+      ) then
+      exit;
+    finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
+    finalizepi.force_nested;
+    finalizepi.procdef:=create_finalizer_procdef;
+    finalizepi.entrypos:=r.fileinfo;
+    finalizepi.entryswitches:=r.localswitches;
+    finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
+    finalizepi.exitswitches:=current_settings.localswitches;
+    { Regvar optimization for symbols is suppressed when using exceptions, but
+      temps may be still placed into registers. This must be fixed. }
+    foreachnodestatic(r,@reset_regvars,finalizepi);
+    include(finalizepi.flags,pi_has_assembler_block);
+    include(finalizepi.flags,pi_do_call);
+    include(finalizepi.flags,pi_uses_exceptions);
+  end;
+
+constructor ti386tryfinallynode.create_implicit(l, r, _t1: TNode);
+  begin
+    inherited create_implicit(l, r, _t1);
+    if (target_info.system<>system_i386_win32) then
+      exit;
+
+    { safecall procedures can handle implicit finalization as part of "except" flow }
+    if implicitframe and (current_procinfo.procdef.proccalloption=pocall_safecall) then
+      exit;
+
+    if assigned(current_procinfo.procdef.struct) and
+      (df_generic in current_procinfo.procdef.struct.defoptions) then
+      InternalError(2013012501);
+
+    finalizepi:=tcgprocinfo(cprocinfo.create(current_procinfo));
+    finalizepi.force_nested;
+    finalizepi.procdef:=create_finalizer_procdef;
+    finalizepi.entrypos:=current_filepos;
+    finalizepi.exitpos:=current_filepos; // last_endtoken_pos?
+    finalizepi.entryswitches:=r.localswitches;
+    finalizepi.exitswitches:=current_settings.localswitches;
+    include(finalizepi.flags,pi_has_assembler_block);
+    include(finalizepi.flags,pi_do_call);
+    include(finalizepi.flags,pi_uses_exceptions);
+  end;
+
+
+function ti386tryfinallynode.pass_1: tnode;
+  var
+    selfsym: tparavarsym;
+  begin
+    result:=inherited pass_1;
+    if (target_info.system=system_i386_win32) then
+      begin
+        { safecall method will access 'self' from except block -> make it non-regable }
+        if implicitframe and (current_procinfo.procdef.proccalloption=pocall_safecall) and
+          is_class(current_procinfo.procdef.struct) then
+          begin
+            selfsym:=tparavarsym(current_procinfo.procdef.parast.Find('self'));
+            if (selfsym=nil) or (selfsym.typ<>paravarsym) then
+              InternalError(2011123101);
+            selfsym.varregable:=vr_none;
+          end;
+      end;
+  end;
+
+
+function ti386tryfinallynode.simplify(forinline: boolean): tnode;
+  begin
+    result:=inherited simplify(forinline);
+    if (target_info.system<>system_i386_win32) then
+      exit;
+
+    if (result=nil) and assigned(finalizepi) then
+      begin
+        finalizepi.code:=right;
+        foreachnodestatic(right,@copy_parasize,finalizepi);
+        right:=ccallnode.create(nil,tprocsym(finalizepi.procdef.procsym),nil,nil,[]);
+        firstpass(right);
+        { For implicit frames, no actual code is available at this time,
+          it is added later in assembler form. So store the nested procinfo
+          for later use. }
+        if implicitframe then
+          begin
+            current_procinfo.finalize_procinfo:=finalizepi;
+            { don't leave dangling pointer }
+            tcgprocinfo(current_procinfo).final_asmnode:=nil;
+          end;
+      end;
+  end;
+
+
+procedure emit_scope_start(handler,data: TAsmSymbol);
+  var
+    href: treference;
+    hreg: tregister;
+  begin
+    hreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
+    reference_reset_base(href,hreg,0,sizeof(pint));
+    href.segment:=NR_FS;
+    emit_reg_reg(A_XOR,S_L,hreg,hreg);
+    emit_sym(A_PUSH,S_L,data);
+    emit_reg(A_PUSH,S_L,NR_FRAME_POINTER_REG);
+    emit_sym(A_PUSH,S_L,handler);
+    emit_ref(A_PUSH,S_L,href);
+    emit_reg_ref(A_MOV,S_L,NR_ESP,href);
+  end;
+
+procedure emit_scope_end;
+  var
+    href: treference;
+    hreg,hreg2: tregister;
+  begin
+    hreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
+    hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
+    reference_reset_base(href,hreg,0,sizeof(pint));
+    href.segment:=NR_FS;
+    emit_reg_reg(A_XOR,S_L,hreg,hreg);
+    emit_reg(A_POP,S_L,hreg2);
+    emit_const_reg(A_ADD,S_L,3*sizeof(pint),NR_ESP);
+    emit_reg_ref(A_MOV,S_L,hreg2,href);
+  end;
+
+procedure ti386tryfinallynode.pass_generate_code;
+  var
+    finallylabel,
+    exceptlabel,
+    safecalllabel,
+    endfinallylabel,
+    exitfinallylabel,
+    continuefinallylabel,
+    breakfinallylabel,
+    oldCurrExitLabel,
+    oldContinueLabel,
+    oldBreakLabel : tasmlabel;
+    oldflowcontrol,tryflowcontrol : tflowcontrol;
+    is_safecall: boolean;
+    hreg: tregister;
+  begin
+    if (target_info.system<>system_i386_win32) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+    location_reset(location,LOC_VOID,OS_NO);
+    tryflowcontrol:=[];
+    oldBreakLabel:=nil;
+    oldContinueLabel:=nil;
+    continuefinallylabel:=nil;
+    breakfinallylabel:=nil;
+    exceptlabel:=nil;
+    safecalllabel:=nil;
+    is_safecall:=implicitframe and (current_procinfo.procdef.proccalloption=pocall_safecall);
+
+    { check if child nodes do a break/continue/exit }
+    oldflowcontrol:=flowcontrol;
+    flowcontrol:=[fc_inflowcontrol];
+    current_asmdata.getjumplabel(finallylabel);
+    current_asmdata.getjumplabel(endfinallylabel);
+
+    { the finally block must catch break, continue and exit }
+    { statements                                            }
+    oldCurrExitLabel:=current_procinfo.CurrExitLabel;
+    if implicitframe then
+      exitfinallylabel:=finallylabel
+    else
+      current_asmdata.getjumplabel(exitfinallylabel);
+    current_procinfo.CurrExitLabel:=exitfinallylabel;
+    if assigned(current_procinfo.CurrBreakLabel) then
+      begin
+        oldContinueLabel:=current_procinfo.CurrContinueLabel;
+        oldBreakLabel:=current_procinfo.CurrBreakLabel;
+        if implicitframe then
+          begin
+            breakfinallylabel:=finallylabel;
+            continuefinallylabel:=finallylabel;
+          end
+        else
+          begin
+            current_asmdata.getjumplabel(breakfinallylabel);
+            current_asmdata.getjumplabel(continuefinallylabel);
+          end;
+        current_procinfo.CurrContinueLabel:=continuefinallylabel;
+        current_procinfo.CurrBreakLabel:=breakfinallylabel;
+      end;
+
+    { Start of scope }
+    if is_safecall then
+      begin
+        with cg.rg[R_INTREGISTER] do
+          used_in_proc:=used_in_proc+[RS_EBX,RS_ESI,RS_EDI];
+
+        current_asmdata.getjumplabel(exceptlabel);
+        emit_scope_start(
+          current_asmdata.RefAsmSymbol('__FPC_except_safecall'),
+          exceptlabel
+        );
+      end
+    else
+      emit_scope_start(
+        current_asmdata.RefAsmSymbol('__FPC_finally_handler'),
+        current_asmdata.RefAsmSymbol(finalizepi.procdef.mangledname)
+      );
+
+    { try code }
+    if assigned(left) then
+      begin
+        secondpass(left);
+        tryflowcontrol:=flowcontrol;
+        if codegenerror then
+          exit;
+      end;
+
+    { don't generate line info for internal cleanup }
+    current_asmdata.CurrAsmList.concat(tai_marker.create(mark_NoLineInfoStart));
+
+    cg.a_label(current_asmdata.CurrAsmList,finallylabel);
+    emit_scope_end;
+    if is_safecall then
+      begin
+        current_asmdata.getjumplabel(safecalllabel);
+        hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
+        cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_INT,0,hreg);
+        cg.a_jmp_always(current_asmdata.CurrAsmList,safecalllabel);
+        { RTL handler will jump here on exception }
+        cg.a_label(current_asmdata.CurrAsmList,exceptlabel);
+        handle_safecall_exception;
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_FUNCTION_RESULT_REG,hreg);
+        cg.a_label(current_asmdata.CurrAsmList,safecalllabel);
+      end;
+
+    { end cleanup }
+    current_asmdata.CurrAsmList.concat(tai_marker.create(mark_NoLineInfoEnd));
+
+    { generate finally code as a separate procedure }
+    { !!! this resets flowcontrol, how to check flow away? }
+    if not implicitframe then
+      tcgprocinfo(current_procinfo).generate_exceptfilter(finalizepi);
+
+    flowcontrol:=[fc_inflowcontrol];
+    { right is a call to finalizer procedure }
+    secondpass(right);
+
+    { goto is allowed if it stays inside the finally block,
+      this is checked using the exception block number }
+    if (flowcontrol-[fc_gotolabel])<>[fc_inflowcontrol] then
+      CGMessage(cg_e_control_flow_outside_finally);
+    if codegenerror then
+      exit;
+
+    { don't generate line info for internal cleanup }
+    current_asmdata.CurrAsmList.concat(tai_marker.create(mark_NoLineInfoStart));
+
+    if not implicitframe then
+      begin
+        if tryflowcontrol*[fc_exit,fc_break,fc_continue]<>[] then
+          cg.a_jmp_always(current_asmdata.CurrAsmList,endfinallylabel);
+        { do some magic for exit,break,continue in the try block }
+        if fc_exit in tryflowcontrol then
+          begin
+            cg.a_label(current_asmdata.CurrAsmList,exitfinallylabel);
+            cg.g_call(current_asmdata.CurrAsmList,'_FPC_leave');
+            cg.a_jmp_always(current_asmdata.CurrAsmList,oldCurrExitLabel);
+          end;
+        if fc_break in tryflowcontrol then
+          begin
+            cg.a_label(current_asmdata.CurrAsmList,breakfinallylabel);
+            cg.g_call(current_asmdata.CurrAsmList,'_FPC_leave');
+            cg.a_jmp_always(current_asmdata.CurrAsmList,oldBreakLabel);
+          end;
+        if fc_continue in tryflowcontrol then
+          begin
+            cg.a_label(current_asmdata.CurrAsmList,continuefinallylabel);
+            cg.g_call(current_asmdata.CurrAsmList,'_FPC_leave');
+            cg.a_jmp_always(current_asmdata.CurrAsmList,oldContinueLabel);
+          end;
+      end;
+    if is_safecall then
+      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,hreg,NR_FUNCTION_RETURN_REG);
+    cg.a_label(current_asmdata.CurrAsmList,endfinallylabel);
+
+    { end cleanup }
+    current_asmdata.CurrAsmList.concat(tai_marker.create(mark_NoLineInfoEnd));
+
+    current_procinfo.CurrExitLabel:=oldCurrExitLabel;
+    if assigned(current_procinfo.CurrBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=oldContinueLabel;
+        current_procinfo.CurrBreakLabel:=oldBreakLabel;
+      end;
+    flowcontrol:=oldflowcontrol+(tryflowcontrol-[fc_inflowcontrol]);
+  end;
+
+{ ti386tryexceptnode }
+
+procedure ti386tryexceptnode.pass_generate_code;
+  var
+    exceptlabel,oldendexceptlabel,
+    lastonlabel,
+    exitexceptlabel,
+    continueexceptlabel,
+    breakexceptlabel,
+    exittrylabel,
+    continuetrylabel,
+    breaktrylabel,
+    oldCurrExitLabel,
+    oldContinueLabel,
+    oldBreakLabel : tasmlabel;
+    onlabel,
+    filterlabel: tasmlabel;
+    oldflowcontrol,tryflowcontrol,
+    exceptflowcontrol : tflowcontrol;
+    hnode : tnode;
+    hlist : tasmlist;
+    onnodecount : tai_const;
+  label
+    errorexit;
+  begin
+    if (target_info.system<>system_i386_win32) then
+      begin
+        inherited pass_generate_code;
+        exit;
+      end;
+    location_reset(location,LOC_VOID,OS_NO);
+
+    oldflowcontrol:=flowcontrol;
+    flowcontrol:=[fc_inflowcontrol];
+    { this can be called recursivly }
+    oldBreakLabel:=nil;
+    oldContinueLabel:=nil;
+    oldendexceptlabel:=endexceptlabel;
+
+    { Win32 SEH unwinding does not preserve registers. Indicate that they are
+      going to be destroyed. }
+    cg.alloccpuregisters(current_asmdata.CurrAsmList,R_INTREGISTER,[RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]);
+    cg.dealloccpuregisters(current_asmdata.CurrAsmList,R_INTREGISTER,[RS_EAX,RS_EBX,RS_ECX,RS_EDX,RS_ESI,RS_EDI]);
+
+    { save the old labels for control flow statements }
+    oldCurrExitLabel:=current_procinfo.CurrExitLabel;
+    if assigned(current_procinfo.CurrBreakLabel) then
+      begin
+        oldContinueLabel:=current_procinfo.CurrContinueLabel;
+        oldBreakLabel:=current_procinfo.CurrBreakLabel;
+      end;
+
+    { get new labels for the control flow statements }
+    current_asmdata.getjumplabel(exittrylabel);
+    current_asmdata.getjumplabel(exitexceptlabel);
+    if assigned(current_procinfo.CurrBreakLabel) then
+      begin
+        current_asmdata.getjumplabel(breaktrylabel);
+        current_asmdata.getjumplabel(continuetrylabel);
+        current_asmdata.getjumplabel(breakexceptlabel);
+        current_asmdata.getjumplabel(continueexceptlabel);
+      end;
+
+    current_asmdata.getjumplabel(exceptlabel);
+    current_asmdata.getjumplabel(endexceptlabel);
+    current_asmdata.getjumplabel(lastonlabel);
+    filterlabel:=nil;
+
+    { start of scope }
+    if assigned(right) then
+      begin
+        current_asmdata.getdatalabel(filterlabel);
+        emit_scope_start(
+          current_asmdata.RefAsmSymbol('__FPC_on_handler'),
+          filterlabel);
+      end
+    else
+      emit_scope_start(
+        current_asmdata.RefAsmSymbol('__FPC_except_handler'),
+        exceptlabel);
+
+    { set control flow labels for the try block }
+    current_procinfo.CurrExitLabel:=exittrylabel;
+    if assigned(oldBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=continuetrylabel;
+        current_procinfo.CurrBreakLabel:=breaktrylabel;
+      end;
+
+    secondpass(left);
+    tryflowcontrol:=flowcontrol;
+    if codegenerror then
+      goto errorexit;
+
+    emit_scope_end;
+    { jump over except handlers }
+    cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+
+    if fc_exit in tryflowcontrol then
+      begin
+        cg.a_label(current_asmdata.CurrAsmList,exittrylabel);
+        emit_scope_end;
+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldCurrExitLabel);
+      end;
+    if fc_break in tryflowcontrol then
+      begin
+        cg.a_label(current_asmdata.CurrAsmList,breaktrylabel);
+        emit_scope_end;
+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldBreakLabel);
+      end;
+    if fc_continue in tryflowcontrol then
+      begin
+        cg.a_label(current_asmdata.CurrAsmList,continuetrylabel);
+        emit_scope_end;
+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldContinueLabel);
+      end;
+
+    { target for catch-all handler }
+    cg.a_label(current_asmdata.CurrAsmList,exceptlabel);
+
+    { set control flow labels for the except block }
+    { and the on statements                        }
+    current_procinfo.CurrExitLabel:=exitexceptlabel;
+    if assigned(oldBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=continueexceptlabel;
+        current_procinfo.CurrBreakLabel:=breakexceptlabel;
+      end;
+
+    flowcontrol:=[fc_inflowcontrol];
+    { on statements }
+    if assigned(right) then
+      begin
+        { emit filter table to a temporary asmlist }
+        hlist:=TAsmList.Create;
+        new_section(hlist,sec_rodata,filterlabel.name,4);
+        cg.a_label(hlist,filterlabel);
+        onnodecount:=tai_const.create_32bit(0);
+        hlist.concat(onnodecount);
+
+        hnode:=right;
+        while assigned(hnode) do
+          begin
+            if hnode.nodetype<>onn then
+              InternalError(2011103101);
+            { TODO: make it done without using global label }
+            current_asmdata.getglobaljumplabel(onlabel);
+            hlist.concat(tai_const.create_sym(current_asmdata.RefAsmSymbol(tonnode(hnode).excepttype.vmt_mangledname,AT_DATA)));
+            hlist.concat(tai_const.create_sym(onlabel));
+            cg.a_label(current_asmdata.CurrAsmList,onlabel);
+            secondpass(hnode);
+            inc(onnodecount.value);
+            hnode:=tonnode(hnode).left;
+          end;
+        { add 'else' node to the filter list, too }
+        if assigned(t1) then
+          begin
+            hlist.concat(tai_const.create_32bit(-1));
+            hlist.concat(tai_const.create_sym(lastonlabel));
+            inc(onnodecount.value);
+          end;
+        { now move filter table to permanent list all at once }
+        maybe_new_object_file(current_asmdata.asmlists[al_typedconsts]);
+        current_asmdata.asmlists[al_typedconsts].concatlist(hlist);
+        hlist.free;
+      end;
+
+    cg.a_label(current_asmdata.CurrAsmList,lastonlabel);
+    if assigned(t1) then
+      begin
+        { here we don't have to reset flowcontrol           }
+        { the default and on flowcontrols are handled equal }
+        secondpass(t1);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        if (flowcontrol*[fc_exit,fc_break,fc_continue]<>[]) then
+          cg.a_jmp_always(current_asmdata.CurrAsmList,endexceptlabel);
+      end;
+    exceptflowcontrol:=flowcontrol;
+
+    if fc_exit in exceptflowcontrol then
+      begin
+        { do some magic for exit in the try block }
+        cg.a_label(current_asmdata.CurrAsmList,exitexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldCurrExitLabel);
+      end;
+
+    if fc_break in exceptflowcontrol then
+      begin
+        cg.a_label(current_asmdata.CurrAsmList,breakexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldBreakLabel);
+      end;
+
+    if fc_continue in exceptflowcontrol then
+      begin
+        cg.a_label(current_asmdata.CurrAsmList,continueexceptlabel);
+        cg.g_call(current_asmdata.CurrAsmList,'FPC_DONEEXCEPTION');
+        cg.a_jmp_always(current_asmdata.CurrAsmList,oldContinueLabel);
+      end;
+
+    cg.a_label(current_asmdata.CurrAsmList,endexceptlabel);
+
+errorexit:
+    { restore all saved labels }
+    endexceptlabel:=oldendexceptlabel;
+
+    { restore the control flow labels }
+    current_procinfo.CurrExitLabel:=oldCurrExitLabel;
+    if assigned(oldBreakLabel) then
+      begin
+        current_procinfo.CurrContinueLabel:=oldContinueLabel;
+        current_procinfo.CurrBreakLabel:=oldBreakLabel;
+      end;
+
+    { return all used control flow statements }
+    flowcontrol:=oldflowcontrol+(exceptflowcontrol +
+      tryflowcontrol - [fc_inflowcontrol]);
+  end;
+
+initialization
+  craisenode:=ti386raisenode;
+  connode:=ti386onnode;
+  ctryexceptnode:=ti386tryexceptnode;
+  ctryfinallynode:=ti386tryfinallynode;
+end.

+ 5 - 3
compiler/i386/popt386.pas

@@ -320,7 +320,7 @@ begin
                          12: begin
                          12: begin
                             {imul 12, reg1, reg2 to
                             {imul 12, reg1, reg2 to
                                lea (,reg1,4), reg2
                                lea (,reg1,4), reg2
-                               lea (,reg1,8) reg2
+                               lea (reg2,reg1,8), reg2
                              imul 12, reg1 to
                              imul 12, reg1 to
                                lea (reg1,reg1,2), reg1
                                lea (reg1,reg1,2), reg1
                                lea (,reg1,4), reg1}
                                lea (,reg1,4), reg1}
@@ -853,6 +853,8 @@ begin
                         S_B: v:=$80;
                         S_B: v:=$80;
                         S_W: v:=$8000;
                         S_W: v:=$8000;
                         S_L: v:=aint($80000000);
                         S_L: v:=aint($80000000);
+                        else
+                          internalerror(2013112905);
                       end;
                       end;
                       if (taicpu(p).oper[0]^.typ=Top_const) and
                       if (taicpu(p).oper[0]^.typ=Top_const) and
                          (taicpu(p).oper[0]^.val=v) and
                          (taicpu(p).oper[0]^.val=v) and
@@ -1399,6 +1401,7 @@ begin
                                   taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
                                   taicpu(hp1).loadReg(0,taicpu(hp1).oper[1]^.reg);
                                   taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
                                   taicpu(hp1).loadRef(1,taicpu(p).oper[1]^.ref^);
                                   taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
                                   taicpu(p).loadReg(1,taicpu(hp1).oper[0]^.reg);
+                                  taicpu(hp1).fileinfo := taicpu(p).fileinfo;
                                 end
                                 end
                         end;
                         end;
                       if GetNextInstruction(p, hp1) and
                       if GetNextInstruction(p, hp1) and
@@ -1973,10 +1976,9 @@ procedure PeepHoleOptPass2(asml: TAsmList; BlockStart, BlockEnd: tai);
     end;
     end;
 
 
 var
 var
-  p,hp1,hp2: tai;
+  p,hp1,hp2,hp3: tai;
   l : longint;
   l : longint;
   condition : tasmcond;
   condition : tasmcond;
-  hp3: tai;
   UsedRegs, TmpUsedRegs: TRegSet;
   UsedRegs, TmpUsedRegs: TRegSet;
   carryadd_opcode: Tasmop;
   carryadd_opcode: Tasmop;
 
 

+ 1 - 0
compiler/i386/rropt386.pas

@@ -206,6 +206,7 @@ begin
   sequenceEnd := false;
   sequenceEnd := false;
   reg1Modified := false;
   reg1Modified := false;
   reg2Modified := false;
   reg2Modified := false;
+  switchLast := false;
   endP := start;
   endP := start;
   while tmpResult and not sequenceEnd do
   while tmpResult and not sequenceEnd do
     begin
     begin

+ 445 - 105
compiler/i8086/cgcpu.pas

@@ -49,8 +49,6 @@ unit cgcpu;
         procedure a_call_name_static_far(list : TAsmList;const s : string);
         procedure a_call_name_static_far(list : TAsmList;const s : string);
         procedure a_call_reg(list : TAsmList;reg : tregister);override;
         procedure a_call_reg(list : TAsmList;reg : tregister);override;
         procedure a_call_reg_far(list : TAsmList;reg : tregister);
         procedure a_call_reg_far(list : TAsmList;reg : tregister);
-        procedure a_call_ref(list : TAsmList;ref : treference);override;
-        procedure a_call_ref_far(list : TAsmList;ref : treference);
 
 
         procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
         procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
         procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
         procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
@@ -73,6 +71,18 @@ unit cgcpu;
         procedure a_load_ref_reg(list : TAsmList;fromsize,tosize: tcgsize;const ref : treference;reg : tregister);override;
         procedure a_load_ref_reg(list : TAsmList;fromsize,tosize: tcgsize;const ref : treference;reg : tregister);override;
         procedure a_load_reg_reg(list : TAsmList;fromsize,tosize: tcgsize;reg1,reg2 : tregister);override;
         procedure a_load_reg_reg(list : TAsmList;fromsize,tosize: tcgsize;reg1,reg2 : tregister);override;
 
 
+        {  comparison operations }
+        procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
+          l : tasmlabel);override;
+        procedure a_cmp_const_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;const ref : treference;
+          l : tasmlabel);override;
+        procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
+        procedure a_cmp_ref_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;const ref: treference; reg : tregister; l : tasmlabel); override;
+        procedure a_cmp_reg_ref_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg : tregister; const ref: treference; l : tasmlabel); override;
+
+        procedure gen_cmp32_jmp1(list: TAsmList; cmp_op: topcmp; l_skip, l_target: TAsmLabel);
+        procedure gen_cmp32_jmp2(list: TAsmList; cmp_op: topcmp; l_skip, l_target: TAsmLabel);
+
         procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);override;
         procedure g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);override;
         procedure g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference);override;
         procedure g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference);override;
 
 
@@ -89,6 +99,8 @@ unit cgcpu;
         procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
         procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
 
 
         procedure get_32bit_ops(op: TOpCG; out op1,op2: TAsmOp);
         procedure get_32bit_ops(op: TOpCG; out op1,op2: TAsmOp);
+
+        procedure add_move_instruction(instr:Taicpu);override;
      end;
      end;
 
 
       tcg64f8086 = class(tcg64f32)
       tcg64f8086 = class(tcg64f32)
@@ -123,12 +135,11 @@ unit cgcpu;
     procedure tcg8086.init_register_allocators;
     procedure tcg8086.init_register_allocators;
       begin
       begin
         inherited init_register_allocators;
         inherited init_register_allocators;
-        if not(target_info.system in [system_i386_darwin,system_i386_iphonesim]) and
-           (cs_create_pic in current_settings.moduleswitches) then
-          rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_AX,RS_DX,RS_CX,RS_SI,RS_DI],first_int_imreg,[RS_BP])
+        if cs_create_pic in current_settings.moduleswitches then
+          rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_AX,RS_DX,RS_CX,RS_SI,RS_DI],first_int_imreg,[RS_BP])
         else
         else
           if (cs_useebp in current_settings.optimizerswitches) and assigned(current_procinfo) and (current_procinfo.framepointer<>NR_BP) then
           if (cs_useebp in current_settings.optimizerswitches) and assigned(current_procinfo) and (current_procinfo.framepointer<>NR_BP) then
-            rg[R_INTREGISTER]:=trgcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_AX,RS_DX,RS_CX,RS_BX,RS_SI,RS_DI,RS_BP],first_int_imreg,[])
+            rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_AX,RS_DX,RS_CX,RS_BX,RS_SI,RS_DI,RS_BP],first_int_imreg,[])
           else
           else
             rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_AX,RS_DX,RS_CX,RS_BX,RS_SI,RS_DI],first_int_imreg,[RS_BP]);
             rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,[RS_AX,RS_DX,RS_CX,RS_BX,RS_SI,RS_DI],first_int_imreg,[RS_BP]);
         rg[R_MMXREGISTER]:=trgcpu.create(R_MMXREGISTER,R_SUBNONE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],first_mm_imreg,[]);
         rg[R_MMXREGISTER]:=trgcpu.create(R_MMXREGISTER,R_SUBNONE,[RS_XMM0,RS_XMM1,RS_XMM2,RS_XMM3,RS_XMM4,RS_XMM5,RS_XMM6,RS_XMM7],first_mm_imreg,[]);
@@ -240,27 +251,12 @@ unit cgcpu;
         a_load_reg_ref(list,OS_32,OS_32,reg,href);
         a_load_reg_ref(list,OS_32,OS_32,reg,href);
         cg.getcpuregister(list,NR_BX);
         cg.getcpuregister(list,NR_BX);
         cg.getcpuregister(list,NR_SI);
         cg.getcpuregister(list,NR_SI);
-        a_call_ref_far(list,href);
+        href.refaddr:=addr_far_ref;
+        list.concat(taicpu.op_ref(A_CALL,S_NO,href));
         tg.ungettemp(list,href);
         tg.ungettemp(list,href);
       end;
       end;
 
 
 
 
-    procedure tcg8086.a_call_ref(list: TAsmList; ref: treference);
-      begin
-        if current_settings.x86memorymodel in x86_far_code_models then
-          a_call_ref_far(list,ref)
-        else
-          a_call_ref_near(list,ref);
-      end;
-
-
-    procedure tcg8086.a_call_ref_far(list: TAsmList; ref: treference);
-      begin
-        ref.refaddr:=addr_far_ref;
-        list.concat(taicpu.op_ref(A_CALL,S_NO,ref));
-      end;
-
-
     procedure tcg8086.a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize;
     procedure tcg8086.a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize;
       a: tcgint; reg: TRegister);
       a: tcgint; reg: TRegister);
       var
       var
@@ -272,7 +268,7 @@ unit cgcpu;
         use_loop: Boolean;
         use_loop: Boolean;
         i: Integer;
         i: Integer;
       begin
       begin
-        optimize_op_const(op, a);
+        optimize_op_const(size, op, a);
         check_register_size(size,reg);
         check_register_size(size,reg);
 
 
         if size in [OS_64, OS_S64] then
         if size in [OS_64, OS_S64] then
@@ -296,7 +292,8 @@ unit cgcpu;
                   { Optimization when the low 16-bits of the constant are 0 }
                   { Optimization when the low 16-bits of the constant are 0 }
                   if aint(a and $FFFF) = 0 then
                   if aint(a and $FFFF) = 0 then
                     begin
                     begin
-                      list.concat(taicpu.op_const_reg(op1,S_W,aint(a shr 16),GetNextReg(reg)));
+                      { use a_op_const_reg to allow the use of inc/dec }
+                      a_op_const_reg(list,op,OS_16,aint(a shr 16),GetNextReg(reg));
                     end
                     end
                   else
                   else
                     begin
                     begin
@@ -306,27 +303,61 @@ unit cgcpu;
                 end;
                 end;
               OP_AND, OP_OR, OP_XOR:
               OP_AND, OP_OR, OP_XOR:
                 begin
                 begin
-                  if longword(a) = high(longword) then
+                  { low word operation }
+                  if aint(a and $FFFF) = aint(0) then
                     begin
                     begin
                       case op of
                       case op of
                         OP_AND:
                         OP_AND:
-                          exit;
+                          a_load_const_reg(list,OS_16,aint(0),reg);
+                        OP_OR,OP_XOR:
+                          {do nothing};
+                        else
+                          InternalError(2013100701);
+                      end;
+                    end
+                  else if aint(a and $FFFF) = aint($FFFF) then
+                    begin
+                      case op of
+                        OP_AND:
+                          {do nothing};
                         OP_OR:
                         OP_OR:
-                          a_load_const_reg(list,size,high(longword),reg);
+                          a_load_const_reg(list,OS_16,aint($FFFF),reg);
                         OP_XOR:
                         OP_XOR:
-                          begin
-                            list.concat(taicpu.op_reg(A_NOT,S_W,reg));
-                            list.concat(taicpu.op_reg(A_NOT,S_W,GetNextReg(reg)));
-                          end;
+                          list.concat(taicpu.op_reg(A_NOT,S_W,reg));
                         else
                         else
                           InternalError(2013100701);
                           InternalError(2013100701);
-                      end
+                      end;
                     end
                     end
                   else
                   else
-                  begin
                     a_op_const_reg(list,op,OS_16,aint(a and $FFFF),reg);
                     a_op_const_reg(list,op,OS_16,aint(a and $FFFF),reg);
+
+                  { high word operation }
+                  if aint(a shr 16) = aint(0) then
+                    begin
+                      case op of
+                        OP_AND:
+                          a_load_const_reg(list,OS_16,aint(0),GetNextReg(reg));
+                        OP_OR,OP_XOR:
+                          {do nothing};
+                        else
+                          InternalError(2013100701);
+                      end;
+                    end
+                  else if aint(a shr 16) = aint($FFFF) then
+                    begin
+                      case op of
+                        OP_AND:
+                          {do nothing};
+                        OP_OR:
+                          a_load_const_reg(list,OS_16,aint($FFFF),GetNextReg(reg));
+                        OP_XOR:
+                          list.concat(taicpu.op_reg(A_NOT,S_W,GetNextReg(reg)));
+                        else
+                          InternalError(2013100701);
+                      end;
+                    end
+                  else
                     a_op_const_reg(list,op,OS_16,aint(a shr 16),GetNextReg(reg));
                     a_op_const_reg(list,op,OS_16,aint(a shr 16),GetNextReg(reg));
-                  end;
                 end;
                 end;
               OP_SHR,OP_SHL,OP_SAR:
               OP_SHR,OP_SHL,OP_SAR:
                 begin
                 begin
@@ -448,13 +479,6 @@ unit cgcpu;
             { 8086 doesn't support 'imul reg,const', so we handle it here }
             { 8086 doesn't support 'imul reg,const', so we handle it here }
             if (current_settings.cputype<cpu_186) and (op in [OP_MUL,OP_IMUL]) then
             if (current_settings.cputype<cpu_186) and (op in [OP_MUL,OP_IMUL]) then
               begin
               begin
-                { TODO: also enable the SHL optimization below }
-    {            if not(cs_check_overflow in current_settings.localswitches) and
-                   ispowerof2(int64(a),power) then
-                  begin
-                    list.concat(taicpu.op_const_reg(A_SHL,TCgSize2OpSize[size],power,reg));
-                    exit;
-                  end;}
                 if op = OP_IMUL then
                 if op = OP_IMUL then
                   begin
                   begin
                     if size in [OS_16,OS_S16] then
                     if size in [OS_16,OS_S16] then
@@ -470,15 +494,17 @@ unit cgcpu;
                     a_load_const_reg(list,size,a,ax_subreg);
                     a_load_const_reg(list,size,a,ax_subreg);
                     if size in [OS_16,OS_S16] then
                     if size in [OS_16,OS_S16] then
                       getcpuregister(list,NR_DX);
                       getcpuregister(list,NR_DX);
-                    list.concat(taicpu.op_reg(A_IMUL,TCgSize2OpSize[size],reg));
+                    { prefer MUL over IMUL when overflow checking is off, }
+                    { because it's faster on the 8086 & 8088              }
+                    if not (cs_check_overflow in current_settings.localswitches) then
+                      list.concat(taicpu.op_reg(A_MUL,TCgSize2OpSize[size],reg))
+                    else
+                      list.concat(taicpu.op_reg(A_IMUL,TCgSize2OpSize[size],reg));
                     if size in [OS_16,OS_S16] then
                     if size in [OS_16,OS_S16] then
                       ungetcpuregister(list,NR_DX);
                       ungetcpuregister(list,NR_DX);
                     a_load_reg_reg(list,size,size,ax_subreg,reg);
                     a_load_reg_reg(list,size,size,ax_subreg,reg);
 
 
                     ungetcpuregister(list,NR_AX);
                     ungetcpuregister(list,NR_AX);
-
-                    { TODO: implement overflow checking? }
-
                     exit;
                     exit;
                   end
                   end
                 else
                 else
@@ -497,7 +523,7 @@ unit cgcpu;
         tmpref: treference;
         tmpref: treference;
         op1,op2: TAsmOp;
         op1,op2: TAsmOp;
       begin
       begin
-        optimize_op_const(op, a);
+        optimize_op_const(size, op, a);
         tmpref:=ref;
         tmpref:=ref;
         make_simple_ref(list,tmpref);
         make_simple_ref(list,tmpref);
 
 
@@ -522,7 +548,8 @@ unit cgcpu;
                   if aint(a and $FFFF) = 0 then
                   if aint(a and $FFFF) = 0 then
                     begin
                     begin
                       inc(tmpref.offset, 2);
                       inc(tmpref.offset, 2);
-                      list.concat(taicpu.op_const_ref(op1,S_W,aint(a shr 16),tmpref));
+                      { use a_op_const_ref to allow the use of inc/dec }
+                      a_op_const_ref(list,op,OS_16,aint(a shr 16),tmpref);
                     end
                     end
                   else
                   else
                     begin
                     begin
@@ -533,29 +560,62 @@ unit cgcpu;
                 end;
                 end;
               OP_AND, OP_OR, OP_XOR:
               OP_AND, OP_OR, OP_XOR:
                 begin
                 begin
-                  if longword(a) = high(longword) then
+                  { low word operation }
+                  if aint(a and $FFFF) = aint(0) then
+                    begin
+                      case op of
+                        OP_AND:
+                          a_load_const_ref(list,OS_16,aint(0),ref);
+                        OP_OR,OP_XOR:
+                          {do nothing};
+                        else
+                          InternalError(2013100701);
+                      end;
+                    end
+                  else if aint(a and $FFFF) = aint($FFFF) then
                     begin
                     begin
                       case op of
                       case op of
                         OP_AND:
                         OP_AND:
-                          exit;
+                          {do nothing};
                         OP_OR:
                         OP_OR:
-                          a_load_const_ref(list,size,high(longword),tmpref);
+                          a_load_const_ref(list,OS_16,aint($FFFF),tmpref);
                         OP_XOR:
                         OP_XOR:
-                          begin
-                            list.concat(taicpu.op_ref(A_NOT,S_W,tmpref));
-                            inc(tmpref.offset, 2);
-                            list.concat(taicpu.op_ref(A_NOT,S_W,tmpref));
-                          end;
+                          list.concat(taicpu.op_ref(A_NOT,S_W,tmpref));
                         else
                         else
                           InternalError(2013100701);
                           InternalError(2013100701);
-                      end
+                      end;
                     end
                     end
                   else
                   else
-                  begin
                     a_op_const_ref(list,op,OS_16,aint(a and $FFFF),tmpref);
                     a_op_const_ref(list,op,OS_16,aint(a and $FFFF),tmpref);
-                    inc(tmpref.offset, 2);
+
+                  { high word operation }
+                  inc(tmpref.offset, 2);
+                  if aint(a shr 16) = aint(0) then
+                    begin
+                      case op of
+                        OP_AND:
+                          a_load_const_ref(list,OS_16,aint(0),tmpref);
+                        OP_OR,OP_XOR:
+                          {do nothing};
+                        else
+                          InternalError(2013100701);
+                      end;
+                    end
+                  else if aint(a shr 16) = aint($FFFF) then
+                    begin
+                      case op of
+                        OP_AND:
+                          {do nothing};
+                        OP_OR:
+                          a_load_const_ref(list,OS_16,aint($FFFF),tmpref);
+                        OP_XOR:
+                          list.concat(taicpu.op_ref(A_NOT,S_W,tmpref));
+                        else
+                          InternalError(2013100701);
+                      end;
+                    end
+                  else
                     a_op_const_ref(list,op,OS_16,aint(a shr 16),tmpref);
                     a_op_const_ref(list,op,OS_16,aint(a shr 16),tmpref);
-                  end;
                 end;
                 end;
               else
               else
                 internalerror(2013050802);
                 internalerror(2013050802);
@@ -1050,7 +1110,6 @@ unit cgcpu;
 
 
     procedure tcg8086.a_load_reg_ref(list : TAsmList;fromsize,tosize: tcgsize; reg : tregister;const ref : treference);
     procedure tcg8086.a_load_reg_ref(list : TAsmList;fromsize,tosize: tcgsize; reg : tregister;const ref : treference);
       var
       var
-        tmpsize : tcgsize;
         tmpreg  : tregister;
         tmpreg  : tregister;
         tmpref  : treference;
         tmpref  : treference;
       begin
       begin
@@ -1066,15 +1125,12 @@ unit cgcpu;
               internalerror(2013030310);
               internalerror(2013030310);
           OS_16,OS_S16:
           OS_16,OS_S16:
             case fromsize of
             case fromsize of
-              OS_8:
+              OS_8,OS_S8:
                 begin
                 begin
-                  reg := makeregsize(list, reg, OS_16);
-                  setsubreg(reg, R_SUBH);
-                  list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg));
-                  setsubreg(reg, R_SUBW);
-                  list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
+                  tmpreg:=getintregister(list,tosize);
+                  a_load_reg_reg(list,fromsize,tosize,reg,tmpreg);
+                  a_load_reg_ref(list,tosize,tosize,tmpreg,tmpref);
                 end;
                 end;
-              OS_S8: internalerror(2013052503);  { TODO }
               OS_16,OS_S16:
               OS_16,OS_S16:
                 begin
                 begin
                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
@@ -1084,26 +1140,18 @@ unit cgcpu;
             end;
             end;
           OS_32,OS_S32:
           OS_32,OS_S32:
             case fromsize of
             case fromsize of
-              OS_8:
+              OS_8,OS_S8,OS_S16:
                 begin
                 begin
-                  reg := makeregsize(list, reg, OS_16);
-                  setsubreg(reg, R_SUBH);
-                  list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg));
-                  setsubreg(reg, R_SUBW);
-                  list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
-                  inc(tmpref.offset, 2);
-                  list.concat(taicpu.op_const_ref(A_MOV, S_W, 0, tmpref));
+                  tmpreg:=getintregister(list,tosize);
+                  a_load_reg_reg(list,fromsize,tosize,reg,tmpreg);
+                  a_load_reg_ref(list,tosize,tosize,tmpreg,tmpref);
                 end;
                 end;
-              OS_S8:
-                internalerror(2013052501);  { TODO }
               OS_16:
               OS_16:
                 begin
                 begin
                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
                   inc(tmpref.offset, 2);
                   inc(tmpref.offset, 2);
                   list.concat(taicpu.op_const_ref(A_MOV, S_W, 0, tmpref));
                   list.concat(taicpu.op_const_ref(A_MOV, S_W, 0, tmpref));
                 end;
                 end;
-              OS_S16:
-                internalerror(2013052502);  { TODO }
               OS_32,OS_S32:
               OS_32,OS_S32:
                 begin
                 begin
                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
                   list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
@@ -1152,9 +1200,11 @@ unit cgcpu;
             case fromsize of
             case fromsize of
               OS_8:
               OS_8:
                 begin
                 begin
-                  list.concat(taicpu.op_const_reg(A_MOV, S_W, 0, reg));
                   reg := makeregsize(list, reg, OS_8);
                   reg := makeregsize(list, reg, OS_8);
                   list.concat(taicpu.op_ref_reg(A_MOV, S_B, tmpref, reg));
                   list.concat(taicpu.op_ref_reg(A_MOV, S_B, tmpref, reg));
+                  setsubreg(reg, R_SUBH);
+                  list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg));
+                  makeregsize(list, reg, OS_16);
                 end;
                 end;
               OS_S8:
               OS_S8:
                 begin
                 begin
@@ -1174,9 +1224,11 @@ unit cgcpu;
               OS_8:
               OS_8:
                 begin
                 begin
                   list.concat(taicpu.op_const_reg(A_MOV,S_W,0,GetNextReg(reg)));
                   list.concat(taicpu.op_const_reg(A_MOV,S_W,0,GetNextReg(reg)));
-                  list.concat(taicpu.op_const_reg(A_MOV, S_W, 0, reg));
                   reg := makeregsize(list, reg, OS_8);
                   reg := makeregsize(list, reg, OS_8);
                   list.concat(taicpu.op_ref_reg(A_MOV, S_B, tmpref, reg));
                   list.concat(taicpu.op_ref_reg(A_MOV, S_B, tmpref, reg));
+                  setsubreg(reg, R_SUBH);
+                  list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg));
+                  makeregsize(list, reg, OS_16);
                 end;
                 end;
               OS_S8:
               OS_S8:
                 begin
                 begin
@@ -1244,12 +1296,15 @@ unit cgcpu;
             fromsize:=tosize;
             fromsize:=tosize;
           end;
           end;
 
 
-        if (reg1<>reg2) then
+        if (reg1<>reg2) or (fromsize<>tosize) then
           begin
           begin
             case tosize of
             case tosize of
               OS_8,OS_S8:
               OS_8,OS_S8:
                 if fromsize in [OS_8,OS_S8] then
                 if fromsize in [OS_8,OS_S8] then
-                  add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, reg2))
+                  begin
+                    if reg1<>reg2 then
+                      add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, reg2));
+                  end
                 else
                 else
                   internalerror(2013030210);
                   internalerror(2013030210);
               OS_16,OS_S16:
               OS_16,OS_S16:
@@ -1257,9 +1312,11 @@ unit cgcpu;
                   OS_8:
                   OS_8:
                     begin
                     begin
                       reg2 := makeregsize(list, reg2, OS_8);
                       reg2 := makeregsize(list, reg2, OS_8);
-                      add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, reg2));
+                      if reg1<>reg2 then
+                        add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, reg2));
                       setsubreg(reg2,R_SUBH);
                       setsubreg(reg2,R_SUBH);
                       list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg2));
                       list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg2));
+                      makeregsize(list, reg2, OS_16);
                     end;
                     end;
                   OS_S8:
                   OS_S8:
                     begin
                     begin
@@ -1270,7 +1327,10 @@ unit cgcpu;
                       ungetcpuregister(list, NR_AX);
                       ungetcpuregister(list, NR_AX);
                     end;
                     end;
                   OS_16,OS_S16:
                   OS_16,OS_S16:
-                    add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, reg2));
+                    begin
+                      if reg1<>reg2 then
+                        add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, reg2));
+                    end
                   else
                   else
                     internalerror(2013030212);
                     internalerror(2013030212);
                 end;
                 end;
@@ -1280,9 +1340,11 @@ unit cgcpu;
                     begin
                     begin
                       list.concat(taicpu.op_const_reg(A_MOV, S_W, 0, GetNextReg(reg2)));
                       list.concat(taicpu.op_const_reg(A_MOV, S_W, 0, GetNextReg(reg2)));
                       reg2 := makeregsize(list, reg2, OS_8);
                       reg2 := makeregsize(list, reg2, OS_8);
-                      add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, reg2));
+                      if reg1<>reg2 then
+                        add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, reg2));
                       setsubreg(reg2,R_SUBH);
                       setsubreg(reg2,R_SUBH);
                       list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg2));
                       list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg2));
+                      makeregsize(list, reg2, OS_16);
                     end;
                     end;
                   OS_S8:
                   OS_S8:
                     begin
                     begin
@@ -1298,7 +1360,8 @@ unit cgcpu;
                     end;
                     end;
                   OS_16:
                   OS_16:
                     begin
                     begin
-                      add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, reg2));
+                      if reg1<>reg2 then
+                        add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, reg2));
                       list.concat(taicpu.op_const_reg(A_MOV,S_W,0,GetNextReg(reg2)));
                       list.concat(taicpu.op_const_reg(A_MOV,S_W,0,GetNextReg(reg2)));
                     end;
                     end;
                   OS_S16:
                   OS_S16:
@@ -1307,15 +1370,19 @@ unit cgcpu;
                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, NR_AX));
                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, NR_AX));
                       getcpuregister(list, NR_DX);
                       getcpuregister(list, NR_DX);
                       list.concat(taicpu.op_none(A_CWD));
                       list.concat(taicpu.op_none(A_CWD));
-                      add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg2));
+                      if reg1<>reg2 then
+                        add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg2));
                       ungetcpuregister(list, NR_AX);
                       ungetcpuregister(list, NR_AX);
                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg2)));
                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg2)));
                       ungetcpuregister(list, NR_DX);
                       ungetcpuregister(list, NR_DX);
                     end;
                     end;
                   OS_32,OS_S32:
                   OS_32,OS_S32:
                     begin
                     begin
-                      add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, reg2));
-                      add_mov(taicpu.op_reg_reg(A_MOV, S_W, GetNextReg(reg1), GetNextReg(reg2)));
+                      if reg1<>reg2 then
+                        begin
+                          add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, reg2));
+                          add_mov(taicpu.op_reg_reg(A_MOV, S_W, GetNextReg(reg1), GetNextReg(reg2)));
+                        end;
                     end;
                     end;
                   else
                   else
                     internalerror(2013030213);
                     internalerror(2013030213);
@@ -1327,19 +1394,208 @@ unit cgcpu;
       end;
       end;
 
 
 
 
+    procedure tcg8086.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
+      var
+        hl_skip: TAsmLabel;
+      begin
+        if size in [OS_32, OS_S32] then
+          begin
+            if (longint(a shr 16) = 0) then
+              list.concat(taicpu.op_reg_reg(A_TEST,S_W,GetNextReg(reg),GetNextReg(reg)))
+            else
+              list.concat(taicpu.op_const_reg(A_CMP,S_W,longint(a shr 16),GetNextReg(reg)));
+            current_asmdata.getjumplabel(hl_skip);
+            gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
+
+            if (longint(a and $ffff) = 0) then
+              list.concat(taicpu.op_reg_reg(A_TEST,S_W,reg,reg))
+            else
+              list.concat(taicpu.op_const_reg(A_CMP,S_W,longint(a and $ffff),reg));
+            gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
+            a_label(list,hl_skip);
+          end
+        else
+          inherited a_cmp_const_reg_label(list, size, cmp_op, a, reg, l);
+      end;
+
+
+    procedure tcg8086.a_cmp_const_ref_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; const ref: treference; l: tasmlabel);
+      var
+        tmpref: treference;
+        hl_skip: TAsmLabel;
+      begin
+        if size in [OS_32, OS_S32] then
+          begin
+            tmpref:=ref;
+            make_simple_ref(list,tmpref);
+            inc(tmpref.offset,2);
+            list.concat(taicpu.op_const_ref(A_CMP,S_W,longint(a shr 16),tmpref));
+            current_asmdata.getjumplabel(hl_skip);
+            gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
+            dec(tmpref.offset,2);
+            list.concat(taicpu.op_const_ref(A_CMP,S_W,longint(a and $ffff),tmpref));
+            gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
+            a_label(list,hl_skip);
+          end
+        else
+          inherited a_cmp_const_ref_label(list, size, cmp_op, a, ref, l);
+      end;
+
+
+    procedure tcg8086.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
+      var
+        hl_skip: TAsmLabel;
+      begin
+        if size in [OS_32, OS_S32] then
+          begin
+            check_register_size(size,reg1);
+            check_register_size(size,reg2);
+            list.concat(taicpu.op_reg_reg(A_CMP,S_W,GetNextReg(reg1),GetNextReg(reg2)));
+            current_asmdata.getjumplabel(hl_skip);
+            gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
+            list.concat(taicpu.op_reg_reg(A_CMP,S_W,reg1,reg2));
+            gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
+            a_label(list,hl_skip);
+          end
+        else
+          inherited a_cmp_reg_reg_label(list, size, cmp_op, reg1, reg2, l);
+      end;
+
+
+    procedure tcg8086.a_cmp_ref_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; const ref: treference; reg: tregister; l: tasmlabel);
+      var
+        tmpref: treference;
+        hl_skip: TAsmLabel;
+      begin
+        if size in [OS_32, OS_S32] then
+          begin
+            tmpref:=ref;
+            make_simple_ref(list,tmpref);
+            check_register_size(size,reg);
+            inc(tmpref.offset,2);
+            list.concat(taicpu.op_ref_reg(A_CMP,S_W,tmpref,GetNextReg(reg)));
+            current_asmdata.getjumplabel(hl_skip);
+            gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
+            dec(tmpref.offset,2);
+            list.concat(taicpu.op_ref_reg(A_CMP,S_W,tmpref,reg));
+            gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
+            a_label(list,hl_skip);
+          end
+        else
+          inherited a_cmp_ref_reg_label(list, size, cmp_op, ref, reg, l);
+      end;
+
+
+    procedure tcg8086.a_cmp_reg_ref_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg: tregister; const ref: treference; l: tasmlabel);
+      var
+        tmpref: treference;
+        hl_skip: TAsmLabel;
+      begin
+        if size in [OS_32, OS_S32] then
+          begin
+            tmpref:=ref;
+            make_simple_ref(list,tmpref);
+            check_register_size(size,reg);
+            inc(tmpref.offset,2);
+            list.concat(taicpu.op_reg_ref(A_CMP,S_W,GetNextReg(reg),tmpref));
+            current_asmdata.getjumplabel(hl_skip);
+            gen_cmp32_jmp1(list, cmp_op, hl_skip, l);
+            dec(tmpref.offset,2);
+            list.concat(taicpu.op_reg_ref(A_CMP,S_W,reg,tmpref));
+            gen_cmp32_jmp2(list, cmp_op, hl_skip, l);
+            a_label(list,hl_skip);
+          end
+        else
+          inherited a_cmp_reg_ref_label(list, size, cmp_op, reg, ref, l);
+      end;
+
+
+    procedure tcg8086.gen_cmp32_jmp1(list: TAsmList; cmp_op: topcmp; l_skip, l_target: TAsmLabel);
+      begin
+        case cmp_op of
+          OC_EQ:
+            a_jmp_cond(list, OC_NE, l_skip);
+          OC_NE:
+            a_jmp_cond(list, OC_NE, l_target);
+          OC_GT,OC_GTE:
+            begin
+              a_jmp_cond(list, OC_GT, l_target);
+              a_jmp_cond(list, OC_LT, l_skip);
+            end;
+          OC_LT,OC_LTE:
+            begin
+              a_jmp_cond(list, OC_LT, l_target);
+              a_jmp_cond(list, OC_GT, l_skip);
+            end;
+          OC_B,OC_BE:
+            begin
+              a_jmp_cond(list, OC_B, l_target);
+              a_jmp_cond(list, OC_A, l_skip);
+            end;
+          OC_A,OC_AE:
+            begin
+              a_jmp_cond(list, OC_A, l_target);
+              a_jmp_cond(list, OC_B, l_skip);
+            end;
+          else
+            internalerror(2014010305);
+        end;
+      end;
+
+    procedure tcg8086.gen_cmp32_jmp2(list: TAsmList; cmp_op: topcmp; l_skip, l_target: TAsmLabel);
+      begin
+        case cmp_op of
+          OC_EQ:
+            a_jmp_cond(list, OC_EQ, l_target);
+          OC_GT:
+            a_jmp_cond(list, OC_A, l_target);
+          OC_LT:
+            a_jmp_cond(list, OC_B, l_target);
+          OC_GTE:
+            a_jmp_cond(list, OC_AE, l_target);
+          OC_LTE:
+            a_jmp_cond(list, OC_BE, l_target);
+          OC_NE:
+            a_jmp_cond(list, OC_NE, l_target);
+          OC_BE:
+            a_jmp_cond(list, OC_BE, l_target);
+          OC_B:
+            a_jmp_cond(list, OC_B, l_target);
+          OC_AE:
+            a_jmp_cond(list, OC_AE, l_target);
+          OC_A:
+            a_jmp_cond(list, OC_A, l_target);
+          else
+            internalerror(2014010306);
+        end;
+      end;
+
+
     procedure tcg8086.g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);
     procedure tcg8086.g_flags2reg(list: TAsmList; size: TCgSize; const f: tresflags; reg: TRegister);
       var
       var
         ai : taicpu;
         ai : taicpu;
-        hreg, hreg16 : tregister;
+        hreg16 : tregister;
         hl_skip: TAsmLabel;
         hl_skip: TAsmLabel;
         invf: TResFlags;
         invf: TResFlags;
+        tmpsize: TCgSize;
       begin
       begin
-        hreg:=makeregsize(list,reg,OS_8);
-
         invf := f;
         invf := f;
         inverse_flags(invf);
         inverse_flags(invf);
 
 
-        list.concat(Taicpu.op_const_reg(A_MOV, S_B, 0, hreg));
+        case size of
+          OS_8,OS_S8:
+            begin
+              tmpsize:=OS_8;
+              list.concat(Taicpu.op_const_reg(A_MOV, S_B, 0, reg));
+            end;
+          OS_16,OS_S16,OS_32,OS_S32:
+            begin
+              tmpsize:=OS_16;
+              list.concat(Taicpu.op_const_reg(A_MOV, S_W, 0, reg));
+            end;
+          else
+            internalerror(2013123101);
+        end;
 
 
         current_asmdata.getjumplabel(hl_skip);
         current_asmdata.getjumplabel(hl_skip);
         ai:=Taicpu.Op_Sym(A_Jcc,S_NO,hl_skip);
         ai:=Taicpu.Op_Sym(A_Jcc,S_NO,hl_skip);
@@ -1348,23 +1604,39 @@ unit cgcpu;
         list.concat(ai);
         list.concat(ai);
 
 
         { 16-bit INC is shorter than 8-bit }
         { 16-bit INC is shorter than 8-bit }
-        hreg16:=makeregsize(list,hreg,OS_16);
+        hreg16:=makeregsize(list,reg,OS_16);
         list.concat(Taicpu.op_reg(A_INC, S_W, hreg16));
         list.concat(Taicpu.op_reg(A_INC, S_W, hreg16));
+        makeregsize(list,hreg16,tmpsize);
 
 
         a_label(list,hl_skip);
         a_label(list,hl_skip);
 
 
-        if reg<>hreg then
-          a_load_reg_reg(list,OS_8,size,hreg,reg);
+        a_load_reg_reg(list,tmpsize,size,reg,reg);
       end;
       end;
 
 
 
 
     procedure tcg8086.g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference);
     procedure tcg8086.g_flags2ref(list: TAsmList; size: TCgSize; const f: tresflags; const ref: TReference);
       var
       var
         tmpreg : tregister;
         tmpreg : tregister;
+        tmpregsize: TCgSize;
+        tmpref: treference;
       begin
       begin
-        tmpreg:=getintregister(list,size);
-        g_flags2reg(list,size,f,tmpreg);
-        a_load_reg_ref(list,size,size,tmpreg,ref);
+        if size in [OS_8,OS_S8,OS_16,OS_S16] then
+          tmpregsize:=size
+        else
+          tmpregsize:=OS_16;
+        tmpreg:=getintregister(list,tmpregsize);
+        g_flags2reg(list,tmpregsize,f,tmpreg);
+
+        tmpref:=ref;
+        make_simple_ref(list,tmpref);
+        if size in [OS_64,OS_S64] then
+          begin
+            a_load_reg_ref(list,tmpregsize,OS_32,tmpreg,tmpref);
+            inc(tmpref.offset,4);
+            a_load_const_ref(list,OS_32,0,tmpref);
+          end
+        else
+          a_load_reg_ref(list,tmpregsize,size,tmpreg,tmpref);
       end;
       end;
 
 
 
 
@@ -1630,6 +1902,72 @@ unit cgcpu;
       end;
       end;
 
 
 
 
+    procedure tcg8086.add_move_instruction(instr: Taicpu);
+      begin
+        { HACK: when regvars are on, don't notify the register allocator of any
+          direct moves to BX, so it doesn't try to coalesce them. Currently,
+          direct moves to BX are only used when returning an int64 value in
+          AX:BX:CX:DX. This hack fixes a common issue with functions, returning
+          int64, for example:
+
+        function RandomFrom(const AValues: array of Int64): Int64;
+          begin
+            result:=AValues[random(High(AValues)+1)];
+          end;
+
+    	push	bp
+    	mov	bp,sp
+; Var AValues located in register ireg20w
+; Var $highAVALUES located in register ireg21w
+; Var $result located in register ireg33w:ireg32w:ireg31w:ireg30w
+    	mov	ireg20w,word [bp+6]
+    	mov	ireg21w,word [bp+4]
+; [3] result:=AValues[random(High(AValues)+1)];
+    	mov	ireg22w,ireg21w
+    	inc	ireg22w
+    	mov	ax,ireg22w
+    	cwd
+    	mov	ireg23w,ax
+    	mov	ireg24w,dx
+    	push	ireg24w
+    	push	ireg23w
+    	call	SYSTEM_$$_RANDOM$LONGINT$$LONGINT
+    	mov	ireg25w,ax
+    	mov	ireg26w,dx
+    	mov	ireg27w,ireg25w
+    	mov	ireg28w,ireg27w
+    	mov	ireg29w,ireg28w
+    	mov	cl,3
+    	shl	ireg29w,cl
+; Var $result located in register ireg32w:ireg30w
+    	mov	ireg30w,word [ireg20w+ireg29w]
+    	mov	ireg31w,word [ireg20w+ireg29w+2]
+    	mov	ireg32w,word [ireg20w+ireg29w+4]  ; problematic section start
+    	mov	ireg33w,word [ireg20w+ireg29w+6]
+; [4] end;
+    	mov	bx,ireg32w  ; problematic section end
+    	mov	ax,ireg33w
+    	mov	dx,ireg30w
+    	mov	cx,ireg31w
+    	mov	sp,bp
+    	pop	bp
+    	ret	4
+
+        the problem arises, because the register allocator tries to coalesce
+          mov bx,ireg32w
+        however, in the references [ireg20w+ireg29w+const], due to the
+        constraints of i8086, ireg20w can only be BX (or BP, which isn't available
+        to the register allocator, because it's used as a base pointer) }
+
+        if (cs_opt_regvar in current_settings.optimizerswitches) and
+           (instr.opcode=A_MOV) and (instr.ops=2) and
+           (instr.oper[1]^.typ=top_reg) and (getsupreg(instr.oper[1]^.reg)=RS_BX) then
+          exit
+        else
+          inherited add_move_instruction(instr);
+      end;
+
+
     procedure tcg8086.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
     procedure tcg8086.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
       var
       var
         hsym : tsym;
         hsym : tsym;
@@ -1975,12 +2313,13 @@ unit cgcpu;
             end;
             end;
           OP_ADD, OP_SUB:
           OP_ADD, OP_SUB:
             begin
             begin
-              // can't use a_op_const_ref because this may use dec/inc
               get_64bit_ops(op,op1,op2);
               get_64bit_ops(op,op1,op2);
               if (value and $ffffffffffff) = 0 then
               if (value and $ffffffffffff) = 0 then
                 begin
                 begin
-                  list.concat(taicpu.op_const_reg(op1,S_W,aint((value shr 48) and $ffff),GetNextReg(reg.reghi)));
+                  { use a_op_const_reg to allow the use of inc/dec }
+                  cg.a_op_const_reg(list,op,OS_16,aint((value shr 48) and $ffff),GetNextReg(reg.reghi));
                 end
                 end
+              // can't use a_op_const_ref because this may use dec/inc
               else if (value and $ffffffff) = 0 then
               else if (value and $ffffffff) = 0 then
                 begin
                 begin
                   list.concat(taicpu.op_const_reg(op1,S_W,aint((value shr 32) and $ffff),reg.reghi));
                   list.concat(taicpu.op_const_reg(op1,S_W,aint((value shr 32) and $ffff),reg.reghi));
@@ -2023,12 +2362,13 @@ unit cgcpu;
           OP_ADD, OP_SUB:
           OP_ADD, OP_SUB:
             begin
             begin
               get_64bit_ops(op,op1,op2);
               get_64bit_ops(op,op1,op2);
-              // can't use a_op_const_ref because this may use dec/inc
               if (value and $ffffffffffff) = 0 then
               if (value and $ffffffffffff) = 0 then
                 begin
                 begin
                   inc(tempref.offset,6);
                   inc(tempref.offset,6);
-                  list.concat(taicpu.op_const_ref(op1,S_W,aint((value shr 48) and $ffff),tempref));
+                  { use a_op_const_ref to allow the use of inc/dec }
+                  cg.a_op_const_ref(list,op,OS_16,aint((value shr 48) and $ffff),tempref);
                 end
                 end
+              // can't use a_op_const_ref because this may use dec/inc
               else if (value and $ffffffff) = 0 then
               else if (value and $ffffffff) = 0 then
                 begin
                 begin
                   inc(tempref.offset,4);
                   inc(tempref.offset,4);

+ 1 - 1
compiler/i8086/cpuinfo.pas

@@ -125,7 +125,7 @@ Const
 
 
    level1optimizerswitches = genericlevel1optimizerswitches + [cs_opt_peephole];
    level1optimizerswitches = genericlevel1optimizerswitches + [cs_opt_peephole];
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
-     [{cs_opt_regvar,}cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
+     [{cs_opt_regvar,}cs_opt_stackframe,cs_opt_tailrecursion{,cs_opt_nodecse}];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [cs_useebp];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [cs_useebp];
 
 

+ 1 - 0
compiler/i8086/i8086att.inc

@@ -946,6 +946,7 @@
 'vzeroupper',
 'vzeroupper',
 'andn',
 'andn',
 'bextr',
 'bextr',
+'tzcnt',
 'rorx',
 'rorx',
 'sarx',
 'sarx',
 'shlx',
 'shlx',

+ 1 - 0
compiler/i8086/i8086atts.inc

@@ -963,5 +963,6 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 1 - 0
compiler/i8086/i8086int.inc

@@ -946,6 +946,7 @@
 'vzeroupper',
 'vzeroupper',
 'andn',
 'andn',
 'bextr',
 'bextr',
+'tzcnt',
 'rorx',
 'rorx',
 'sarx',
 'sarx',
 'shlx',
 'shlx',

+ 1 - 1
compiler/i8086/i8086nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
-1685;
+1686;

+ 1 - 0
compiler/i8086/i8086op.inc

@@ -946,6 +946,7 @@ A_VZEROALL,
 A_VZEROUPPER,
 A_VZEROUPPER,
 A_ANDN,
 A_ANDN,
 A_BEXTR,
 A_BEXTR,
+A_TZCNT,
 A_RORX,
 A_RORX,
 A_SARX,
 A_SARX,
 A_SHLX,
 A_SHLX,

+ 79 - 78
compiler/i8086/i8086prop.inc

@@ -224,8 +224,8 @@
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -389,7 +389,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_ROp1, Ch_WOp2, Ch_RFLAGS)),
+(Ch: (Ch_ROp1, Ch_RWOp2, Ch_RFLAGS)),
 (Ch: (Ch_RFLAGS, Ch_None, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_None, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
 (Ch: (Ch_RFLAGS, Ch_WOp1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
@@ -489,8 +489,8 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -570,7 +570,7 @@
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -730,6 +730,10 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -753,6 +757,12 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -761,28 +771,20 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
@@ -796,7 +798,15 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -859,6 +869,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -867,6 +885,24 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -876,15 +912,26 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -893,63 +940,17 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_None)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
-(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Wop2, Ch_WFlags, Ch_Rop1)),
+(Ch: (Ch_Rop1, Ch_Wop2, Ch_None)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
+(Ch: (Ch_Rop1, Ch_Rop2, Ch_Wop3)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),

+ 7 - 0
compiler/i8086/i8086tab.inc

@@ -11571,6 +11571,13 @@
     code    : #242#249#1#247#62#72;
     code    : #242#249#1#247#62#72;
     flags   : if_bmi1
     flags   : if_bmi1
   ),
   ),
+  (
+    opcode  : A_TZCNT;
+    ops     : 2;
+    optypes : (ot_reg16 or ot_bits32 or ot_bits64,ot_rm_gpr,ot_none,ot_none);
+    code    : #208#219#2#15#188#72;
+    flags   : if_bmi1 or if_sm
+  ),
   (
   (
     opcode  : A_RORX;
     opcode  : A_RORX;
     ops     : 3;
     ops     : 3;

+ 10 - 11
compiler/i8086/n8086add.pas

@@ -596,15 +596,6 @@ interface
 
 
     procedure ti8086addnode.second_mul(unsigned: boolean);
     procedure ti8086addnode.second_mul(unsigned: boolean);
 
 
-      procedure add_mov(instr: Taicpu);
-        begin
-          { Notify the register allocator that we have written a move instruction so
-            it can try to eliminate it. }
-          if (instr.oper[0]^.reg<>current_procinfo.framepointer) and (instr.oper[0]^.reg<>NR_STACK_POINTER_REG) then
-            tcgx86(cg).add_move_instruction(instr);
-          current_asmdata.CurrAsmList.concat(instr);
-        end;
-
     var reg:Tregister;
     var reg:Tregister;
         ref:Treference;
         ref:Treference;
         use_ref:boolean;
         use_ref:boolean;
@@ -616,6 +607,13 @@ interface
     begin
     begin
       pass_left_right;
       pass_left_right;
 
 
+      { MUL is faster than IMUL on the 8086 & 8088 (and equal in speed on 286+),
+        but it's only safe to use in place of IMUL when overflow checking is off
+        and we're doing a 16-bit>16-bit multiplication }
+      if not (cs_check_overflow in current_settings.localswitches) and
+        (not is_32bitint(resultdef)) then
+        unsigned:=true;
+
       {The location.register will be filled in later (JM)}
       {The location.register will be filled in later (JM)}
       location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
       location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
       { Mul supports registers and references, so if not register/reference,
       { Mul supports registers and references, so if not register/reference,
@@ -661,8 +659,9 @@ interface
         {Allocate an imaginary 32-bit register, which consists of a pair of
         {Allocate an imaginary 32-bit register, which consists of a pair of
          16-bit registers and store DX:AX into it}
          16-bit registers and store DX:AX into it}
         location.register := cg.getintregister(current_asmdata.CurrAsmList,OS_32);
         location.register := cg.getintregister(current_asmdata.CurrAsmList,OS_32);
-        add_mov(Taicpu.Op_reg_reg(A_MOV,S_W,NR_AX,location.register));
-        add_mov(Taicpu.Op_reg_reg(A_MOV,S_W,NR_DX,GetNextReg(location.register)));
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_DX,GetNextReg(location.register));
+        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,NR_AX,location.register);
       end
       end
       else
       else
       begin
       begin

+ 32 - 3
compiler/i8086/n8086cal.pas

@@ -28,13 +28,15 @@ interface
 { $define AnsiStrRef}
 { $define AnsiStrRef}
 
 
     uses
     uses
-      nx86cal;
+      nx86cal,cgutils;
 
 
     type
     type
        ti8086callnode = class(tx86callnode)
        ti8086callnode = class(tx86callnode)
        protected
        protected
           procedure pop_parasize(pop_size:longint);override;
           procedure pop_parasize(pop_size:longint);override;
           procedure extra_interrupt_code;override;
           procedure extra_interrupt_code;override;
+          procedure extra_call_ref_code(var ref: treference);override;
+          procedure do_call_ref(ref: treference);override;
        end;
        end;
 
 
 
 
@@ -43,11 +45,11 @@ implementation
     uses
     uses
       globtype,systems,
       globtype,systems,
       cutils,verbose,globals,
       cutils,verbose,globals,
-      cgbase,cgutils,
+      cgbase,
       cpubase,paramgr,
       cpubase,paramgr,
       aasmtai,aasmdata,aasmcpu,
       aasmtai,aasmdata,aasmcpu,
       ncal,nbas,nmem,nld,ncnv,
       ncal,nbas,nmem,nld,ncnv,
-      cga,cgobj,cpuinfo;
+      cga,cgobj,cgx86,cpuinfo;
 
 
 
 
 {*****************************************************************************
 {*****************************************************************************
@@ -92,6 +94,33 @@ implementation
       end;
       end;
 
 
 
 
+    procedure ti8086callnode.extra_call_ref_code(var ref: treference);
+      begin
+        if ref.base<>NR_NO then
+          begin
+            cg.getcpuregister(current_asmdata.CurrAsmList,NR_BX);
+            cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_16,OS_16,ref.base,NR_BX);
+            ref.base:=NR_BX;
+            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_BX);
+          end;
+        if ref.index<>NR_NO then
+          begin
+            cg.getcpuregister(current_asmdata.CurrAsmList,NR_SI);
+            cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_16,OS_16,ref.index,NR_SI);
+            ref.index:=NR_SI;
+            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_SI);
+          end;
+      end;
+
+
+    procedure ti8086callnode.do_call_ref(ref: treference);
+      begin
+        if current_settings.x86memorymodel in x86_far_code_models then
+          ref.refaddr:=addr_far_ref;
+        current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_CALL,S_NO,ref));
+      end;
+
+
 begin
 begin
    ccallnode:=ti8086callnode;
    ccallnode:=ti8086callnode;
 end.
 end.

+ 17 - 1
compiler/i8086/n8086inl.pas

@@ -36,6 +36,7 @@ interface
          function typecheck_seg: tnode; override;
          function typecheck_seg: tnode; override;
          function first_seg: tnode; override;
          function first_seg: tnode; override;
          procedure second_seg; override;
          procedure second_seg; override;
+         procedure second_get_frame;override;
        end;
        end;
 
 
 implementation
 implementation
@@ -54,7 +55,7 @@ implementation
     nbas,ncon,ncal,ncnv,nld,ncgutil,
     nbas,ncon,ncal,ncnv,nld,ncgutil,
     tgobj,
     tgobj,
     cga,cgutils,cgx86,cgobj,hlcgobj,
     cga,cgutils,cgx86,cgobj,hlcgobj,
-    htypechk;
+    htypechk,procinfo;
 
 
      function ti8086inlinenode.typecheck_seg: tnode;
      function ti8086inlinenode.typecheck_seg: tnode;
        begin
        begin
@@ -75,6 +76,21 @@ implementation
          current_asmdata.CurrAsmList.Concat(Taicpu.op_reg_reg(A_MOV,S_W,NR_DS,location.register));
          current_asmdata.CurrAsmList.Concat(Taicpu.op_reg_reg(A_MOV,S_W,NR_DS,location.register));
        end;
        end;
 
 
+     procedure ti8086inlinenode.second_get_frame;
+       begin
+         if current_settings.x86memorymodel in x86_far_data_models then
+           begin
+             if current_procinfo.framepointer=NR_STACK_POINTER_REG then
+               internalerror(2014030201);
+             location_reset(location,LOC_REGISTER,OS_32);
+             location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+             emit_reg_reg(A_MOV,S_W,current_procinfo.framepointer,location.register);
+             current_asmdata.CurrAsmList.Concat(Taicpu.op_reg_reg(A_MOV,S_W,NR_SS,GetNextReg(location.register)));
+           end
+         else
+           inherited second_get_frame;
+       end;
+
 begin
 begin
    cinlinenode:=ti8086inlinenode;
    cinlinenode:=ti8086inlinenode;
 end.
 end.

+ 3 - 3
compiler/jvm/cpubase.pas

@@ -142,13 +142,13 @@ uses
       maxfpuvarregs = 1;
       maxfpuvarregs = 1;
 
 
       { Integer Super registers first and last }
       { Integer Super registers first and last }
-      first_int_imreg = 10;
+      first_int_imreg = 2;
 
 
       { Float Super register first and last }
       { Float Super register first and last }
-      first_fpu_imreg     = 10;
+      first_fpu_imreg     = 2;
 
 
       { MM Super register first and last }
       { MM Super register first and last }
-      first_mm_imreg     = 10;
+      first_mm_imreg     = 2;
 
 
       regnumber_table : array[tregisterindex] of tregister = (
       regnumber_table : array[tregisterindex] of tregister = (
         {$i rjvmnum.inc}
         {$i rjvmnum.inc}

+ 4 - 2
compiler/jvm/hlcgcpu.pas

@@ -271,7 +271,7 @@ implementation
       if (fevalstackheight>fmaxevalstackheight) then
       if (fevalstackheight>fmaxevalstackheight) then
         fmaxevalstackheight:=fevalstackheight;
         fmaxevalstackheight:=fevalstackheight;
       if cs_asm_regalloc in current_settings.globalswitches then
       if cs_asm_regalloc in current_settings.globalswitches then
-        list.concat(tai_comment.Create(strpnew('allocated '+tostr(slots)+', stack height = '+tostr(fevalstackheight))));
+        list.concat(tai_comment.Create(strpnew('    allocated '+tostr(slots)+', stack height = '+tostr(fevalstackheight))));
     end;
     end;
 
 
   procedure thlcgjvm.decstack(list: TAsmList;slots: longint);
   procedure thlcgjvm.decstack(list: TAsmList;slots: longint);
@@ -1711,7 +1711,9 @@ implementation
               if tprocsym(sym).procdeflist.Count<>1 then
               if tprocsym(sym).procdeflist.Count<>1 then
                 internalerror(2011071713);
                 internalerror(2011071713);
               pd:=tprocdef(tprocsym(sym).procdeflist[0]);
               pd:=tprocdef(tprocsym(sym).procdeflist[0]);
-            end;
+            end
+          else
+            internalerror(2013113008);
           a_load_ref_stack(list,java_jlobject,ref,prepare_stack_for_ref(list,ref,false));
           a_load_ref_stack(list,java_jlobject,ref,prepare_stack_for_ref(list,ref,false));
           a_call_name(list,pd,pd.mangledname,nil,false);
           a_call_name(list,pd,pd.mangledname,nil,false);
           { parameter removed, no result }
           { parameter removed, no result }

+ 4 - 1
compiler/jvm/jvmdef.pas

@@ -282,7 +282,10 @@ implementation
                 s64real:
                 s64real:
                   c:='D';
                   c:='D';
                 else
                 else
-                  result:=false;
+                  begin
+                    result:=false;
+                    c:=' ';
+                  end;
               end;
               end;
               encodedstr:=encodedstr+c;
               encodedstr:=encodedstr+c;
             end;
             end;

+ 2 - 29
compiler/jvm/njvmadd.pas

@@ -38,8 +38,6 @@ interface
        protected
        protected
           function jvm_first_addset: tnode;
           function jvm_first_addset: tnode;
 
 
-          function cmpnode2topcmp(unsigned: boolean): TOpCmp;
-
           procedure second_generic_compare(unsigned: boolean);
           procedure second_generic_compare(unsigned: boolean);
 
 
           procedure pass_left_right;override;
           procedure pass_left_right;override;
@@ -215,6 +213,8 @@ interface
                       end;
                       end;
                       procname:='CONTAINSALL'
                       procname:='CONTAINSALL'
                     end;
                     end;
+                  else
+                    internalerror(2013120114);
                 end;
                 end;
               result:=ccallnode.createinternmethod(left,procname,ccallparanode.create(right,nil));
               result:=ccallnode.createinternmethod(left,procname,ccallparanode.create(right,nil));
               { for an unequaln, we have to negate the result of equals }
               { for an unequaln, we have to negate the result of equals }
@@ -333,33 +333,6 @@ interface
       end;
       end;
 
 
 
 
-    function tjvmaddnode.cmpnode2topcmp(unsigned: boolean): TOpCmp;
-      begin
-        if not unsigned then
-          case nodetype of
-            gtn: result:=OC_GT;
-            gten: result:=OC_GTE;
-            ltn: result:=OC_LT;
-            lten: result:=OC_LTE;
-            equaln: result:=OC_EQ;
-            unequaln: result:=OC_NE;
-            else
-              internalerror(2011010412);
-          end
-        else
-        case nodetype of
-          gtn: result:=OC_A;
-          gten: result:=OC_AE;
-          ltn: result:=OC_B;
-          lten: result:=OC_BE;
-          equaln: result:=OC_EQ;
-          unequaln: result:=OC_NE;
-          else
-            internalerror(2011010412);
-        end;
-      end;
-
-
     procedure tjvmaddnode.second_generic_compare(unsigned: boolean);
     procedure tjvmaddnode.second_generic_compare(unsigned: boolean);
       var
       var
         cmpop: TOpCmp;
         cmpop: TOpCmp;

+ 10 - 5
compiler/jvm/njvmmem.pas

@@ -453,6 +453,14 @@ implementation
              object instances (since that's what they are in Java) }
              object instances (since that's what they are in Java) }
            right.resultdef:=s32inttype;
            right.resultdef:=s32inttype;
            right.location.size:=OS_S32;
            right.location.size:=OS_S32;
+          end
+        else if (right.location.loc<>LOC_CONSTANT) and
+                ((right.resultdef.typ<>orddef) or
+                 (torddef(right.resultdef).ordtype<>s32bit)) then
+          begin
+            { Java array indices are always 32 bit signed integers }
+            hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,s32inttype,true);
+            right.resultdef:=s32inttype;
           end;
           end;
 
 
         { adjust index if necessary }
         { adjust index if necessary }
@@ -462,11 +470,8 @@ implementation
           begin
           begin
             thlcgjvm(hlcg).a_load_loc_stack(current_asmdata.CurrAsmList,right.resultdef,right.location);
             thlcgjvm(hlcg).a_load_loc_stack(current_asmdata.CurrAsmList,right.resultdef,right.location);
             thlcgjvm(hlcg).a_op_const_stack(current_asmdata.CurrAsmList,OP_SUB,right.resultdef,tarraydef(left.resultdef).lowrange);
             thlcgjvm(hlcg).a_op_const_stack(current_asmdata.CurrAsmList,OP_SUB,right.resultdef,tarraydef(left.resultdef).lowrange);
-            if right.location.loc<>LOC_REGISTER then
-              begin
-                location_reset(right.location,LOC_REGISTER,def_cgsize(right.resultdef));
-                right.location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,right.resultdef);
-              end;
+            location_reset(right.location,LOC_REGISTER,def_cgsize(right.resultdef));
+            right.location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,right.resultdef);
             thlcgjvm(hlcg).a_load_stack_reg(current_asmdata.CurrAsmList,right.resultdef,right.location.register);
             thlcgjvm(hlcg).a_load_stack_reg(current_asmdata.CurrAsmList,right.resultdef,right.location.register);
           end;
           end;
 
 

+ 3 - 0
compiler/jvm/pjvm.pas

@@ -80,6 +80,7 @@ implementation
         sstate: tscannerstate;
         sstate: tscannerstate;
         needclassconstructor: boolean;
         needclassconstructor: boolean;
       begin
       begin
+        ps:=nil;
         { if there is at least one constructor for a class, do nothing (for
         { if there is at least one constructor for a class, do nothing (for
            records, we'll always also need a parameterless constructor) }
            records, we'll always also need a parameterless constructor) }
         if not is_javaclass(obj) or
         if not is_javaclass(obj) or
@@ -733,6 +734,8 @@ implementation
         conststr: ansistring;
         conststr: ansistring;
         first: boolean;
         first: boolean;
       begin
       begin
+        result:=nil;
+        esym:=nil;
         case csym.constdef.typ of
         case csym.constdef.typ of
           enumdef:
           enumdef:
             begin
             begin

+ 25 - 6
compiler/link.pas

@@ -566,7 +566,7 @@ Implementation
     Function TLinker.MakeStaticLibrary:boolean;
     Function TLinker.MakeStaticLibrary:boolean;
       begin
       begin
         MakeStaticLibrary:=false;
         MakeStaticLibrary:=false;
-        Message(exec_e_dll_not_supported);
+        Message(exec_e_static_lib_not_supported);
       end;
       end;
 
 
 
 
@@ -787,7 +787,8 @@ Implementation
         binstr := FindUtil(utilsprefix + binstr);
         binstr := FindUtil(utilsprefix + binstr);
 
 
 
 
-        scripted_ar:=target_ar.id=ar_gnu_ar_scripted;
+        scripted_ar:=(target_ar.id=ar_gnu_ar_scripted) or
+                     (target_ar.id=ar_watcom_wlib_omf_scripted);
 
 
         if scripted_ar then
         if scripted_ar then
           begin
           begin
@@ -796,15 +797,25 @@ Implementation
             Assign(script, scriptfile);
             Assign(script, scriptfile);
             Rewrite(script);
             Rewrite(script);
             try
             try
-              writeln(script, 'CREATE ' + current_module.staticlibfilename);
+              if (target_ar.id=ar_gnu_ar_scripted) then
+                writeln(script, 'CREATE ' + current_module.staticlibfilename)
+              else { wlib case }
+                writeln(script,'-q -fo -c '+
+                  maybequoted(current_module.staticlibfilename));
               current := TCmdStrListItem(SmartLinkOFiles.First);
               current := TCmdStrListItem(SmartLinkOFiles.First);
               while current <> nil do
               while current <> nil do
                 begin
                 begin
-                  writeln(script, 'ADDMOD ' + current.str);
+                  if (target_ar.id=ar_gnu_ar_scripted) then
+                  writeln(script, 'ADDMOD ' + current.str)
+                  else
+                    writeln(script,'+' + current.str);
                   current := TCmdStrListItem(current.next);
                   current := TCmdStrListItem(current.next);
                 end;
                 end;
-              writeln(script, 'SAVE');
-              writeln(script, 'END');
+              if (target_ar.id=ar_gnu_ar_scripted) then
+                begin
+                  writeln(script, 'SAVE');
+                  writeln(script, 'END');
+                end;
             finally
             finally
               Close(script);
               Close(script);
             end;
             end;
@@ -1585,10 +1596,18 @@ Implementation
             arfinishcmd : ''
             arfinishcmd : ''
           );
           );
 
 
+      ar_watcom_wlib_omf_scripted_info : tarinfo =
+          (
+            id    : ar_watcom_wlib_omf_scripted;
+            arcmd : 'wlib @$SCRIPT';
+            arfinishcmd : ''
+          );
+
 
 
 initialization
 initialization
   RegisterAr(ar_gnu_ar_info);
   RegisterAr(ar_gnu_ar_info);
   RegisterAr(ar_gnu_ar_scripted_info);
   RegisterAr(ar_gnu_ar_scripted_info);
   RegisterAr(ar_gnu_gar_info);
   RegisterAr(ar_gnu_gar_info);
   RegisterAr(ar_watcom_wlib_omf_info);
   RegisterAr(ar_watcom_wlib_omf_info);
+  RegisterAr(ar_watcom_wlib_omf_scripted_info);
 end.
 end.

+ 1 - 1
compiler/m68k/ag68kgas.pas

@@ -319,7 +319,7 @@ interface
             id     : as_gas;
             id     : as_gas;
             idtxt  : 'AS';
             idtxt  : 'AS';
             asmbin : 'as';
             asmbin : 'as';
-            asmcmd : '$ARCH -o $OBJ $ASM';
+            asmcmd : '$ARCH -o $OBJ $EXTRAOPT $ASM';
             supported_targets : [system_m68k_Amiga,system_m68k_Atari,system_m68k_Mac,system_m68k_linux,system_m68k_PalmOS,system_m68k_netbsd,system_m68k_openbsd,system_m68k_embedded];
             supported_targets : [system_m68k_Amiga,system_m68k_Atari,system_m68k_Mac,system_m68k_linux,system_m68k_PalmOS,system_m68k_netbsd,system_m68k_openbsd,system_m68k_embedded];
             flags : [af_needar,af_smartlink_sections];
             flags : [af_needar,af_smartlink_sections];
             labelprefix : '.L';
             labelprefix : '.L';

+ 399 - 242
compiler/m68k/cgcpu.pas

@@ -42,7 +42,7 @@ unit cgcpu;
         procedure a_load_reg_cgpara(list : TAsmList;size : tcgsize;r : tregister;const cgpara : tcgpara);override;
         procedure a_load_reg_cgpara(list : TAsmList;size : tcgsize;r : tregister;const cgpara : tcgpara);override;
         procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const cgpara : tcgpara);override;
         procedure a_load_const_cgpara(list : TAsmList;size : tcgsize;a : tcgint;const cgpara : tcgpara);override;
         procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const cgpara : tcgpara);override;
         procedure a_load_ref_cgpara(list : TAsmList;size : tcgsize;const r : treference;const cgpara : tcgpara);override;
-        procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : tcgpara);override;
+        //procedure a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : tcgpara);override;
 
 
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
         procedure a_call_reg(list : TAsmList;reg : tregister);override;
         procedure a_call_reg(list : TAsmList;reg : tregister);override;
@@ -67,11 +67,11 @@ unit cgcpu;
         procedure a_loadmm_reg_cgpara(list: TAsmList; size: tcgsize; reg: tregister;const locpara : TCGPara;shuffle : pmmshuffle); override;
         procedure a_loadmm_reg_cgpara(list: TAsmList; size: tcgsize; reg: tregister;const locpara : TCGPara;shuffle : pmmshuffle); override;
 
 
         procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: tcgsize; a: tcgint; reg: TRegister); override;
         procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: tcgsize; a: tcgint; reg: TRegister); override;
-        //procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
+        procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
         procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; reg1, reg2: TRegister); override;
         procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; reg1, reg2: TRegister); override;
+        procedure a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize; reg: TRegister; const ref: TReference); override;
 
 
-        procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
-          l : tasmlabel);override;
+        procedure a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister; l : tasmlabel);override;
         procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
         procedure a_cmp_reg_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;reg1,reg2 : tregister;l : tasmlabel); override;
         procedure a_jmp_name(list : TAsmList;const s : string); override;
         procedure a_jmp_name(list : TAsmList;const s : string); override;
         procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
         procedure a_jmp_always(list : TAsmList;l: tasmlabel); override;
@@ -85,8 +85,8 @@ unit cgcpu;
         procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
         procedure g_proc_entry(list : TAsmList;localsize : longint;nostackframe:boolean);override;
         procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
         procedure g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);override;
 
 
-//        procedure g_restore_frame_pointer(list : TAsmList);override;
-//        procedure g_return_from_proc(list : TAsmList;parasize : tcgint);override;
+        procedure g_save_registers(list:TAsmList);override;
+        procedure g_restore_registers(list:TAsmList);override;
 
 
         procedure g_adjust_self_value(list:TAsmList;procdef:tprocdef;ioffset:tcgint);override;
         procedure g_adjust_self_value(list:TAsmList;procdef:tprocdef;ioffset:tcgint);override;
         procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
         procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
@@ -121,6 +121,7 @@ unit cgcpu;
         is 32K
         is 32K
      }
      }
      function isvalidrefoffset(const ref: treference): boolean;
      function isvalidrefoffset(const ref: treference): boolean;
+     function isvalidreference(const ref: treference): boolean;
 
 
     procedure create_codegen;
     procedure create_codegen;
 
 
@@ -156,6 +157,28 @@ unit cgcpu;
        A_NONE
        A_NONE
       );
       );
 
 
+      { opcode with extend bits table lookup, used by 64bit cg }
+      topcg2tasmopx: Array[topcg] of tasmop =
+      (
+       A_NONE,
+       A_NONE,
+       A_ADDX,
+       A_NONE,
+       A_NONE,
+       A_NONE,
+       A_NONE,
+       A_NONE,
+       A_NEGX,
+       A_NONE,
+       A_NONE,
+       A_NONE,
+       A_NONE,
+       A_NONE,
+       A_SUBX,
+       A_NONE,
+       A_NONE,
+       A_NONE
+      );
 
 
       TOpCmp2AsmCond: Array[topcmp] of TAsmCond =
       TOpCmp2AsmCond: Array[topcmp] of TAsmCond =
       (
       (
@@ -172,14 +195,26 @@ unit cgcpu;
        C_HI
        C_HI
       );
       );
 
 
+     function isvalidreference(const ref: treference): boolean;
+       begin
+         isvalidreference:=isvalidrefoffset(ref) and
+
+           { don't try to generate addressing with symbol and base reg and offset
+             it might fail in linking stage if the symbol is more than 32k away (KB) }
+           not (assigned(ref.symbol) and (ref.base <> NR_NO) and (ref.offset <> 0)) and
+
+           { coldfire and 68000 cannot handle non-addressregs as bases }
+           not ((current_settings.cputype in cpu_coldfire+[cpu_mc68000]) and
+                not isaddressregister(ref.base));
+       end;
 
 
      function isvalidrefoffset(const ref: treference): boolean;
      function isvalidrefoffset(const ref: treference): boolean;
       begin
       begin
          isvalidrefoffset := true;
          isvalidrefoffset := true;
          if ref.index <> NR_NO then
          if ref.index <> NR_NO then
            begin
            begin
-             if ref.base <> NR_NO then
-                internalerror(2002081401);
+//             if ref.base <> NR_NO then
+//                internalerror(2002081401);
              if (ref.offset < low(shortint)) or (ref.offset > high(shortint)) then
              if (ref.offset < low(shortint)) or (ref.offset > high(shortint)) then
                 isvalidrefoffset := false
                 isvalidrefoffset := false
            end
            end
@@ -364,7 +399,7 @@ unit cgcpu;
           inherited a_load_ref_cgpara(list,size,r,cgpara);
           inherited a_load_ref_cgpara(list,size,r,cgpara);
       end;
       end;
 
 
-
+{
     procedure tcg68k.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : tcgpara);
     procedure tcg68k.a_loadaddr_ref_cgpara(list : TAsmList;const r : treference;const cgpara : tcgpara);
       var
       var
         tmpreg : tregister;
         tmpreg : tregister;
@@ -403,7 +438,7 @@ unit cgcpu;
               inherited a_loadaddr_ref_cgpara(list,r,cgpara);
               inherited a_loadaddr_ref_cgpara(list,r,cgpara);
           end;
           end;
       end;
       end;
-
+}
 
 
     function tcg68k.fixref(list: TAsmList; var ref: treference): boolean;
     function tcg68k.fixref(list: TAsmList; var ref: treference): boolean;
        var
        var
@@ -705,11 +740,24 @@ unit cgcpu;
 
 
 
 
     procedure tcg68k.a_load_const_reg(list : TAsmList;size : tcgsize;a : tcgint;register : tregister);
     procedure tcg68k.a_load_const_reg(list : TAsmList;size : tcgsize;a : tcgint;register : tregister);
+      var
+        opsize: topsize;
       begin
       begin
+        opsize:=tcgsize2opsize[size];
+
         if isaddressregister(register) then
         if isaddressregister(register) then
-         begin
-           list.concat(taicpu.op_const_reg(A_MOVE,S_L,longint(a),register))
-         end
+          begin
+            { an m68k manual I have recommends SUB Ax,Ax to be used instead of CLR for address regs }
+            if a = 0 then
+              list.concat(taicpu.op_reg_reg(A_SUB,S_L,register,register))
+            else
+              { ISA B/C Coldfire has MOV3Q which can move -1 or 1..7 to any reg }
+              if (current_settings.cputype in [cpu_isa_b,cpu_isa_c]) and 
+                 ((longint(a) = -1) or ((longint(a) > 0) and (longint(a) < 8))) then
+                list.concat(taicpu.op_const_reg(A_MOV3Q,S_L,longint(a),register))
+              else
+                list.concat(taicpu.op_const_reg(A_MOVE,S_L,longint(a),register));
+          end
         else
         else
         if a = 0 then
         if a = 0 then
            list.concat(taicpu.op_reg(A_CLR,S_L,register))
            list.concat(taicpu.op_reg(A_CLR,S_L,register))
@@ -719,15 +767,28 @@ unit cgcpu;
               list.concat(taicpu.op_const_reg(A_MOVEQ,S_L,longint(a),register))
               list.concat(taicpu.op_const_reg(A_MOVEQ,S_L,longint(a),register))
            else
            else
              begin
              begin
-               { clear the register first, for unsigned and positive values, so
-                  we don't need to zero extend after }
-               if (size in [OS_16,OS_8]) or
-                  ((size in [OS_S16,OS_S8]) and (a > 0)) then
-                 list.concat(taicpu.op_reg(A_CLR,S_L,register));
-               list.concat(taicpu.op_const_reg(A_MOVE,tcgsize2opsize[size],longint(a),register));
-               { only sign extend if we need to, zero extension is not necessary because the CLR.L above }
-               if (size in [OS_S16,OS_S8]) and (a < 0) then
-                 sign_extend(list,size,register);
+               { ISA B/C Coldfire has sign extend/zero extend moves }
+               if (current_settings.cputype in [cpu_isa_b,cpu_isa_c]) and 
+                  (size in [OS_16, OS_8, OS_S16, OS_S8]) and 
+                  ((longint(a) >= low(smallint)) and (longint(a) <= high(smallint))) then
+                 begin
+                   if size in [OS_16, OS_8] then
+                     list.concat(taicpu.op_const_reg(A_MVZ,opsize,longint(a),register))
+                   else
+                     list.concat(taicpu.op_const_reg(A_MVS,opsize,longint(a),register));
+                 end
+               else
+                 begin
+                   { clear the register first, for unsigned and positive values, so
+                     we don't need to zero extend after }
+                   if (size in [OS_16,OS_8]) or
+                      ((size in [OS_S16,OS_S8]) and (a > 0)) then
+                     list.concat(taicpu.op_reg(A_CLR,S_L,register));
+                   list.concat(taicpu.op_const_reg(A_MOVE,opsize,longint(a),register));
+                   { only sign extend if we need to, zero extension is not necessary because the CLR.L above }
+                   if (size in [OS_S16,OS_S8]) and (a < 0) then
+                     sign_extend(list,size,register);
+                 end;
              end;
              end;
          end;
          end;
       end;
       end;
@@ -1007,7 +1068,7 @@ unit cgcpu;
        instr : taicpu;
        instr : taicpu;
        paraloc1,paraloc2,paraloc3 : tcgpara;
        paraloc1,paraloc2,paraloc3 : tcgpara;
       begin
       begin
-        optimize_op_const(op, a);
+        optimize_op_const(size, op, a);
         opcode := topcg2tasmop[op];
         opcode := topcg2tasmop[op];
         case op of
         case op of
           OP_NONE :
           OP_NONE :
@@ -1111,15 +1172,25 @@ unit cgcpu;
          end;
          end;
       end;
       end;
 
 
-{
+
     procedure tcg68k.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
     procedure tcg68k.a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference);
       var
       var
         opcode: tasmop;
         opcode: tasmop;
+        opsize : topsize;
       begin
       begin
-        writeln('a_op_const_ref');
-
-        optimize_op_const(op, a);
+        optimize_op_const(size, op, a);
         opcode := topcg2tasmop[op];
         opcode := topcg2tasmop[op];
+        opsize := TCGSize2OpSize[size];
+
+        { on ColdFire all arithmetic operations are only possible on 32bit }
+        if not isvalidreference(ref) or
+           ((current_settings.cputype in cpu_coldfire) and (opsize <> S_L)
+           and not (op in [OP_NONE,OP_MOVE])) then
+          begin
+            inherited;
+            exit;
+          end;
+
         case op of
         case op of
           OP_NONE :
           OP_NONE :
             begin
             begin
@@ -1130,13 +1201,32 @@ unit cgcpu;
               { Optimized, replaced with a simple load }
               { Optimized, replaced with a simple load }
               a_load_const_ref(list,size,a,ref);
               a_load_const_ref(list,size,a,ref);
             end;
             end;
-          else
+          OP_ADD,
+          OP_SUB :
             begin
             begin
-              internalerror(2007010101);
+              { add/sub works the same way, so have it unified here }
+              if (a >= 1) and (a <= 8) then
+                begin
+                  if (op = OP_ADD) then
+                    opcode:=A_ADDQ
+                  else
+                    opcode:=A_SUBQ;
+                  list.concat(taicpu.op_const_ref(opcode, opsize, a, ref));
+                end
+              else
+                if current_settings.cputype = cpu_mc68000 then
+                  list.concat(taicpu.op_const_ref(opcode, opsize, a, ref))
+                else
+                  { on ColdFire, ADDI/SUBI cannot act on memory
+                    so we can only go through a register }
+                  inherited;
             end;
             end;
+          else begin
+//            list.concat(tai_comment.create(strpnew('a_op_const_ref inherited')));
+            inherited;
+          end;
         end;
         end;
       end;
       end;
-}
 
 
     procedure tcg68k.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; reg1, reg2: TRegister);
     procedure tcg68k.a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; reg1, reg2: TRegister);
       var
       var
@@ -1246,6 +1336,36 @@ unit cgcpu;
       end;
       end;
 
 
 
 
+    procedure tcg68k.a_op_reg_ref(list : TAsmList; Op: TOpCG; size: TCGSize; reg: TRegister; const ref: TReference);
+      var
+        opcode : tasmop;
+        opsize : topsize;
+      begin
+        opcode := topcg2tasmop[op];
+        opsize := TCGSize2OpSize[size];
+
+        { on ColdFire all arithmetic operations are only possible on 32bit 
+          and addressing modes are limited }
+        if not isvalidreference(ref) or
+           ((current_settings.cputype in cpu_coldfire) and (opsize <> S_L)) then
+          begin
+            inherited;
+            exit;
+          end;
+
+        case op of
+          OP_ADD,
+          OP_SUB :
+            begin
+              { add/sub works the same way, so have it unified here }
+              list.concat(taicpu.op_reg_ref(opcode, opsize, reg, ref));
+            end;
+          else begin
+//            list.concat(tai_comment.create(strpnew('a_op_reg_ref inherited')));
+            inherited;
+          end;
+        end;
+      end;
 
 
     procedure tcg68k.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
     procedure tcg68k.a_cmp_const_reg_label(list : TAsmList;size : tcgsize;cmp_op : topcmp;a : tcgint;reg : tregister;
             l : tasmlabel);
             l : tasmlabel);
@@ -1572,65 +1692,57 @@ unit cgcpu;
           end;
           end;
       end;
       end;
 
 
-{    procedure tcg68k.g_restore_frame_pointer(list : TAsmList);
-      var
-        r:Tregister;
-      begin
-        r:=NR_FRAME_POINTER_REG;
-        list.concat(taicpu.op_reg(A_UNLK,S_NO,r));
-      end;
-}
-
     procedure tcg68k.g_proc_exit(list : TAsmList; parasize: longint; nostackframe: boolean);
     procedure tcg68k.g_proc_exit(list : TAsmList; parasize: longint; nostackframe: boolean);
       var
       var
         r,hregister : TRegister;
         r,hregister : TRegister;
-        localsize: tcgint;
         spr : TRegister;
         spr : TRegister;
         fpr : TRegister;
         fpr : TRegister;
         ref : TReference;
         ref : TReference;
       begin
       begin
         if not nostackframe then
         if not nostackframe then
           begin
           begin
-            localsize := current_procinfo.calc_stackframe_size;
             list.concat(taicpu.op_reg(A_UNLK,S_NO,NR_FRAME_POINTER_REG));
             list.concat(taicpu.op_reg(A_UNLK,S_NO,NR_FRAME_POINTER_REG));
-	    parasize := parasize - target_info.first_parm_offset; { i'm still not 100% confident that this is
-	                                                            correct here, but at least it looks less
-								    hacky, and makes some sense (KB) }
-            if (parasize<>0) then
+            parasize := parasize - target_info.first_parm_offset; { i'm still not 100% confident that this is
+                                                                    correct here, but at least it looks less
+                                                                    hacky, and makes some sense (KB) }
+
+            { if parasize is less than zero here, we probably have a cdecl function.
+              According to the info here: http://www.makestuff.eu/wordpress/gcc-68000-abi/
+              68k GCC uses two different methods to free the stack, depending if the target
+              architecture supports RTD or not, and one does callee side, the other does
+              caller side free, which looks like a PITA to support. We have to figure this 
+              out later. More info welcomed. (KB) }
+
+            if (parasize > 0) then
               begin
               begin
-                { only 68020+ supports RTD, so this needs another code path
-                  for 68000 and Coldfire (KB) }
-{ TODO: 68020+ only code generation, without fallback}
                 if current_settings.cputype=cpu_mc68020 then
                 if current_settings.cputype=cpu_mc68020 then
                   list.concat(taicpu.op_const(A_RTD,S_NO,parasize))
                   list.concat(taicpu.op_const(A_RTD,S_NO,parasize))
                 else
                 else
                   begin
                   begin
-
                     { We must pull the PC Counter from the stack, before  }
                     { We must pull the PC Counter from the stack, before  }
                     { restoring the stack pointer, otherwise the PC would }
                     { restoring the stack pointer, otherwise the PC would }
                     { point to nowhere!                                   }
                     { point to nowhere!                                   }
 
 
-                    { save the PC counter (pop it from the stack)         }
-                    { use A0 for this which is defined as a scratch       }
-                    { register                                            }
+                    { Instead of doing a slow copy of the return address while trying    }
+                    { to feed it to the RTS instruction, load the PC to A0 (scratch reg) }
+                    { then free up the stack allocated for paras, then use a JMP (A0) to }
+                    { return to the caller with the paras freed. (KB) }
+
                     hregister:=NR_A0;
                     hregister:=NR_A0;
                     cg.a_reg_alloc(list,hregister);
                     cg.a_reg_alloc(list,hregister);
                     reference_reset_base(ref,NR_STACK_POINTER_REG,0,4);
                     reference_reset_base(ref,NR_STACK_POINTER_REG,0,4);
                     ref.direction:=dir_inc;
                     ref.direction:=dir_inc;
                     list.concat(taicpu.op_ref_reg(A_MOVE,S_L,ref,hregister));
                     list.concat(taicpu.op_ref_reg(A_MOVE,S_L,ref,hregister));
-                    { can we do a quick addition ... }
+
                     r:=NR_SP;
                     r:=NR_SP;
+                    { can we do a quick addition ... }
                     if (parasize > 0) and (parasize < 9) then
                     if (parasize > 0) and (parasize < 9) then
                        list.concat(taicpu.op_const_reg(A_ADDQ,S_L,parasize,r))
                        list.concat(taicpu.op_const_reg(A_ADDQ,S_L,parasize,r))
                     else { nope ... }
                     else { nope ... }
                        list.concat(taicpu.op_const_reg(A_ADD,S_L,parasize,r));
                        list.concat(taicpu.op_const_reg(A_ADD,S_L,parasize,r));
 
 
-                    { restore the PC counter (push it on the stack)       }
-                    reference_reset_base(ref,NR_STACK_POINTER_REG,0,4);
-                    ref.direction:=dir_dec;
-                    cg.a_reg_alloc(list,hregister);
-                    list.concat(taicpu.op_reg_ref(A_MOVE,S_L,hregister,ref));
-                    list.concat(taicpu.op_none(A_RTS,S_NO));
+                    reference_reset_base(ref,hregister,0,4);
+                    list.concat(taicpu.op_ref(A_JMP,S_NO,ref));
                   end;
                   end;
               end
               end
             else
             else
@@ -1641,9 +1753,9 @@ unit cgcpu;
             list.concat(taicpu.op_none(A_RTS,S_NO));
             list.concat(taicpu.op_none(A_RTS,S_NO));
           end;
           end;
 
 
-//         writeln('g_proc_exit');
          { Routines with the poclearstack flag set use only a ret.
          { Routines with the poclearstack flag set use only a ret.
-           also  routines with parasize=0     }
+           also  routines with parasize=0 }
+         { TODO: figure out if these are still relevant to us (KB) }
            (*
            (*
          if current_procinfo.procdef.proccalloption in clearstack_pocalls then
          if current_procinfo.procdef.proccalloption in clearstack_pocalls then
            begin
            begin
@@ -1658,41 +1770,115 @@ unit cgcpu;
              list.concat(taicpu.op_none(A_RTS,S_NO));
              list.concat(taicpu.op_none(A_RTS,S_NO));
            end
            end
          else
          else
-           begin
-            { return with immediate size possible here
-              signed!
-              RTD is not supported on the coldfire     }
-            if (current_settings.cputype=cpu_MC68020) and (parasize<$7FFF) then
-                list.concat(taicpu.op_const(A_RTD,S_NO,parasize))
-            { manually restore the stack }
+           *)
+      end;
+
+
+    procedure tcg68k.g_save_registers(list:TAsmList);
+      var
+        dataregs: tcpuregisterset;
+        addrregs: tcpuregisterset;
+        href : treference;
+        hreg : tregister;
+        size : longint;
+        r : integer;
+      begin
+        { The code generated by the section below, particularly the movem.l
+          instruction is known to cause an issue when compiled by some GNU 
+          assembler versions (I had it with 2.17, while 2.24 seems OK.) 
+          when you run into this problem, just call inherited here instead
+          to skip the movem.l generation. But better just use working GNU
+          AS version instead. (KB) }
+        dataregs:=[];
+        addrregs:=[];
+
+        { calculate temp. size }
+        size:=0;
+        for r:=low(saved_standard_registers) to high(saved_standard_registers) do
+          if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then
+            begin
+              hreg:=newreg(R_INTREGISTER,saved_address_registers[r],R_SUBWHOLE);
+              inc(size,sizeof(aint));
+              dataregs:=dataregs + [saved_standard_registers[r]];
+            end;
+        if uses_registers(R_ADDRESSREGISTER) then
+          for r:=low(saved_address_registers) to high(saved_address_registers) do
+            if saved_address_registers[r] in rg[R_ADDRESSREGISTER].used_in_proc then
+              begin
+                hreg:=newreg(R_ADDRESSREGISTER,saved_address_registers[r],R_SUBWHOLE);
+                inc(size,sizeof(aint));
+                addrregs:=addrregs + [saved_address_registers[r]];
+              end;
+
+        { 68k has no MM registers }
+        if uses_registers(R_MMREGISTER) then
+          internalerror(2014030201);
+
+        if size>0 then
+          begin
+            tg.GetTemp(list,size,sizeof(aint),tt_noreuse,current_procinfo.save_regs_ref);
+            include(current_procinfo.flags,pi_has_saved_regs);
+
+            { Copy registers to temp }
+            href:=current_procinfo.save_regs_ref;
+            if size = sizeof(aint) then
+              a_load_reg_ref(list, OS_32, OS_32, hreg, href)
             else
             else
+              list.concat(taicpu.op_regset_ref(A_MOVEM,S_L,dataregs,addrregs,href));
+          end;
+      end;
+
+
+    procedure tcg68k.g_restore_registers(list:TAsmList);
+      var
+        dataregs: tcpuregisterset;
+        addrregs: tcpuregisterset;
+        href    : treference;
+        r       : integer;
+        hreg    : tregister;
+        size    : longint;
+      begin
+        { see the remark about buggy GNU AS versions in g_save_registers() (KB) }
+        dataregs:=[];
+        addrregs:=[];
+
+        if not(pi_has_saved_regs in current_procinfo.flags) then
+          exit;
+        { Copy registers from temp }
+        size:=0;
+        for r:=low(saved_standard_registers) to high(saved_standard_registers) do
+          if saved_standard_registers[r] in rg[R_INTREGISTER].used_in_proc then
+            begin
+              inc(size,sizeof(aint));
+              hreg:=newreg(R_INTREGISTER,saved_standard_registers[r],R_SUBWHOLE);
+              { Allocate register so the optimizer does not remove the load }
+              a_reg_alloc(list,hreg);
+              dataregs:=dataregs + [saved_standard_registers[r]];
+            end;
+
+        if uses_registers(R_ADDRESSREGISTER) then
+          for r:=low(saved_address_registers) to high(saved_address_registers) do
+            if saved_address_registers[r] in rg[R_ADDRESSREGISTER].used_in_proc then
               begin
               begin
-                { We must pull the PC Counter from the stack, before  }
-                { restoring the stack pointer, otherwise the PC would }
-                { point to nowhere!                                   }
-
-                { save the PC counter (pop it from the stack)         }
-                hregister:=NR_A3;
-                cg.a_reg_alloc(list,hregister);
-                reference_reset_base(ref,NR_STACK_POINTER_REG,0);
-                ref.direction:=dir_inc;
-                list.concat(taicpu.op_ref_reg(A_MOVE,S_L,ref,hregister));
-                { can we do a quick addition ... }
-                r:=NR_SP;
-                if (parasize > 0) and (parasize < 9) then
-                   list.concat(taicpu.op_const_reg(A_ADDQ,S_L,parasize,r))
-                else { nope ... }
-                   list.concat(taicpu.op_const_reg(A_ADD,S_L,parasize,r));
-
-                { restore the PC counter (push it on the stack)       }
-                reference_reset_base(ref,NR_STACK_POINTER_REG,0);
-                ref.direction:=dir_dec;
-                cg.a_reg_alloc(list,hregister);
-                list.concat(taicpu.op_reg_ref(A_MOVE,S_L,hregister,ref));
-                list.concat(taicpu.op_none(A_RTS,S_NO));
-               end;
-           end;
-           *)
+                inc(size,sizeof(aint));
+                hreg:=newreg(R_ADDRESSREGISTER,saved_address_registers[r],R_SUBWHOLE);
+                { Allocate register so the optimizer does not remove the load }
+                a_reg_alloc(list,hreg);
+                addrregs:=addrregs + [saved_address_registers[r]];
+              end;
+
+        { 68k has no MM registers }
+        if uses_registers(R_MMREGISTER) then
+          internalerror(2014030202);
+
+        { Restore registers from temp }
+        href:=current_procinfo.save_regs_ref;
+        if size = sizeof(aint) then
+          a_load_ref_reg(list, OS_32, OS_32, href, hreg)
+        else
+          list.concat(taicpu.op_ref_regset(A_MOVEM,S_L,href,dataregs,addrregs));
+
+        tg.UnGetTemp(list,current_procinfo.save_regs_ref);
       end;
       end;
 
 
 
 
@@ -1909,156 +2095,127 @@ unit cgcpu;
 {****************************************************************************}
 {****************************************************************************}
 {                               TCG64F68K                                    }
 {                               TCG64F68K                                    }
 {****************************************************************************}
 {****************************************************************************}
- procedure tcg64f68k.a_op64_reg_reg(list : TAsmList;op:TOpCG;size: tcgsize; regsrc,regdst : tregister64);
-  var
-   hreg1, hreg2 : tregister;
-   opcode : tasmop;
-   instr : taicpu;
-  begin
-//    writeln('a_op64_reg_reg');
-    opcode := topcg2tasmop[op];
-    case op of
-      OP_ADD :
-         begin
-            { if one of these three registers is an address
-              register, we'll really get into problems!
-            }
-            if isaddressregister(regdst.reglo) or
-               isaddressregister(regdst.reghi) or
-               isaddressregister(regsrc.reghi) then
-                 internalerror(20020817);
-            list.concat(taicpu.op_reg_reg(A_ADD,S_L,regsrc.reglo,regdst.reglo));
-            list.concat(taicpu.op_reg_reg(A_ADDX,S_L,regsrc.reghi,regdst.reghi));
-         end;
-      OP_AND,OP_OR :
-          begin
-            { at least one of the registers must be a data register }
-            if (isaddressregister(regdst.reglo) and
-                isaddressregister(regsrc.reglo)) or
-               (isaddressregister(regsrc.reghi) and
-                isaddressregister(regdst.reghi))
-               then
-                 internalerror(20020817);
-            cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
-            cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
-          end;
-      { this is handled in 1st pass for 32-bit cpu's (helper call) }
-      OP_IDIV,OP_DIV,
-      OP_IMUL,OP_MUL: internalerror(2002081701);
-      { this is also handled in 1st pass for 32-bit cpu's (helper call) }
-      OP_SAR,OP_SHL,OP_SHR: internalerror(2002081702);
-      OP_SUB:
-         begin
-            { if one of these three registers is an address
-              register, we'll really get into problems!
-            }
-            if isaddressregister(regdst.reglo) or
-               isaddressregister(regdst.reghi) or
-               isaddressregister(regsrc.reghi) then
-                 internalerror(20020817);
-            list.concat(taicpu.op_reg_reg(A_SUB,S_L,regsrc.reglo,regdst.reglo));
-            list.concat(taicpu.op_reg_reg(A_SUBX,S_L,regsrc.reghi,regdst.reghi));
-         end;
-      OP_XOR:
-        begin
-            if isaddressregister(regdst.reglo) or
-               isaddressregister(regsrc.reglo) or
-               isaddressregister(regsrc.reghi) or
-               isaddressregister(regdst.reghi) then
-                 internalerror(20020817);
-            list.concat(taicpu.op_reg_reg(A_EOR,S_L,regsrc.reglo,regdst.reglo));
-            list.concat(taicpu.op_reg_reg(A_EOR,S_L,regsrc.reghi,regdst.reghi));
-        end;
-      OP_NEG:
-        begin
-          if isaddressregister(regdst.reglo) or
-              isaddressregister(regdst.reghi) then
-            internalerror(2012110402);
-          instr:=taicpu.op_reg_reg(A_MOVE,S_L,regsrc.reglo,regdst.reglo);
-          cg.add_move_instruction(instr);
-          list.concat(instr);
-          instr:=taicpu.op_reg_reg(A_MOVE,S_L,regsrc.reghi,regdst.reghi);
-          cg.add_move_instruction(instr);
-          list.concat(instr);
-          list.concat(taicpu.op_reg(A_NEG,S_L,regdst.reglo));
-          list.concat(taicpu.op_reg(A_NEGX,S_L,regdst.reghi));
-        end;
-      OP_NOT:
-        begin
-          if isaddressregister(regdst.reglo) or
-              isaddressregister(regdst.reghi) then
-            internalerror(2012110401);
-          instr:=taicpu.op_reg_reg(A_MOVE,S_L,regsrc.reglo,regdst.reglo);
-          cg.add_move_instruction(instr);
-          list.concat(instr);
-          instr:=taicpu.op_reg_reg(A_MOVE,S_L,regsrc.reghi,regdst.reghi);
-          cg.add_move_instruction(instr);
-          list.concat(instr);
-          list.concat(taicpu.op_reg(A_NOT,S_L,regdst.reglo));
-          list.concat(taicpu.op_reg(A_NOT,S_L,regdst.reghi));
-        end;
-    end; { end case }
-  end;
+    procedure tcg64f68k.a_op64_reg_reg(list : TAsmList;op:TOpCG;size: tcgsize; regsrc,regdst : tregister64);
+      var
+        hreg1, hreg2 : tregister;
+        opcode : tasmop;
+        xopcode : tasmop;
+        instr : taicpu;
+      begin
+        opcode := topcg2tasmop[op];
+        xopcode := topcg2tasmopx[op];
 
 
+        case op of
+          OP_ADD,OP_SUB:
+            begin
+              { if one of these three registers is an address
+              register, we'll really get into problems! }
+              if isaddressregister(regdst.reglo) or
+                 isaddressregister(regdst.reghi) or
+                 isaddressregister(regsrc.reghi) then
+                internalerror(2014030101);
+              list.concat(taicpu.op_reg_reg(opcode,S_L,regsrc.reglo,regdst.reglo));
+              list.concat(taicpu.op_reg_reg(xopcode,S_L,regsrc.reghi,regdst.reghi));
+            end;
+          OP_AND,OP_OR:
+            begin
+              { at least one of the registers must be a data register }
+              if (isaddressregister(regdst.reglo) and
+                  isaddressregister(regsrc.reglo)) or
+                 (isaddressregister(regsrc.reghi) and
+                  isaddressregister(regdst.reghi)) then
+                internalerror(2014030102);
+              cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
+              cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
+            end;
+          { this is handled in 1st pass for 32-bit cpu's (helper call) }
+          OP_IDIV,OP_DIV,
+          OP_IMUL,OP_MUL: 
+            internalerror(2002081701);
+          { this is also handled in 1st pass for 32-bit cpu's (helper call) }
+          OP_SAR,OP_SHL,OP_SHR:
+            internalerror(2002081702);
+          OP_XOR:
+            begin
+              if isaddressregister(regdst.reglo) or
+                 isaddressregister(regsrc.reglo) or
+                 isaddressregister(regsrc.reghi) or
+                 isaddressregister(regdst.reghi) then
+                internalerror(2014030103);
+              cg.a_op_reg_reg(list,op,OS_32,regsrc.reglo,regdst.reglo);
+              cg.a_op_reg_reg(list,op,OS_32,regsrc.reghi,regdst.reghi);
+            end;
+          OP_NEG,OP_NOT:
+            begin
+              if isaddressregister(regdst.reglo) or
+                 isaddressregister(regdst.reghi) then
+               internalerror(2014030104);
+              instr:=taicpu.op_reg_reg(A_MOVE,S_L,regsrc.reglo,regdst.reglo);
+              cg.add_move_instruction(instr);
+              list.concat(instr);
+              instr:=taicpu.op_reg_reg(A_MOVE,S_L,regsrc.reghi,regdst.reghi);
+              cg.add_move_instruction(instr);
+              list.concat(instr);
+              if (op = OP_NOT) then
+                xopcode:=opcode;
+              list.concat(taicpu.op_reg(opcode,S_L,regdst.reglo));
+              list.concat(taicpu.op_reg(xopcode,S_L,regdst.reghi));
+            end;
+        end; { end case }
+      end;
 
 
- procedure tcg64f68k.a_op64_const_reg(list : TAsmList;op:TOpCG;size: tcgsize; value : int64;regdst : tregister64);
-  var
-   lowvalue : cardinal;
-   highvalue : cardinal;
-   hreg : tregister;
-  begin
-//    writeln('a_op64_const_reg');
-    { is it optimized out ? }
-//    if cg.optimize64_op_const_reg(list,op,value,reg) then
-//       exit;
-
-    lowvalue := cardinal(value);
-    highvalue:= value shr 32;
-
-   { the destination registers must be data registers }
-   if  isaddressregister(regdst.reglo) or
-       isaddressregister(regdst.reghi) then
-         internalerror(20020817);
-   case op of
-      OP_ADD :
-         begin
-           hreg:=cg.getintregister(list,OS_INT);
-           list.concat(taicpu.op_const_reg(A_MOVE,S_L,highvalue,hreg));
-           list.concat(taicpu.op_const_reg(A_ADD,S_L,lowvalue,regdst.reglo));
-           list.concat(taicpu.op_reg_reg(A_ADDX,S_L,hreg,regdst.reghi));
-         end;
-      OP_AND :
-          begin
-            list.concat(taicpu.op_const_reg(A_AND,S_L,lowvalue,regdst.reglo));
-            list.concat(taicpu.op_const_reg(A_AND,S_L,highvalue,regdst.reghi));
-          end;
-      OP_OR :
-          begin
-            list.concat(taicpu.op_const_reg(A_OR,S_L,lowvalue,regdst.reglo));
-            list.concat(taicpu.op_const_reg(A_OR,S_L,highvalue,regdst.reghi));
-          end;
-      { this is handled in 1st pass for 32-bit cpus (helper call) }
-      OP_IDIV,OP_DIV,
-      OP_IMUL,OP_MUL: internalerror(2002081701);
-      { this is also handled in 1st pass for 32-bit cpus (helper call) }
-      OP_SAR,OP_SHL,OP_SHR: internalerror(2002081702);
-      OP_SUB:
-         begin
-           hreg:=cg.getintregister(list,OS_INT);
-           list.concat(taicpu.op_const_reg(A_MOVE,S_L,highvalue,hreg));
-           list.concat(taicpu.op_const_reg(A_SUB,S_L,lowvalue,regdst.reglo));
-           list.concat(taicpu.op_reg_reg(A_SUBX,S_L,hreg,regdst.reghi));
-         end;
-      OP_XOR:
-        begin
-            list.concat(taicpu.op_const_reg(A_EOR,S_L,lowvalue,regdst.reglo));
-            list.concat(taicpu.op_const_reg(A_EOR,S_L,highvalue,regdst.reghi));
-        end;
-      { these should have been handled already by earlier passes }
-      OP_NOT, OP_NEG:
-        internalerror(2012110403);
-    end; { end case }
-  end;
+
+    procedure tcg64f68k.a_op64_const_reg(list : TAsmList;op:TOpCG;size: tcgsize; value : int64;regdst : tregister64);
+      var
+        lowvalue : cardinal;
+        highvalue : cardinal;
+        opcode : tasmop;
+        xopcode : tasmop;
+        hreg : tregister;
+      begin
+        { is it optimized out ? }
+        { optimize64_op_const_reg doesn't seem to be used in any cg64f32 right now. why? (KB) }
+        { if cg.optimize64_op_const_reg(list,op,value,reg) then
+            exit; }
+
+        lowvalue := cardinal(value);
+        highvalue := value shr 32;
+
+        opcode := topcg2tasmop[op];
+        xopcode := topcg2tasmopx[op];
+
+        { the destination registers must be data registers }
+        if isaddressregister(regdst.reglo) or
+           isaddressregister(regdst.reghi) then
+          internalerror(2014030105);
+        case op of
+          OP_ADD,OP_SUB:
+            begin
+              hreg:=cg.getintregister(list,OS_INT);
+              { cg.a_load_const_reg provides optimized loading to register for special cases }
+              cg.a_load_const_reg(list,OS_S32,longint(highvalue),hreg);
+              { don't use cg.a_op_const_reg() here, because a possible optimized
+                ADDQ/SUBQ wouldn't set the eXtend bit }
+              list.concat(taicpu.op_const_reg(opcode,S_L,lowvalue,regdst.reglo));
+              list.concat(taicpu.op_reg_reg(xopcode,S_L,hreg,regdst.reghi));
+            end;
+          OP_AND,OP_OR,OP_XOR:
+            begin
+              cg.a_op_const_reg(list,op,OS_S32,longint(lowvalue),regdst.reglo);
+              cg.a_op_const_reg(list,op,OS_S32,longint(highvalue),regdst.reghi);
+            end;
+          { this is handled in 1st pass for 32-bit cpus (helper call) }
+          OP_IDIV,OP_DIV,
+          OP_IMUL,OP_MUL:
+            internalerror(2002081701);
+          { this is also handled in 1st pass for 32-bit cpus (helper call) }
+          OP_SAR,OP_SHL,OP_SHR: 
+            internalerror(2002081702);
+          { these should have been handled already by earlier passes }
+          OP_NOT,OP_NEG:
+            internalerror(2012110403);
+        end; { end case }
+      end;
 
 
 
 
 procedure create_codegen;
 procedure create_codegen;

+ 1 - 1
compiler/m68k/cpubase.pas

@@ -152,7 +152,7 @@ unit cpubase;
       { registers which may be destroyed by calls }
       { registers which may be destroyed by calls }
       VOLATILE_INTREGISTERS = [RS_D0,RS_D1];
       VOLATILE_INTREGISTERS = [RS_D0,RS_D1];
       VOLATILE_FPUREGISTERS = [];
       VOLATILE_FPUREGISTERS = [];
-      VOLATILE_ADDRESSREGISTER = [RS_A0,RS_A1];
+      VOLATILE_ADDRESSREGISTERS = [RS_A0,RS_A1];
 
 
     type
     type
       totherregisterset = set of tregisterindex;
       totherregisterset = set of tregisterindex;

+ 25 - 91
compiler/m68k/cpupara.pas

@@ -52,6 +52,7 @@ unit cpupara;
           function get_volatile_registers_int(calloption:tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_int(calloption:tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_address(calloption:tproccalloption):tcpuregisterset;override;
           function get_volatile_registers_address(calloption:tproccalloption):tcpuregisterset;override;
          private
          private
+          function parse_loc_string_to_register(var locreg: tregister; const s : string): boolean;
           procedure init_values(var curintreg, curfloatreg: tsuperregister; var cur_stack_offset: aword);
           procedure init_values(var curintreg, curfloatreg: tsuperregister; var cur_stack_offset: aword);
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
           function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
                                                var curintreg, curfloatreg: tsuperregister; var cur_stack_offset: aword):longint;
                                                var curintreg, curfloatreg: tsuperregister; var cur_stack_offset: aword):longint;
@@ -66,6 +67,21 @@ unit cpupara;
        cpuinfo,
        cpuinfo,
        defutil;
        defutil;
 
 
+
+    function tcpuparamanager.get_volatile_registers_int(calloption:tproccalloption):tcpuregisterset;
+      begin
+        { d0 and d1 are considered volatile }
+        Result:=VOLATILE_INTREGISTERS;
+      end;
+
+
+    function tcpuparamanager.get_volatile_registers_address(calloption:tproccalloption):tcpuregisterset;
+      begin
+        { a0 and a1 are considered volatile }
+        Result:=VOLATILE_ADDRESSREGISTERS;
+      end;
+
+
     procedure tcpuparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
     procedure tcpuparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
       var
         paraloc : pcgparalocation;
         paraloc : pcgparalocation;
@@ -445,71 +461,22 @@ unit cpupara;
       end;
       end;
 }
 }
 
 
+    function tcpuparamanager.parse_loc_string_to_register(var locreg: tregister; const s : string): boolean;
+      begin
+        locreg:=std_regnum_search(lowercase(s));
+        result:=(locreg <> NR_NO) and (locreg <> NR_SP);
+      end;
+
     function tcpuparamanager.parsefuncretloc(p : tabstractprocdef; const s : string) : boolean;
     function tcpuparamanager.parsefuncretloc(p : tabstractprocdef; const s : string) : boolean;
       begin
       begin
-        result:=false;
         case target_info.system of
         case target_info.system of
           system_m68k_amiga:
           system_m68k_amiga:
-            begin
-              if s='D0' then
-                p.exp_funcretloc:=NR_D0
-              else if s='D1' then
-                p.exp_funcretloc:=NR_D1
-              else if s='D2' then
-                p.exp_funcretloc:=NR_D2
-              else if s='D3' then
-                p.exp_funcretloc:=NR_D3
-              else if s='D4' then
-                p.exp_funcretloc:=NR_D4
-              else if s='D5' then
-                p.exp_funcretloc:=NR_D5
-              else if s='D6' then
-                p.exp_funcretloc:=NR_D6
-              else if s='D7' then
-                p.exp_funcretloc:=NR_D7
-              else if s='A0' then
-                p.exp_funcretloc:=NR_A0
-              else if s='A1' then
-                p.exp_funcretloc:=NR_A1
-              else if s='A2' then
-                p.exp_funcretloc:=NR_A2
-              else if s='A3' then
-                p.exp_funcretloc:=NR_A3
-              else if s='A4' then
-                p.exp_funcretloc:=NR_A4
-              else if s='A5' then
-                p.exp_funcretloc:=NR_A5
-              { 'A6' is problematic, since it's the frame pointer in fpc,
-                so it should be saved before a call! }
-              else if s='A6' then
-                p.exp_funcretloc:=NR_A6
-              { 'A7' is the stack pointer on 68k, can't be overwritten by API calls }
-              else
-                p.exp_funcretloc:=NR_NO;
-
-              if p.exp_funcretloc<>NR_NO then result:=true;
-            end;
+            result:=parse_loc_string_to_register(p.exp_funcretloc, s);
           else
           else
             internalerror(2005121801);
             internalerror(2005121801);
         end;
         end;
       end;
       end;
 
 
-    function tcpuparamanager.get_volatile_registers_int(calloption:tproccalloption):tcpuregisterset;
-      begin
-        { d0 and d1 are considered volatile (ToDo: results in "procedure too
-          complex when compiling unicodedata.pas) }
-        //Result:=[RS_D0,RS_D1];
-        Result:=[RS_D0..RS_D7];
-      end;
-
-
-    function tcpuparamanager.get_volatile_registers_address(calloption:tproccalloption):tcpuregisterset;
-      begin
-        { a0 and a1 are considered volatile }
-        Result:=[RS_A0,RS_A1];
-      end;
-
-
     function tcpuparamanager.parseparaloc(p : tparavarsym;const s : string) : boolean;
     function tcpuparamanager.parseparaloc(p : tparavarsym;const s : string) : boolean;
       var
       var
         paraloc : pcgparalocation;
         paraloc : pcgparalocation;
@@ -523,41 +490,8 @@ unit cpupara;
               paraloc^.loc:=LOC_REGISTER;
               paraloc^.loc:=LOC_REGISTER;
               paraloc^.size:=def_cgsize(p.vardef);
               paraloc^.size:=def_cgsize(p.vardef);
               paraloc^.def:=p.vardef;
               paraloc^.def:=p.vardef;
-              { pattern is always uppercase'd }
-              if s='D0' then
-                paraloc^.register:=NR_D0
-              else if s='D1' then
-                paraloc^.register:=NR_D1
-              else if s='D2' then
-                paraloc^.register:=NR_D2
-              else if s='D3' then
-                paraloc^.register:=NR_D3
-              else if s='D4' then
-                paraloc^.register:=NR_D4
-              else if s='D5' then
-                paraloc^.register:=NR_D5
-              else if s='D6' then
-                paraloc^.register:=NR_D6
-              else if s='D7' then
-                paraloc^.register:=NR_D7
-              else if s='A0' then
-                paraloc^.register:=NR_A0
-              else if s='A1' then
-                paraloc^.register:=NR_A1
-              else if s='A2' then
-                paraloc^.register:=NR_A2
-              else if s='A3' then
-                paraloc^.register:=NR_A3
-              else if s='A4' then
-                paraloc^.register:=NR_A4
-              else if s='A5' then
-                paraloc^.register:=NR_A5
-              { 'A6' is problematic, since it's the frame pointer in fpc,
-                so it should be saved before a call! }
-              else if s='A6' then
-                paraloc^.register:=NR_A6
-              { 'A7' is the stack pointer on 68k, can't be overwritten by API calls }
-              else
+
+              if not parse_loc_string_to_register(paraloc^.register, s) then
                 exit;
                 exit;
 
 
               { copy to callee side }
               { copy to callee side }

+ 12 - 12
compiler/m68k/n68kadd.pas

@@ -77,7 +77,7 @@ implementation
         tmpreg:=cg.getintregister(current_asmdata.currasmlist,OS_INT);
         tmpreg:=cg.getintregister(current_asmdata.currasmlist,OS_INT);
 
 
         { load the value for "false" }
         { load the value for "false" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,0,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,0,tmpreg);
 
 
         current_asmdata.getjumplabel(labelcmp64_1);
         current_asmdata.getjumplabel(labelcmp64_1);
         current_asmdata.getjumplabel(labelcmp64_2);
         current_asmdata.getjumplabel(labelcmp64_2);
@@ -102,7 +102,7 @@ implementation
         cg.a_label(current_asmdata.currasmlist,labelcmp64_2);
         cg.a_label(current_asmdata.currasmlist,labelcmp64_2);
 
 
         { load the value for "true" }
         { load the value for "true" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,1,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,1,tmpreg);
 
 
         cg.a_label(current_asmdata.currasmlist,labelcmp64_1);
         cg.a_label(current_asmdata.currasmlist,labelcmp64_1);
         result:=tmpreg;
         result:=tmpreg;
@@ -116,7 +116,7 @@ implementation
         tmpreg:=cg.getintregister(current_asmdata.currasmlist,OS_INT);
         tmpreg:=cg.getintregister(current_asmdata.currasmlist,OS_INT);
 
 
         { load the value for "false" }
         { load the value for "false" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,0,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,0,tmpreg);
 
 
         current_asmdata.getjumplabel(labelcmp64_1);
         current_asmdata.getjumplabel(labelcmp64_1);
         current_asmdata.getjumplabel(labelcmp64_2);
         current_asmdata.getjumplabel(labelcmp64_2);
@@ -135,7 +135,7 @@ implementation
         cg.a_label(current_asmdata.currasmlist,labelcmp64_2);
         cg.a_label(current_asmdata.currasmlist,labelcmp64_2);
 
 
         { load the value for "true" }
         { load the value for "true" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,1,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,1,tmpreg);
 
 
         cg.a_label(current_asmdata.currasmlist,labelcmp64_1);
         cg.a_label(current_asmdata.currasmlist,labelcmp64_1);
         result:=tmpreg;
         result:=tmpreg;
@@ -150,7 +150,7 @@ implementation
         current_asmdata.getjumplabel(labelcmp64);
         current_asmdata.getjumplabel(labelcmp64);
 
 
         { load the value for "false" }
         { load the value for "false" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,0,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,0,tmpreg);
 
 
         { is the high order longword equal? }
         { is the high order longword equal? }
         current_asmdata.currasmlist.concat(taicpu.op_reg_reg(A_CMP,S_L,left_reg.reghi,right_reg.reghi));
         current_asmdata.currasmlist.concat(taicpu.op_reg_reg(A_CMP,S_L,left_reg.reghi,right_reg.reghi));
@@ -161,7 +161,7 @@ implementation
         current_asmdata.currasmlist.concat(taicpu.op_cond_sym(A_BXX,C_NE,S_L,labelcmp64));
         current_asmdata.currasmlist.concat(taicpu.op_cond_sym(A_BXX,C_NE,S_L,labelcmp64));
 
 
         { load the value for "true" }
         { load the value for "true" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,1,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,1,tmpreg);
 
 
         cg.a_label(current_asmdata.currasmlist,labelcmp64);
         cg.a_label(current_asmdata.currasmlist,labelcmp64);
         result:=tmpreg;
         result:=tmpreg;
@@ -176,7 +176,7 @@ implementation
         current_asmdata.getjumplabel(labelcmp64);
         current_asmdata.getjumplabel(labelcmp64);
 
 
         { load the value for "true" }
         { load the value for "true" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,1,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,1,tmpreg);
 
 
         { is the high order longword equal? }
         { is the high order longword equal? }
         current_asmdata.currasmlist.concat(taicpu.op_reg_reg(A_CMP,S_L,left_reg.reghi,right_reg.reghi));
         current_asmdata.currasmlist.concat(taicpu.op_reg_reg(A_CMP,S_L,left_reg.reghi,right_reg.reghi));
@@ -187,7 +187,7 @@ implementation
         current_asmdata.currasmlist.concat(taicpu.op_cond_sym(A_BXX,C_NE,S_L,labelcmp64));
         current_asmdata.currasmlist.concat(taicpu.op_cond_sym(A_BXX,C_NE,S_L,labelcmp64));
 
 
         { load the value for "false" }
         { load the value for "false" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,0,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,0,tmpreg);
 
 
         cg.a_label(current_asmdata.currasmlist,labelcmp64);
         cg.a_label(current_asmdata.currasmlist,labelcmp64);
         result:=tmpreg;
         result:=tmpreg;
@@ -201,7 +201,7 @@ implementation
         tmpreg:=cg.getintregister(current_asmdata.currasmlist,OS_INT);
         tmpreg:=cg.getintregister(current_asmdata.currasmlist,OS_INT);
 
 
         { load the value for "false" }
         { load the value for "false" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,0,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,0,tmpreg);
 
 
         current_asmdata.getjumplabel(labelcmp64_1);
         current_asmdata.getjumplabel(labelcmp64_1);
         current_asmdata.getjumplabel(labelcmp64_2);
         current_asmdata.getjumplabel(labelcmp64_2);
@@ -226,7 +226,7 @@ implementation
         cg.a_label(current_asmdata.currasmlist,labelcmp64_2);
         cg.a_label(current_asmdata.currasmlist,labelcmp64_2);
 
 
         { load the value for "true" }
         { load the value for "true" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,1,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,1,tmpreg);
 
 
         cg.a_label(current_asmdata.currasmlist,labelcmp64_1);
         cg.a_label(current_asmdata.currasmlist,labelcmp64_1);
         result:=tmpreg;
         result:=tmpreg;
@@ -240,7 +240,7 @@ implementation
         tmpreg:=cg.getintregister(current_asmdata.currasmlist,OS_INT);
         tmpreg:=cg.getintregister(current_asmdata.currasmlist,OS_INT);
 
 
         { load the value for "false" }
         { load the value for "false" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,0,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,0,tmpreg);
 
 
         current_asmdata.getjumplabel(labelcmp64_1);
         current_asmdata.getjumplabel(labelcmp64_1);
         current_asmdata.getjumplabel(labelcmp64_2);
         current_asmdata.getjumplabel(labelcmp64_2);
@@ -259,7 +259,7 @@ implementation
         cg.a_label(current_asmdata.currasmlist,labelcmp64_2);
         cg.a_label(current_asmdata.currasmlist,labelcmp64_2);
 
 
         { load the value for "true" }
         { load the value for "true" }
-        current_asmdata.currasmlist.concat(taicpu.op_const_reg(A_MOVE,S_L,1,tmpreg));
+        cg.a_load_const_reg(current_asmdata.currasmlist,OS_INT,1,tmpreg);
 
 
         cg.a_label(current_asmdata.currasmlist,labelcmp64_1);
         cg.a_label(current_asmdata.currasmlist,labelcmp64_1);
         result:=tmpreg;
         result:=tmpreg;

+ 27 - 36
compiler/m68k/n68kcnv.pas

@@ -34,7 +34,6 @@ interface
           function first_int_to_real: tnode; override;
           function first_int_to_real: tnode; override;
           procedure second_int_to_real;override;
           procedure second_int_to_real;override;
           procedure second_int_to_bool;override;
           procedure second_int_to_bool;override;
-//          procedure pass_generate_code;override;
        end;
        end;
 
 
 implementation
 implementation
@@ -192,7 +191,9 @@ implementation
               exit;
               exit;
            end;
            end;
 
 
-         location_reset(location,LOC_REGISTER,def_cgsize(left.resultdef));
+         resflags:=F_NE;
+
+         newsize:=def_cgsize(resultdef);
          opsize := def_cgsize(left.resultdef);
          opsize := def_cgsize(left.resultdef);
          case left.location.loc of
          case left.location.loc of
             LOC_CREFERENCE,LOC_REFERENCE :
             LOC_CREFERENCE,LOC_REFERENCE :
@@ -229,12 +230,8 @@ implementation
                          cg.a_load_ref_reg(current_asmdata.CurrAsmList,opsize,opsize,
                          cg.a_load_ref_reg(current_asmdata.CurrAsmList,opsize,opsize,
                             left.location.reference,hreg2);
                             left.location.reference,hreg2);
                          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_TST,TCGSize2OpSize[opsize],hreg2));
                          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_TST,TCGSize2OpSize[opsize],hreg2));
-    //                     cg.ungetcpuregister(current_asmdata.CurrAsmList,hreg2);
                       end;
                       end;
-    //                reference_release(current_asmdata.CurrAsmList,left.location.reference);
                   end;
                   end;
-                resflags:=F_NE;
-                hreg1:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
               end;
               end;
             LOC_REGISTER,LOC_CREGISTER :
             LOC_REGISTER,LOC_CREGISTER :
               begin
               begin
@@ -249,20 +246,16 @@ implementation
                   begin
                   begin
                     hreg2:=left.location.register;
                     hreg2:=left.location.register;
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_TST,TCGSize2OpSize[opsize],hreg2));
                     current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_TST,TCGSize2OpSize[opsize],hreg2));
-    //                cg.ungetcpuregister(current_asmdata.CurrAsmList,hreg2);
                   end;
                   end;
-                hreg1:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
-                resflags:=F_NE;
               end;
               end;
             LOC_FLAGS :
             LOC_FLAGS :
               begin
               begin
-                hreg1:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                 resflags:=left.location.resflags;
                 resflags:=left.location.resflags;
               end;
               end;
             LOC_JUMP :
             LOC_JUMP :
               begin
               begin
                 { for now blindly copied from nx86cnv }
                 { for now blindly copied from nx86cnv }
-                location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+                location_reset(location,LOC_REGISTER,newsize);
                 location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
                 location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
                 current_asmdata.getjumplabel(hlabel);
                 current_asmdata.getjumplabel(hlabel);
                 cg.a_label(current_asmdata.CurrAsmList,current_procinfo.CurrTrueLabel);
                 cg.a_label(current_asmdata.CurrAsmList,current_procinfo.CurrTrueLabel);
@@ -280,36 +273,34 @@ implementation
          end;
          end;
          if left.location.loc<>LOC_JUMP then
          if left.location.loc<>LOC_JUMP then
            begin
            begin
-             cg.g_flags2reg(current_asmdata.CurrAsmList,location.size,resflags,hreg1);
-             if (is_cbool(resultdef)) then
-               cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NEG,location.size,hreg1,hreg1);
-             location.register := hreg1;
+             location_reset(location,LOC_REGISTER,newsize);
+             if newsize in [OS_64,OS_S64] then
+               begin
+                 hreg2:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+                 cg.g_flags2reg(current_asmdata.CurrAsmList,OS_32,resflags,hreg2);
+                 if (is_cbool(resultdef)) then
+                   cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NEG,OS_32,hreg2,hreg2);
+                 location.register64.reglo:=hreg2;
+                 location.register64.reghi:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+                 if (is_cbool(resultdef)) then
+                   { reglo is either 0 or -1 -> reghi has to become the same }
+                   cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,OS_32,location.register64.reglo,location.register64.reghi)
+                 else
+                   { unsigned }
+                   cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_32,0,location.register64.reghi);
+               end
+            else
+              begin
+                location.register:=cg.getintregister(current_asmdata.CurrAsmList,newsize);
+                cg.g_flags2reg(current_asmdata.CurrAsmList,newsize,resflags,location.register);
+                if (is_cbool(resultdef)) then
+                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NEG,newsize,location.register,location.register);
+              end
            end;
            end;
          current_procinfo.CurrTrueLabel:=oldTrueLabel;
          current_procinfo.CurrTrueLabel:=oldTrueLabel;
          current_procinfo.CurrFalseLabel:=oldFalseLabel;
          current_procinfo.CurrFalseLabel:=oldFalseLabel;
       end;
       end;
 
 
-{
-    procedure tm68ktypeconvnode.pass_generate_code;
-{$ifdef TESTOBJEXT2}
-      var
-         r : preference;
-         nillabel : plabel;
-{$endif TESTOBJEXT2}
-      begin
-         { this isn't good coding, I think tc_bool_2_int, shouldn't be }
-         { type conversion (FK)                                 }
-
-         if not(convtype in [tc_bool_2_int,tc_bool_2_bool]) then
-           begin
-              secondpass(left);
-              location_copy(location,left.location);
-              if codegenerror then
-               exit;
-           end;
-         second_call_helper(convtype);
-      end;
-}
 
 
 begin
 begin
    ctypeconvnode:=tm68ktypeconvnode;
    ctypeconvnode:=tm68ktypeconvnode;

+ 65 - 11
compiler/mips/aasmcpu.pas

@@ -469,11 +469,12 @@ procedure fixup_jmps(list: TAsmList);
   var
   var
     p,pdelayslot: tai;
     p,pdelayslot: tai;
     newcomment: tai_comment;
     newcomment: tai_comment;
-    newjmp,newnoop: taicpu;
+    newins,newjmp,newnoop: taicpu;
     labelpositions: TFPList;
     labelpositions: TFPList;
     instrpos: ptrint;
     instrpos: ptrint;
     l: tasmlabel;
     l: tasmlabel;
     inserted_something: boolean;
     inserted_something: boolean;
+    href: treference;
   begin
   begin
     // if certainly not enough instructions to cause an overflow, dont bother
     // if certainly not enough instructions to cause an overflow, dont bother
     if (list.count <= (high(smallint) div 4)) then
     if (list.count <= (high(smallint) div 4)) then
@@ -541,11 +542,36 @@ procedure fixup_jmps(list: TAsmList);
                        (ptruint(abs(ptrint(labelpositions[tasmlabel(taicpu(p).oper[0]^.ref^.symbol).labelnr]-instrpos)) - (low(smallint) div 4)) > ptruint((high(smallint) - low(smallint)) div 4)) then
                        (ptruint(abs(ptrint(labelpositions[tasmlabel(taicpu(p).oper[0]^.ref^.symbol).labelnr]-instrpos)) - (low(smallint) div 4)) > ptruint((high(smallint) - low(smallint)) div 4)) then
 {$pop}
 {$pop}
                       begin
                       begin
-                        { This is not PIC safe }
-                        taicpu(p).opcode:=A_J;
-                        newcomment:=tai_comment.create(strpnew('fixup_jmps, A_BA changed into A_J'));
-                        list.insertbefore(newcomment,p);
-                      end;
+                        if (cs_create_pic in current_settings.moduleswitches) then
+                          begin
+                            newcomment:=tai_comment.create(strpnew('fixup_jmps, A_BA changed into PIC sequence'));
+                            list.insertbefore(newcomment,p);
+                            href:=taicpu(p).oper[0]^.ref^;
+                            href.refaddr:=addr_pic;
+                            href.base:=NR_GP;
+                            newins:=taicpu.op_reg_ref(A_LW,NR_PIC_FUNC,href);
+                            newins.fileinfo:=taicpu(p).fileinfo;
+                            list.insertbefore(newins,p);
+                            inc(instrpos,2);
+                            if (href.symbol.bind=AB_LOCAL) then
+                              begin
+                                href.refaddr:=addr_low;
+                                href.base:=NR_NO;
+                                newins:=taicpu.op_reg_reg_ref(A_ADDIU,NR_PIC_FUNC,NR_PIC_FUNC,href);
+                                newins.fileinfo:=taicpu(p).fileinfo;
+                                list.insertbefore(newins,p);
+                                inc(instrpos,2);
+                              end;
+                            taicpu(p).opcode:=A_JR;
+                            taicpu(p).loadreg(0,NR_PIC_FUNC);
+                          end
+                        else
+                          begin
+                            taicpu(p).opcode:=A_J;
+                            newcomment:=tai_comment.create(strpnew('fixup_jmps, A_BA changed into A_J'));
+                            list.insertbefore(newcomment,p);
+                          end;
+                       end;
                   A_BC:
                   A_BC:
                     if (taicpu(p).ops=3) and (taicpu(p).oper[2]^.typ = top_ref) and
                     if (taicpu(p).ops=3) and (taicpu(p).oper[2]^.typ = top_ref) and
                        assigned(taicpu(p).oper[2]^.ref^.symbol) and
                        assigned(taicpu(p).oper[2]^.ref^.symbol) and
@@ -574,11 +600,39 @@ procedure fixup_jmps(list: TAsmList);
                         // add a new unconditional jump between this jump and the label
                         // add a new unconditional jump between this jump and the label
                         newcomment:=tai_comment.create(strpnew('fixup_jmps, A_BXX changed into A_BNOTXX label;A_J;label:'));
                         newcomment:=tai_comment.create(strpnew('fixup_jmps, A_BXX changed into A_BNOTXX label;A_J;label:'));
                         list.insertbefore(newcomment,p);
                         list.insertbefore(newcomment,p);
-                        newjmp := taicpu.op_sym(A_J,taicpu(p).oper[2]^.ref^.symbol);
-                        newjmp.is_jmp := true;
-                        newjmp.fileinfo := taicpu(p).fileinfo;
-                        list.insertafter(newjmp,pdelayslot);
-                        inc(instrpos,2);
+                        if (cs_create_pic in current_settings.moduleswitches) then
+                          begin
+                            reference_reset_symbol(href,taicpu(p).oper[2]^.ref^.symbol,0,sizeof(pint));
+                            href.refaddr:=addr_pic;
+                            href.base:=NR_GP;
+                            newins:=taicpu.op_reg_ref(A_LW,NR_PIC_FUNC,href);
+                            newins.fileinfo:=taicpu(p).fileinfo;
+                            list.insertafter(newins,pdelayslot);
+                            pdelayslot:=newins;
+                            inc(instrpos,2);
+                            if (href.symbol.bind=AB_LOCAL) then
+                              begin
+                                href.base:=NR_NO;
+                                href.refaddr:=addr_low;
+                                newins:=taicpu.op_reg_reg_ref(A_ADDIU,NR_PIC_FUNC,NR_PIC_FUNC,href);
+                                newins.fileinfo:=taicpu(p).fileinfo;
+                                list.insertafter(newins,pdelayslot);
+                                pdelayslot:=newins;
+                                inc(instrpos,2);
+                              end;
+                            newjmp:=taicpu.op_reg(A_JR,NR_PIC_FUNC);
+                            newjmp.fileinfo:=taicpu(p).fileinfo;
+                            list.insertafter(newjmp,pdelayslot);
+                            inc(instrpos,2);
+                          end
+                        else
+                          begin
+                            newjmp := taicpu.op_sym(A_J,taicpu(p).oper[2]^.ref^.symbol);
+                            newjmp.is_jmp := true;
+                            newjmp.fileinfo := taicpu(p).fileinfo;
+                            list.insertafter(newjmp,pdelayslot);
+                            inc(instrpos,2);
+                          end;
                         { Add a delay slot for new A_J instruction }
                         { Add a delay slot for new A_J instruction }
                         newnoop:=taicpu.op_none(A_NOP);
                         newnoop:=taicpu.op_none(A_NOP);
                         newnoop.fileinfo := taicpu(p).fileinfo;
                         newnoop.fileinfo := taicpu(p).fileinfo;

+ 19 - 8
compiler/mips/aoptcpu.pas

@@ -32,7 +32,7 @@ unit aoptcpu;
 
 
     Type
     Type
       TCpuAsmOptimizer = class(TAsmOptimizer)
       TCpuAsmOptimizer = class(TAsmOptimizer)
-        function TryRemoveMov(var p: tai): boolean;
+        function TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
         function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
         function PeepHoleOptPass1Cpu(var p: tai): boolean; override;
       End;
       End;
 
 
@@ -65,21 +65,22 @@ unit aoptcpu;
     end;
     end;
 
 
 
 
-  function TCpuAsmOptimizer.TryRemoveMov(var p: tai): boolean;
+  function TCpuAsmOptimizer.TryRemoveMov(var p: tai; opcode: TAsmOp): boolean;
     var
     var
       next,hp1: tai;
       next,hp1: tai;
       alloc,dealloc: tai_regalloc;
       alloc,dealloc: tai_regalloc;
     begin
     begin
       { Fold
       { Fold
-          op   $reg1,...
-          move $reg2,$reg1
+          op      $reg1,...
+          opcode  $reg2,$reg1
           dealloc $reg1
           dealloc $reg1
         into
         into
           op   $reg2,...
           op   $reg2,...
+        opcode may be A_MOVE, A_MOV_s, A_MOV_d, etc.
       }
       }
       result:=false;
       result:=false;
       if GetNextInstruction(p,next) and
       if GetNextInstruction(p,next) and
-         MatchInstruction(next,A_MOVE) and
+         MatchInstruction(next,opcode) and
          MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
          MatchOperand(taicpu(next).oper[1]^,taicpu(p).oper[0]^.reg) then
         begin
         begin
           dealloc:=FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.Next));
           dealloc:=FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.Next));
@@ -155,7 +156,7 @@ unit aoptcpu;
                       next.free;
                       next.free;
                     end
                     end
                   else
                   else
-                    TryRemoveMov(p);
+                    TryRemoveMov(p,A_MOVE);
                 end;
                 end;
 
 
               A_ANDI:
               A_ANDI:
@@ -185,7 +186,7 @@ unit aoptcpu;
                       next2.free;
                       next2.free;
                     end
                     end
                   else
                   else
-                    TryRemoveMov(p);
+                    TryRemoveMov(p,A_MOVE);
                 end;
                 end;
 
 
               A_ADD,A_ADDU,
               A_ADD,A_ADDU,
@@ -195,7 +196,17 @@ unit aoptcpu;
               A_SRLV,
               A_SRLV,
               A_SLL,A_SLLV,
               A_SLL,A_SLLV,
               A_AND,A_OR,A_XOR,A_ORI,A_XORI:
               A_AND,A_OR,A_XOR,A_ORI,A_XORI:
-                TryRemoveMov(p);
+                TryRemoveMov(p,A_MOVE);
+
+              A_ADD_s, A_SUB_s, A_MUL_s, A_DIV_s,
+              A_ABS_s, A_NEG_s, A_SQRT_s,
+              A_CVT_s_w, A_CVT_s_l, A_CVT_s_d:
+                TryRemoveMov(p,A_MOV_s);
+
+              A_ADD_d, A_SUB_d, A_MUL_d, A_DIV_d,
+              A_ABS_d, A_NEG_d, A_SQRT_d,
+              A_CVT_d_w, A_CVT_d_l, A_CVT_d_s:
+                TryRemoveMov(p,A_MOV_d);
             end;
             end;
           end;
           end;
       end;
       end;

+ 16 - 17
compiler/mips/cgcpu.pas

@@ -749,7 +749,7 @@ const
 
 
 procedure TCGMIPS.a_op_const_reg(list: tasmlist; Op: TOpCG; size: tcgsize; a: tcgint; reg: TRegister);
 procedure TCGMIPS.a_op_const_reg(list: tasmlist; Op: TOpCG; size: tcgsize; a: tcgint; reg: TRegister);
 begin
 begin
-  optimize_op_const(op,a);
+  optimize_op_const(size,op,a);
   case op of
   case op of
     OP_NONE:
     OP_NONE:
       exit;
       exit;
@@ -796,8 +796,6 @@ end;
 
 
 
 
 procedure TCGMIPS.a_op_reg_reg_reg(list: tasmlist; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
 procedure TCGMIPS.a_op_reg_reg_reg(list: tasmlist; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
-var
-  hreg: tregister;
 begin
 begin
   if (TOpcg2AsmOp[op]=A_NONE) then
   if (TOpcg2AsmOp[op]=A_NONE) then
     InternalError(2013070305);
     InternalError(2013070305);
@@ -805,11 +803,10 @@ begin
     begin
     begin
       if (size in [OS_S8,OS_S16]) then
       if (size in [OS_S8,OS_S16]) then
         begin
         begin
-          { Shift left by 16/24 bits and increase amount of right shift by same value }
+          { Sign-extend before shiting }
           list.concat(taicpu.op_reg_reg_const(A_SLL, dst, src2, 32-(tcgsize2size[size]*8)));
           list.concat(taicpu.op_reg_reg_const(A_SLL, dst, src2, 32-(tcgsize2size[size]*8)));
-          hreg:=GetIntRegister(list,OS_INT);
-          a_op_const_reg_reg(list,OP_ADD,OS_INT,32-(tcgsize2size[size]*8),src1,dst);
-          src1:=hreg;
+          list.concat(taicpu.op_reg_reg_const(A_SRA, dst, dst, 32-(tcgsize2size[size]*8)));
+          src2:=dst;
         end
         end
       else if not (size in [OS_32,OS_S32]) then
       else if not (size in [OS_32,OS_S32]) then
         InternalError(2013070306);
         InternalError(2013070306);
@@ -826,7 +823,7 @@ var
   asmop: TAsmOp;
   asmop: TAsmOp;
 begin
 begin
   ovloc.loc := LOC_VOID;
   ovloc.loc := LOC_VOID;
-  optimize_op_const(op,a);
+  optimize_op_const(size,op,a);
   signed:=(size in [OS_S8,OS_S16,OS_S32]);
   signed:=(size in [OS_S8,OS_S16,OS_S32]);
   if (setflags and (not signed) and (src=dst) and (op in [OP_ADD,OP_SUB])) then
   if (setflags and (not signed) and (src=dst) and (op in [OP_ADD,OP_SUB])) then
     hreg:=GetIntRegister(list,OS_INT)
     hreg:=GetIntRegister(list,OS_INT)
@@ -1198,6 +1195,7 @@ var
   helplist : TAsmList;
   helplist : TAsmList;
   largeoffs : boolean;
   largeoffs : boolean;
 begin
 begin
+  list.concat(tai_directive.create(asd_ent,current_procinfo.procdef.mangledname));
   a_reg_alloc(list,NR_STACK_POINTER_REG);
   a_reg_alloc(list,NR_STACK_POINTER_REG);
 
 
   if nostackframe then
   if nostackframe then
@@ -1399,6 +1397,7 @@ begin
        list.concat(Taicpu.op_none(A_P_SET_MACRO));
        list.concat(Taicpu.op_none(A_P_SET_MACRO));
        list.concat(Taicpu.op_none(A_P_SET_REORDER));
        list.concat(Taicpu.op_none(A_P_SET_REORDER));
     end;
     end;
+  list.concat(tai_directive.create(asd_ent_end,current_procinfo.procdef.mangledname));
 end;
 end;
 
 
 
 
@@ -1565,8 +1564,8 @@ begin
     { generate a loop }
     { generate a loop }
     if len > 4 then
     if len > 4 then
     begin
     begin
-      countreg := cg.GetIntRegister(list, OS_INT);
-      tmpreg1  := cg.GetIntRegister(list, OS_INT);
+      countreg := GetIntRegister(list, OS_INT);
+      tmpreg1  := GetIntRegister(list, OS_INT);
       a_load_const_reg(list, OS_INT, len, countreg);
       a_load_const_reg(list, OS_INT, len, countreg);
       current_asmdata.getjumplabel(lab);
       current_asmdata.getjumplabel(lab);
       a_label(list, lab);
       a_label(list, lab);
@@ -1580,7 +1579,7 @@ begin
     else
     else
     begin
     begin
       { unrolled loop }
       { unrolled loop }
-      tmpreg1 := cg.GetIntRegister(list, OS_INT);
+      tmpreg1 := GetIntRegister(list, OS_INT);
       for i := 1 to len do
       for i := 1 to len do
       begin
       begin
         list.concat(taicpu.op_reg_ref(A_LBU, tmpreg1, src));
         list.concat(taicpu.op_reg_ref(A_LBU, tmpreg1, src));
@@ -1761,7 +1760,6 @@ var
   tmpref: treference;
   tmpref: treference;
   tmpreg: tregister;
   tmpreg: tregister;
 begin
 begin
-  { Override this function to prevent loading the reference twice }
   if target_info.endian = endian_big then
   if target_info.endian = endian_big then
     begin
     begin
       tmpreg := reg.reglo;
       tmpreg := reg.reglo;
@@ -1769,9 +1767,10 @@ begin
       reg.reghi := tmpreg;
       reg.reghi := tmpreg;
     end;
     end;
   tmpref := ref;
   tmpref := ref;
-  cg.a_load_reg_ref(list, OS_S32, OS_S32, reg.reglo, tmpref);
+  tcgmips(cg).make_simple_ref(list,tmpref);
+  list.concat(taicpu.op_reg_ref(A_SW,reg.reglo,tmpref));
   Inc(tmpref.offset, 4);
   Inc(tmpref.offset, 4);
-  cg.a_load_reg_ref(list, OS_S32, OS_S32, reg.reghi, tmpref);
+  list.concat(taicpu.op_reg_ref(A_SW,reg.reghi,tmpref));
 end;
 end;
 
 
 
 
@@ -1780,7 +1779,6 @@ var
   tmpref: treference;
   tmpref: treference;
   tmpreg: tregister;
   tmpreg: tregister;
 begin
 begin
-  { Override this function to prevent loading the reference twice }
   if target_info.endian = endian_big then
   if target_info.endian = endian_big then
     begin
     begin
       tmpreg := reg.reglo;
       tmpreg := reg.reglo;
@@ -1788,9 +1786,10 @@ begin
       reg.reghi := tmpreg;
       reg.reghi := tmpreg;
     end;
     end;
   tmpref := ref;
   tmpref := ref;
-  cg.a_load_ref_reg(list, OS_S32, OS_S32, tmpref, reg.reglo);
+  tcgmips(cg).make_simple_ref(list,tmpref);
+  list.concat(taicpu.op_reg_ref(A_LW,reg.reglo,tmpref));
   Inc(tmpref.offset, 4);
   Inc(tmpref.offset, 4);
-  cg.a_load_ref_reg(list, OS_S32, OS_S32, tmpref, reg.reghi);
+  list.concat(taicpu.op_reg_ref(A_LW,reg.reghi,tmpref));
 end;
 end;
 
 
 
 

+ 4 - 4
compiler/mips/cpugas.pas

@@ -42,7 +42,7 @@ unit cpugas;
       end;
       end;
 
 
     const
     const
-      use_std_regnames : boolean = 
+      use_std_regnames : boolean =
       {$ifndef USE_MIPS_GAS_REGS}
       {$ifndef USE_MIPS_GAS_REGS}
       true;
       true;
       {$else}
       {$else}
@@ -379,8 +379,8 @@ unit cpugas;
         id: as_gas;
         id: as_gas;
         idtxt: 'AS';
         idtxt: 'AS';
         asmbin: 'as';
         asmbin: 'as';
-        asmcmd: '$ABI $ARCH $NOWARN -EL $PIC -o $OBJ $ASM';
-        supported_targets: [system_mipsel_linux];
+        asmcmd: '$ABI $ARCH $NOWARN -EL $PIC -o $OBJ $EXTRAOPT $ASM';
+        supported_targets: [system_mipsel_linux,system_mipsel_android];
         flags: [ af_needar, af_smartlink_sections];
         flags: [ af_needar, af_smartlink_sections];
         labelprefix: '.L';
         labelprefix: '.L';
         comment: '# ';
         comment: '# ';
@@ -391,7 +391,7 @@ unit cpugas;
         id: as_gas;
         id: as_gas;
         idtxt: 'AS';
         idtxt: 'AS';
         asmbin: 'as';
         asmbin: 'as';
-        asmcmd: '$ABI $ARCH $NOWARN -EB $PIC -o $OBJ $ASM';
+        asmcmd: '$ABI $ARCH $NOWARN -EB $PIC -o $OBJ $EXTRAOPT $ASM';
         supported_targets: [system_mipseb_linux];
         supported_targets: [system_mipseb_linux];
         flags: [ af_needar, af_smartlink_sections];
         flags: [ af_needar, af_smartlink_sections];
         labelprefix: '.L';
         labelprefix: '.L';

+ 2 - 1
compiler/mips/cpuinfo.pas

@@ -1,7 +1,7 @@
 {
 {
     Copyright (c) 1998-2002 by the Free Pascal development team
     Copyright (c) 1998-2002 by the Free Pascal development team
 
 
-    Basic Processor information for the ARM
+    Basic Processor information for the MIPS
 
 
     See the file COPYING.FPC, included in this distribution,
     See the file COPYING.FPC, included in this distribution,
     for details about the copyright.
     for details about the copyright.
@@ -63,6 +63,7 @@ Const
    supported_calling_conventions : tproccalloptions = [
    supported_calling_conventions : tproccalloptions = [
      pocall_internproc,
      pocall_internproc,
      pocall_stdcall,
      pocall_stdcall,
+     pocall_safecall,
      { same as stdcall only different name mangling }
      { same as stdcall only different name mangling }
      pocall_cdecl,
      pocall_cdecl,
      { same as stdcall only different name mangling }
      { same as stdcall only different name mangling }

Деякі файли не було показано, через те що забагато файлів було змінено