ソースを参照

Merged from trunk

git-svn-id: branches/laksen/armiw@30146 -
Jeppe Johansen 10 年 前
コミット
914e9e7b49
100 ファイル変更9758 行追加3318 行削除
  1. 66 2
      .gitattributes
  2. 28 3
      Makefile
  3. 4 1
      Makefile.fpc
  4. 65 10
      compiler/Makefile
  5. 14 10
      compiler/Makefile.fpc
  6. 93 36
      compiler/aarch64/a64att.inc
  7. 57 0
      compiler/aarch64/a64atts.inc
  8. 158 44
      compiler/aarch64/a64ins.dat
  9. 93 36
      compiler/aarch64/a64op.inc
  10. 168 161
      compiler/aarch64/a64reg.dat
  11. 462 1125
      compiler/aarch64/aasmcpu.pas
  12. 288 0
      compiler/aarch64/agcpugas.pas
  13. 25 7
      compiler/aarch64/aoptcpub.pas
  14. 2284 0
      compiler/aarch64/cgcpu.pas
  15. 200 38
      compiler/aarch64/cpubase.pas
  16. 2 2
      compiler/aarch64/cpuinfo.pas
  17. 40 0
      compiler/aarch64/cpunode.pas
  18. 456 520
      compiler/aarch64/cpupara.pas
  19. 68 0
      compiler/aarch64/cpupi.pas
  20. 70 0
      compiler/aarch64/cputarg.pas
  21. 156 0
      compiler/aarch64/hlcgcpu.pas
  22. 402 0
      compiler/aarch64/ncpuadd.pas
  23. 201 0
      compiler/aarch64/ncpucnv.pas
  24. 184 0
      compiler/aarch64/ncpuinl.pas
  25. 196 0
      compiler/aarch64/ncpumat.pas
  26. 142 0
      compiler/aarch64/ncpumem.pas
  27. 175 0
      compiler/aarch64/ncpuset.pas
  28. 5 0
      compiler/aarch64/ra64con.inc
  29. 162 157
      compiler/aarch64/ra64dwa.inc
  30. 1 1
      compiler/aarch64/ra64nor.inc
  31. 6 1
      compiler/aarch64/ra64num.inc
  32. 101 96
      compiler/aarch64/ra64rni.inc
  33. 48 43
      compiler/aarch64/ra64sri.inc
  34. 162 157
      compiler/aarch64/ra64sta.inc
  35. 6 1
      compiler/aarch64/ra64std.inc
  36. 5 0
      compiler/aarch64/ra64sup.inc
  37. 88 0
      compiler/aarch64/racpu.pas
  38. 1053 0
      compiler/aarch64/racpugas.pas
  39. 171 0
      compiler/aarch64/rgcpu.pas
  40. 21 4
      compiler/aasmtai.pas
  41. 4 1
      compiler/aggas.pas
  42. 19 5
      compiler/aoptobj.pas
  43. 45 12
      compiler/arm/aasmcpu.pas
  44. 1 2
      compiler/arm/agarmgas.pas
  45. 5 3
      compiler/arm/cgcpu.pas
  46. 6 0
      compiler/arm/cpuinfo.pas
  47. 4 3
      compiler/arm/narmmem.pas
  48. 1 1
      compiler/arm/raarmgas.pas
  49. 2 1
      compiler/assemble.pas
  50. 13 0
      compiler/cgbase.pas
  51. 2 2
      compiler/cghlcpu.pas
  52. 19 8
      compiler/cgobj.pas
  53. 6 0
      compiler/cgutils.pas
  54. 1 1
      compiler/finput.pas
  55. 1 0
      compiler/fpcdefs.inc
  56. 3 1
      compiler/fppu.pas
  57. 2 2
      compiler/globals.pas
  58. 3 3
      compiler/hlcg2ll.pas
  59. 19 7
      compiler/hlcgobj.pas
  60. 9 8
      compiler/i386/cpuinfo.pas
  61. 8 0
      compiler/i386/i386att.inc
  62. 8 0
      compiler/i386/i386atts.inc
  63. 8 0
      compiler/i386/i386int.inc
  64. 1 1
      compiler/i386/i386nop.inc
  65. 8 0
      compiler/i386/i386op.inc
  66. 9 1
      compiler/i386/i386prop.inc
  67. 112 0
      compiler/i386/i386tab.inc
  68. 1 1
      compiler/i386/popt386.pas
  69. 8 0
      compiler/i8086/i8086att.inc
  70. 8 0
      compiler/i8086/i8086atts.inc
  71. 8 0
      compiler/i8086/i8086int.inc
  72. 1 1
      compiler/i8086/i8086nop.inc
  73. 8 0
      compiler/i8086/i8086op.inc
  74. 9 1
      compiler/i8086/i8086prop.inc
  75. 112 0
      compiler/i8086/i8086tab.inc
  76. 5 4
      compiler/i8086/n8086mem.pas
  77. 2 2
      compiler/jvm/hlcgcpu.pas
  78. 2 2
      compiler/link.pas
  79. 10 1
      compiler/m68k/aasmcpu.pas
  80. 4 4
      compiler/m68k/cgcpu.pas
  81. 1 2
      compiler/m68k/cpunode.pas
  82. 36 18
      compiler/m68k/n68kadd.pas
  83. 212 0
      compiler/m68k/n68kinl.pas
  84. 43 1
      compiler/m68k/n68kmat.pas
  85. 3 2
      compiler/m68k/n68kmem.pas
  86. 3 1
      compiler/mips/aoptcpu.pas
  87. 1 1
      compiler/msg/errord.msg
  88. 1 1
      compiler/msg/errordu.msg
  89. 11 0
      compiler/msg/errore.msg
  90. 3 2
      compiler/msgidx.inc
  91. 288 287
      compiler/msgtxt.inc
  92. 8 4
      compiler/nadd.pas
  93. 2 4
      compiler/nbas.pas
  94. 652 420
      compiler/ncal.pas
  95. 5 2
      compiler/ncgbas.pas
  96. 2 2
      compiler/ncgcal.pas
  97. 1 1
      compiler/ncgcnv.pas
  98. 4 8
      compiler/ncginl.pas
  99. 11 3
      compiler/ncgld.pas
  100. 48 26
      compiler/ncgmem.pas

+ 66 - 2
.gitattributes

@@ -14,13 +14,25 @@ compiler/aarch64/a64op.inc svneol=native#text/plain
 compiler/aarch64/a64reg.dat svneol=native#text/plain
 compiler/aarch64/a64reg.dat svneol=native#text/plain
 compiler/aarch64/a64tab.inc svneol=native#text/plain
 compiler/aarch64/a64tab.inc svneol=native#text/plain
 compiler/aarch64/aasmcpu.pas svneol=native#text/plain
 compiler/aarch64/aasmcpu.pas svneol=native#text/plain
+compiler/aarch64/agcpugas.pas svneol=native#text/plain
 compiler/aarch64/aoptcpu.pas svneol=native#text/plain
 compiler/aarch64/aoptcpu.pas svneol=native#text/plain
 compiler/aarch64/aoptcpub.pas svneol=native#text/plain
 compiler/aarch64/aoptcpub.pas svneol=native#text/plain
 compiler/aarch64/aoptcpud.pas svneol=native#text/plain
 compiler/aarch64/aoptcpud.pas svneol=native#text/plain
+compiler/aarch64/cgcpu.pas svneol=native#text/plain
 compiler/aarch64/cpubase.pas svneol=native#text/plain
 compiler/aarch64/cpubase.pas svneol=native#text/plain
 compiler/aarch64/cpuinfo.pas svneol=native#text/plain
 compiler/aarch64/cpuinfo.pas svneol=native#text/plain
+compiler/aarch64/cpunode.pas svneol=native#text/plain
 compiler/aarch64/cpupara.pas svneol=native#text/plain
 compiler/aarch64/cpupara.pas svneol=native#text/plain
+compiler/aarch64/cpupi.pas svneol=native#text/plain
+compiler/aarch64/cputarg.pas svneol=native#text/plain
+compiler/aarch64/hlcgcpu.pas svneol=native#text/plain
 compiler/aarch64/itcpugas.pas svneol=native#text/plain
 compiler/aarch64/itcpugas.pas svneol=native#text/plain
+compiler/aarch64/ncpuadd.pas svneol=native#text/plain
+compiler/aarch64/ncpucnv.pas svneol=native#text/plain
+compiler/aarch64/ncpuinl.pas svneol=native#text/plain
+compiler/aarch64/ncpumat.pas svneol=native#text/plain
+compiler/aarch64/ncpumem.pas svneol=native#text/plain
+compiler/aarch64/ncpuset.pas svneol=native#text/plain
 compiler/aarch64/ra64con.inc svneol=native#text/plain
 compiler/aarch64/ra64con.inc svneol=native#text/plain
 compiler/aarch64/ra64dwa.inc svneol=native#text/plain
 compiler/aarch64/ra64dwa.inc svneol=native#text/plain
 compiler/aarch64/ra64nor.inc svneol=native#text/plain
 compiler/aarch64/ra64nor.inc svneol=native#text/plain
@@ -30,6 +42,9 @@ compiler/aarch64/ra64sri.inc svneol=native#text/plain
 compiler/aarch64/ra64sta.inc svneol=native#text/plain
 compiler/aarch64/ra64sta.inc svneol=native#text/plain
 compiler/aarch64/ra64std.inc svneol=native#text/plain
 compiler/aarch64/ra64std.inc svneol=native#text/plain
 compiler/aarch64/ra64sup.inc svneol=native#text/plain
 compiler/aarch64/ra64sup.inc svneol=native#text/plain
+compiler/aarch64/racpu.pas svneol=native#text/plain
+compiler/aarch64/racpugas.pas svneol=native#text/plain
+compiler/aarch64/rgcpu.pas svneol=native#text/plain
 compiler/aarch64/symcpu.pas svneol=native#text/plain
 compiler/aarch64/symcpu.pas svneol=native#text/plain
 compiler/aasmbase.pas svneol=native#text/plain
 compiler/aasmbase.pas svneol=native#text/plain
 compiler/aasmdata.pas svneol=native#text/plain
 compiler/aasmdata.pas svneol=native#text/plain
@@ -349,6 +364,7 @@ compiler/m68k/m68kreg.dat svneol=native#text/plain
 compiler/m68k/n68kadd.pas svneol=native#text/plain
 compiler/m68k/n68kadd.pas svneol=native#text/plain
 compiler/m68k/n68kcal.pas svneol=native#text/plain
 compiler/m68k/n68kcal.pas svneol=native#text/plain
 compiler/m68k/n68kcnv.pas svneol=native#text/plain
 compiler/m68k/n68kcnv.pas svneol=native#text/plain
+compiler/m68k/n68kinl.pas svneol=native#text/plain
 compiler/m68k/n68kmat.pas svneol=native#text/plain
 compiler/m68k/n68kmat.pas svneol=native#text/plain
 compiler/m68k/n68kmem.pas svneol=native#text/plain
 compiler/m68k/n68kmem.pas svneol=native#text/plain
 compiler/m68k/r68kcon.inc svneol=native#text/plain
 compiler/m68k/r68kcon.inc svneol=native#text/plain
@@ -915,6 +931,10 @@ ide/fputils.pas svneol=native#text/plain
 ide/fpvars.pas svneol=native#text/plain
 ide/fpvars.pas svneol=native#text/plain
 ide/fpviews.pas svneol=native#text/plain
 ide/fpviews.pas svneol=native#text/plain
 ide/fpw32.rc -text
 ide/fpw32.rc -text
+ide/gdbmicon.pas svneol=native#text/plain
+ide/gdbmiint.pas svneol=native#text/plain
+ide/gdbmiproc.pas svneol=native#text/plain
+ide/gdbmiwrap.pas svneol=native#text/plain
 ide/globdir.inc svneol=native#text/plain
 ide/globdir.inc svneol=native#text/plain
 ide/gplprog.pt -text
 ide/gplprog.pt -text
 ide/gplunit.pt -text
 ide/gplunit.pt -text
@@ -4453,6 +4473,7 @@ packages/iosxlocale/Makefile.fpc svneol=native#text/plain
 packages/iosxlocale/Makefile.fpc.fpcmake svneol=native#text/plain
 packages/iosxlocale/Makefile.fpc.fpcmake svneol=native#text/plain
 packages/iosxlocale/fpmake.pp svneol=native#text/plain
 packages/iosxlocale/fpmake.pp svneol=native#text/plain
 packages/iosxlocale/src/iosxlocale.pp svneol=native#text/plain
 packages/iosxlocale/src/iosxlocale.pp svneol=native#text/plain
+packages/iosxlocale/src/iosxwstr.pp svneol=native#text/plain
 packages/jni/Makefile svneol=native#text/plain
 packages/jni/Makefile svneol=native#text/plain
 packages/jni/Makefile.fpc svneol=native#text/plain
 packages/jni/Makefile.fpc svneol=native#text/plain
 packages/jni/fpmake.pp svneol=native#text/plain
 packages/jni/fpmake.pp svneol=native#text/plain
@@ -4835,6 +4856,9 @@ packages/libndsfpc/examples/audio/maxmod/song_events_example2/Makefile svneol=na
 packages/libndsfpc/examples/audio/maxmod/song_events_example2/Makefile.fpc svneol=native#text/plain
 packages/libndsfpc/examples/audio/maxmod/song_events_example2/Makefile.fpc svneol=native#text/plain
 packages/libndsfpc/examples/audio/maxmod/song_events_example2/audio/example2.it -text
 packages/libndsfpc/examples/audio/maxmod/song_events_example2/audio/example2.it -text
 packages/libndsfpc/examples/audio/maxmod/song_events_example2/song_events_example2.pp svneol=native#text/plain
 packages/libndsfpc/examples/audio/maxmod/song_events_example2/song_events_example2.pp svneol=native#text/plain
+packages/libndsfpc/examples/audio/maxmod/streaming/Makefile svneol=native#text/plain
+packages/libndsfpc/examples/audio/maxmod/streaming/Makefile.fpc svneol=native#text/plain
+packages/libndsfpc/examples/audio/maxmod/streaming/streaming.pp svneol=native#text/plain
 packages/libndsfpc/examples/audio/micrecord/Makefile svneol=native#text/plain
 packages/libndsfpc/examples/audio/micrecord/Makefile svneol=native#text/plain
 packages/libndsfpc/examples/audio/micrecord/Makefile.fpc svneol=native#text/plain
 packages/libndsfpc/examples/audio/micrecord/Makefile.fpc svneol=native#text/plain
 packages/libndsfpc/examples/audio/micrecord/micrecord.pp svneol=native#text/plain
 packages/libndsfpc/examples/audio/micrecord/micrecord.pp svneol=native#text/plain
@@ -5131,8 +5155,6 @@ packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/devkitlogo.gri
 packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/devkitlogo.png -text
 packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/devkitlogo.png -text
 packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/drunkenlogo.grit svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/drunkenlogo.grit svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/drunkenlogo.png -text
 packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/drunkenlogo.png -text
-packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/inc/devkitlogo.inc svneol=native#text/plain
-packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/inc/drunkenlogo.inc svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Makefile svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Makefile svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Makefile.fpc svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Makefile.fpc svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Printing/Makefile svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Printing/Makefile svneol=native#text/plain
@@ -5435,6 +5457,7 @@ packages/libogcfpc/src/ogc/ios.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/ipc.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/ipc.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/irq.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/irq.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/isfs.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/isfs.inc svneol=native#text/plain
+packages/libogcfpc/src/ogc/libversion.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/lwp.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/lwp.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/lwp_config.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/lwp_config.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/lwp_heap.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/lwp_heap.inc svneol=native#text/plain
@@ -7855,6 +7878,16 @@ rtl/COPYING.txt svneol=native#text/plain
 rtl/Makefile svneol=native#text/plain
 rtl/Makefile svneol=native#text/plain
 rtl/Makefile.fpc svneol=native#text/plain
 rtl/Makefile.fpc svneol=native#text/plain
 rtl/README.txt svneol=native#text/plain
 rtl/README.txt svneol=native#text/plain
+rtl/aarch64/aarch64.inc svneol=native#text/plain
+rtl/aarch64/int64p.inc svneol=native#text/plain
+rtl/aarch64/makefile.cpu svneol=native#text/plain
+rtl/aarch64/math.inc svneol=native#text/plain
+rtl/aarch64/mathu.inc svneol=native#text/plain
+rtl/aarch64/set.inc svneol=native#text/plain
+rtl/aarch64/setjump.inc svneol=native#text/plain
+rtl/aarch64/setjumph.inc svneol=native#text/plain
+rtl/aarch64/strings.inc svneol=native#text/plain
+rtl/aarch64/stringss.inc svneol=native#text/plain
 rtl/aix/Makefile svneol=native#text/plain
 rtl/aix/Makefile svneol=native#text/plain
 rtl/aix/Makefile.fpc svneol=native#text/plain
 rtl/aix/Makefile.fpc svneol=native#text/plain
 rtl/aix/dlaix.inc svneol=native#text/plain
 rtl/aix/dlaix.inc svneol=native#text/plain
@@ -8054,6 +8087,8 @@ rtl/charmaps/cp8859_5.pas svneol=native#text/pascal
 rtl/charmaps/cpall.pas svneol=native#text/pascal
 rtl/charmaps/cpall.pas svneol=native#text/pascal
 rtl/darwin/Makefile svneol=native#text/plain
 rtl/darwin/Makefile svneol=native#text/plain
 rtl/darwin/Makefile.fpc svneol=native#text/plain
 rtl/darwin/Makefile.fpc svneol=native#text/plain
+rtl/darwin/aarch64/sig_cpu.inc svneol=native#text/plain
+rtl/darwin/aarch64/sighnd.inc svneol=native#text/plain
 rtl/darwin/arm/sig_cpu.inc svneol=native#text/plain
 rtl/darwin/arm/sig_cpu.inc svneol=native#text/plain
 rtl/darwin/arm/sighnd.inc svneol=native#text/plain
 rtl/darwin/arm/sighnd.inc svneol=native#text/plain
 rtl/darwin/console.pp svneol=native#text/plain
 rtl/darwin/console.pp svneol=native#text/plain
@@ -8113,6 +8148,7 @@ rtl/dragonfly/x86_64/sighnd.inc svneol=native#text/plain
 rtl/dragonfly/x86_64/signal.inc svneol=native#text/plain
 rtl/dragonfly/x86_64/signal.inc svneol=native#text/plain
 rtl/embedded/Makefile svneol=native#text/plain
 rtl/embedded/Makefile svneol=native#text/plain
 rtl/embedded/Makefile.fpc svneol=native#text/plain
 rtl/embedded/Makefile.fpc svneol=native#text/plain
+rtl/embedded/arm/allwinner_a20.pp svneol=native#text/plain
 rtl/embedded/arm/at91sam7x256.pp svneol=native#text/plain
 rtl/embedded/arm/at91sam7x256.pp svneol=native#text/plain
 rtl/embedded/arm/cortexm0.pp svneol=native#text/pascal
 rtl/embedded/arm/cortexm0.pp svneol=native#text/pascal
 rtl/embedded/arm/cortexm0_start.inc svneol=native#text/plain
 rtl/embedded/arm/cortexm0_start.inc svneol=native#text/plain
@@ -8229,6 +8265,7 @@ rtl/gba/gbabiosh.inc svneol=native#text/plain
 rtl/gba/libc.inc svneol=native#text/plain
 rtl/gba/libc.inc svneol=native#text/plain
 rtl/gba/libch.inc svneol=native#text/plain
 rtl/gba/libch.inc svneol=native#text/plain
 rtl/gba/prt0.as svneol=native#text/plain
 rtl/gba/prt0.as svneol=native#text/plain
+rtl/gba/rtl.cfg svneol=native#text/plain
 rtl/gba/rtldefs.inc svneol=native#text/plain
 rtl/gba/rtldefs.inc svneol=native#text/plain
 rtl/gba/sysdir.inc svneol=native#text/plain
 rtl/gba/sysdir.inc svneol=native#text/plain
 rtl/gba/sysfile.inc svneol=native#text/plain
 rtl/gba/sysfile.inc svneol=native#text/plain
@@ -8758,6 +8795,7 @@ rtl/nds/ndsbiosh.inc svneol=native#text/plain
 rtl/nds/ndsh.inc svneol=native#text/plain
 rtl/nds/ndsh.inc svneol=native#text/plain
 rtl/nds/prt07.as svneol=native#text/plain
 rtl/nds/prt07.as svneol=native#text/plain
 rtl/nds/prt09.as svneol=native#text/plain
 rtl/nds/prt09.as svneol=native#text/plain
+rtl/nds/rtl.cfg svneol=native#text/plain
 rtl/nds/rtldefs.inc svneol=native#text/plain
 rtl/nds/rtldefs.inc svneol=native#text/plain
 rtl/nds/sysdir.inc svneol=native#text/plain
 rtl/nds/sysdir.inc svneol=native#text/plain
 rtl/nds/sysfile.inc svneol=native#text/plain
 rtl/nds/sysfile.inc svneol=native#text/plain
@@ -9294,6 +9332,7 @@ rtl/wii/classes.pp svneol=native#text/plain
 rtl/wii/dos.pp svneol=native#text/plain
 rtl/wii/dos.pp svneol=native#text/plain
 rtl/wii/libc.inc svneol=native#text/plain
 rtl/wii/libc.inc svneol=native#text/plain
 rtl/wii/libch.inc svneol=native#text/plain
 rtl/wii/libch.inc svneol=native#text/plain
+rtl/wii/rtl.cfg svneol=native#text/plain
 rtl/wii/rtldefs.inc svneol=native#text/plain
 rtl/wii/rtldefs.inc svneol=native#text/plain
 rtl/wii/sysdir.inc svneol=native#text/plain
 rtl/wii/sysdir.inc svneol=native#text/plain
 rtl/wii/sysfile.inc svneol=native#text/plain
 rtl/wii/sysfile.inc svneol=native#text/plain
@@ -10219,6 +10258,7 @@ tests/tbs/tb0466.pp svneol=native#text/plain
 tests/tbs/tb0467.pp svneol=native#text/plain
 tests/tbs/tb0467.pp svneol=native#text/plain
 tests/tbs/tb0468.pp svneol=native#text/plain
 tests/tbs/tb0468.pp svneol=native#text/plain
 tests/tbs/tb0468a.pas svneol=native#text/plain
 tests/tbs/tb0468a.pas svneol=native#text/plain
+tests/tbs/tb0468a.pp svneol=native#text/plain
 tests/tbs/tb0469.pp svneol=native#text/plain
 tests/tbs/tb0469.pp svneol=native#text/plain
 tests/tbs/tb0470.pp svneol=native#text/plain
 tests/tbs/tb0470.pp svneol=native#text/plain
 tests/tbs/tb0471.pp svneol=native#text/plain
 tests/tbs/tb0471.pp svneol=native#text/plain
@@ -10326,6 +10366,7 @@ tests/tbs/tb0568.pp svneol=native#text/plain
 tests/tbs/tb0569.pp svneol=native#text/pascal
 tests/tbs/tb0569.pp svneol=native#text/pascal
 tests/tbs/tb0570.pp svneol=native#text/plain
 tests/tbs/tb0570.pp svneol=native#text/plain
 tests/tbs/tb0571.pas svneol=native#text/plain
 tests/tbs/tb0571.pas svneol=native#text/plain
+tests/tbs/tb0571.pp svneol=native#text/plain
 tests/tbs/tb0572.pp svneol=native#text/plain
 tests/tbs/tb0572.pp svneol=native#text/plain
 tests/tbs/tb0573.pp svneol=native#text/plain
 tests/tbs/tb0573.pp svneol=native#text/plain
 tests/tbs/tb0574.pp svneol=native#text/pascal
 tests/tbs/tb0574.pp svneol=native#text/pascal
@@ -10367,6 +10408,7 @@ tests/tbs/tb0607.pp svneol=native#text/plain
 tests/tbs/tb0608.pp svneol=native#text/pascal
 tests/tbs/tb0608.pp svneol=native#text/pascal
 tests/tbs/tb0609.pp svneol=native#text/plain
 tests/tbs/tb0609.pp svneol=native#text/plain
 tests/tbs/tb205.pp svneol=native#text/plain
 tests/tbs/tb205.pp svneol=native#text/plain
+tests/tbs/tb610.pp svneol=native#text/pascal
 tests/tbs/tbs0594.pp svneol=native#text/pascal
 tests/tbs/tbs0594.pp svneol=native#text/pascal
 tests/tbs/ub0060.pp svneol=native#text/plain
 tests/tbs/ub0060.pp svneol=native#text/plain
 tests/tbs/ub0069.pp svneol=native#text/plain
 tests/tbs/ub0069.pp svneol=native#text/plain
@@ -10474,6 +10516,13 @@ tests/test/cg/obj/beos/i386/tcext4.o -text
 tests/test/cg/obj/beos/i386/tcext5.o -text
 tests/test/cg/obj/beos/i386/tcext5.o -text
 tests/test/cg/obj/cpptcl1.cpp svneol=native#text/plain
 tests/test/cg/obj/cpptcl1.cpp svneol=native#text/plain
 tests/test/cg/obj/cpptcl2.cpp svneol=native#text/plain
 tests/test/cg/obj/cpptcl2.cpp svneol=native#text/plain
+tests/test/cg/obj/darwin/aarch64/cpptcl1.o -text
+tests/test/cg/obj/darwin/aarch64/cpptcl2.o -text
+tests/test/cg/obj/darwin/aarch64/ctest.o -text
+tests/test/cg/obj/darwin/aarch64/tcext3.o -text
+tests/test/cg/obj/darwin/aarch64/tcext4.o -text
+tests/test/cg/obj/darwin/aarch64/tcext5.o -text
+tests/test/cg/obj/darwin/aarch64/tcext6.o -text
 tests/test/cg/obj/darwin/arm/cpptcl1.o -text
 tests/test/cg/obj/darwin/arm/cpptcl1.o -text
 tests/test/cg/obj/darwin/arm/cpptcl2.o -text
 tests/test/cg/obj/darwin/arm/cpptcl2.o -text
 tests/test/cg/obj/darwin/arm/ctest.o -text
 tests/test/cg/obj/darwin/arm/ctest.o -text
@@ -10769,6 +10818,7 @@ tests/test/cg/tcnvint3.pp svneol=native#text/plain
 tests/test/cg/tcnvint4.pp svneol=native#text/plain
 tests/test/cg/tcnvint4.pp svneol=native#text/plain
 tests/test/cg/tcnvint5.pp svneol=native#text/plain
 tests/test/cg/tcnvint5.pp svneol=native#text/plain
 tests/test/cg/tcnvint6.pp svneol=native#text/plain
 tests/test/cg/tcnvint6.pp svneol=native#text/plain
+tests/test/cg/tcnvint7.pp svneol=native#text/plain
 tests/test/cg/tcnvptr.pp svneol=native#text/plain
 tests/test/cg/tcnvptr.pp svneol=native#text/plain
 tests/test/cg/tcnvset.pp svneol=native#text/plain
 tests/test/cg/tcnvset.pp svneol=native#text/plain
 tests/test/cg/tcnvstr1.pp svneol=native#text/plain
 tests/test/cg/tcnvstr1.pp svneol=native#text/plain
@@ -11648,6 +11698,8 @@ tests/test/tgenconstraint8.pp svneol=native#text/pascal
 tests/test/tgenconstraint9.pp svneol=native#text/pascal
 tests/test/tgenconstraint9.pp svneol=native#text/pascal
 tests/test/tgeneric1.pp svneol=native#text/plain
 tests/test/tgeneric1.pp svneol=native#text/plain
 tests/test/tgeneric10.pp svneol=native#text/plain
 tests/test/tgeneric10.pp svneol=native#text/plain
+tests/test/tgeneric100.pp svneol=native#text/pascal
+tests/test/tgeneric101.pp svneol=native#text/pascal
 tests/test/tgeneric11.pp svneol=native#text/plain
 tests/test/tgeneric11.pp svneol=native#text/plain
 tests/test/tgeneric12.pp svneol=native#text/plain
 tests/test/tgeneric12.pp svneol=native#text/plain
 tests/test/tgeneric13.pp svneol=native#text/plain
 tests/test/tgeneric13.pp svneol=native#text/plain
@@ -11744,6 +11796,7 @@ tests/test/tgeneric95.pp svneol=native#text/pascal
 tests/test/tgeneric96.pp svneol=native#text/pascal
 tests/test/tgeneric96.pp svneol=native#text/pascal
 tests/test/tgeneric97.pp svneol=native#text/pascal
 tests/test/tgeneric97.pp svneol=native#text/pascal
 tests/test/tgeneric98.pp svneol=native#text/pascal
 tests/test/tgeneric98.pp svneol=native#text/pascal
+tests/test/tgeneric99.pp svneol=native#text/pascal
 tests/test/tgoto.pp svneol=native#text/plain
 tests/test/tgoto.pp svneol=native#text/plain
 tests/test/theap.pp svneol=native#text/plain
 tests/test/theap.pp svneol=native#text/plain
 tests/test/theapthread.pp svneol=native#text/plain
 tests/test/theapthread.pp svneol=native#text/plain
@@ -11904,6 +11957,7 @@ tests/test/tmsg3.pp svneol=native#text/plain
 tests/test/tmsg4.pp svneol=native#text/plain
 tests/test/tmsg4.pp svneol=native#text/plain
 tests/test/tmt1.pp svneol=native#text/plain
 tests/test/tmt1.pp svneol=native#text/plain
 tests/test/tmul1.pp svneol=native#text/pascal
 tests/test/tmul1.pp svneol=native#text/pascal
+tests/test/tnest1.pp svneol=native#text/plain
 tests/test/tnoext1.pp svneol=native#text/plain
 tests/test/tnoext1.pp svneol=native#text/plain
 tests/test/tnoext2.pp svneol=native#text/plain
 tests/test/tnoext2.pp svneol=native#text/plain
 tests/test/tnoext3.pp svneol=native#text/plain
 tests/test/tnoext3.pp svneol=native#text/plain
@@ -12093,6 +12147,7 @@ tests/test/tover1.pp svneol=native#text/plain
 tests/test/tover2.pp svneol=native#text/plain
 tests/test/tover2.pp svneol=native#text/plain
 tests/test/tover3.pp svneol=native#text/plain
 tests/test/tover3.pp svneol=native#text/plain
 tests/test/tover4.pas svneol=native#text/plain
 tests/test/tover4.pas svneol=native#text/plain
+tests/test/tover4.pp svneol=native#text/plain
 tests/test/tpackrec.pp svneol=native#text/plain
 tests/test/tpackrec.pp svneol=native#text/plain
 tests/test/tparray1.pp svneol=native#text/plain
 tests/test/tparray1.pp svneol=native#text/plain
 tests/test/tparray10.pp svneol=native#text/plain
 tests/test/tparray10.pp svneol=native#text/plain
@@ -12342,6 +12397,7 @@ tests/test/tunit3.pp svneol=native#text/plain
 tests/test/tunroll1.pp svneol=native#text/plain
 tests/test/tunroll1.pp svneol=native#text/plain
 tests/test/tutf81.pp svneol=native#text/plain
 tests/test/tutf81.pp svneol=native#text/plain
 tests/test/tutf82.pp svneol=native#text/plain
 tests/test/tutf82.pp svneol=native#text/plain
+tests/test/tutf8cpl.pp svneol=native#text/plain
 tests/test/tvarpropsetter1.pp svneol=native#text/plain
 tests/test/tvarpropsetter1.pp svneol=native#text/plain
 tests/test/tvarpropsetter2.pp svneol=native#text/plain
 tests/test/tvarpropsetter2.pp svneol=native#text/plain
 tests/test/tvarset1.pp svneol=native#text/plain
 tests/test/tvarset1.pp svneol=native#text/plain
@@ -12391,6 +12447,7 @@ tests/test/ugeneric96a.pp svneol=native#text/pascal
 tests/test/ugeneric96b.pp svneol=native#text/pascal
 tests/test/ugeneric96b.pp svneol=native#text/pascal
 tests/test/ugeneric96c.pp svneol=native#text/pascal
 tests/test/ugeneric96c.pp svneol=native#text/pascal
 tests/test/ugeneric96d.pp svneol=native#text/pascal
 tests/test/ugeneric96d.pp svneol=native#text/pascal
+tests/test/ugeneric99.pp svneol=native#text/pascal
 tests/test/uhintdir.pp svneol=native#text/plain
 tests/test/uhintdir.pp svneol=native#text/plain
 tests/test/uhlp3.pp svneol=native#text/pascal
 tests/test/uhlp3.pp svneol=native#text/pascal
 tests/test/uhlp31.pp svneol=native#text/pascal
 tests/test/uhlp31.pp svneol=native#text/pascal
@@ -12605,6 +12662,7 @@ tests/test/units/system/tslice2.pp svneol=native#text/plain
 tests/test/units/system/tstr1.pp svneol=native#text/pascal
 tests/test/units/system/tstr1.pp svneol=native#text/pascal
 tests/test/units/system/tstring.pp svneol=native#text/plain
 tests/test/units/system/tstring.pp svneol=native#text/plain
 tests/test/units/system/ttrig.pas svneol=native#text/plain
 tests/test/units/system/ttrig.pas svneol=native#text/plain
+tests/test/units/system/ttrig.pp svneol=native#text/plain
 tests/test/units/system/ttrunc.pp svneol=native#text/plain
 tests/test/units/system/ttrunc.pp svneol=native#text/plain
 tests/test/units/system/tval.inc svneol=native#text/plain
 tests/test/units/system/tval.inc svneol=native#text/plain
 tests/test/units/system/tval.pp svneol=native#text/plain
 tests/test/units/system/tval.pp svneol=native#text/plain
@@ -12888,6 +12946,7 @@ tests/webtbf/tw2128.pp svneol=native#text/plain
 tests/webtbf/tw2129.pp svneol=native#text/plain
 tests/webtbf/tw2129.pp svneol=native#text/plain
 tests/webtbf/tw21363.pp svneol=native#text/pascal
 tests/webtbf/tw21363.pp svneol=native#text/pascal
 tests/webtbf/tw21466.pas svneol=native#text/pascal
 tests/webtbf/tw21466.pas svneol=native#text/pascal
+tests/webtbf/tw21466.pp svneol=native#text/pascal
 tests/webtbf/tw2154.pp svneol=native#text/plain
 tests/webtbf/tw2154.pp svneol=native#text/plain
 tests/webtbf/tw21566.pp svneol=native#text/pascal
 tests/webtbf/tw21566.pp svneol=native#text/pascal
 tests/webtbf/tw2174.pp svneol=native#text/plain
 tests/webtbf/tw2174.pp svneol=native#text/plain
@@ -13092,6 +13151,7 @@ tests/webtbf/tw8588.pp svneol=native#text/plain
 tests/webtbf/tw8591.pp svneol=native#text/plain
 tests/webtbf/tw8591.pp svneol=native#text/plain
 tests/webtbf/tw8717.pp svneol=native#text/plain
 tests/webtbf/tw8717.pp svneol=native#text/plain
 tests/webtbf/tw8738.pas svneol=native#text/plain
 tests/webtbf/tw8738.pas svneol=native#text/plain
+tests/webtbf/tw8738.pp svneol=native#text/plain
 tests/webtbf/tw8777a.pp svneol=native#text/plain
 tests/webtbf/tw8777a.pp svneol=native#text/plain
 tests/webtbf/tw8777b.pp svneol=native#text/plain
 tests/webtbf/tw8777b.pp svneol=native#text/plain
 tests/webtbf/tw8777c.pp svneol=native#text/plain
 tests/webtbf/tw8777c.pp svneol=native#text/plain
@@ -13513,6 +13573,7 @@ tests/webtbs/tw1430.pp svneol=native#text/plain
 tests/webtbs/tw14307.pp svneol=native#text/plain
 tests/webtbs/tw14307.pp svneol=native#text/plain
 tests/webtbs/tw14315.pp svneol=native#text/plain
 tests/webtbs/tw14315.pp svneol=native#text/plain
 tests/webtbs/tw1433.pp svneol=native#text/plain
 tests/webtbs/tw1433.pp svneol=native#text/plain
+tests/webtbs/tw14347.pp svneol=native#text/pascal
 tests/webtbs/tw14363.pp svneol=native#text/plain
 tests/webtbs/tw14363.pp svneol=native#text/plain
 tests/webtbs/tw14388.pp svneol=native#text/pascal
 tests/webtbs/tw14388.pp svneol=native#text/pascal
 tests/webtbs/tw14403.pp svneol=native#text/plain
 tests/webtbs/tw14403.pp svneol=native#text/plain
@@ -13712,6 +13773,7 @@ tests/webtbs/tw17838.pp svneol=native#text/pascal
 tests/webtbs/tw17846.pp svneol=native#text/plain
 tests/webtbs/tw17846.pp svneol=native#text/plain
 tests/webtbs/tw17862.pp svneol=native#text/plain
 tests/webtbs/tw17862.pp svneol=native#text/plain
 tests/webtbs/tw17904.pas svneol=native#text/plain
 tests/webtbs/tw17904.pas svneol=native#text/plain
+tests/webtbs/tw17904.pp svneol=native#text/plain
 tests/webtbs/tw17907/main/main.pas svneol=native#text/plain
 tests/webtbs/tw17907/main/main.pas svneol=native#text/plain
 tests/webtbs/tw17907/test.bat svneol=native#text/plain
 tests/webtbs/tw17907/test.bat svneol=native#text/plain
 tests/webtbs/tw17907/unit1/unit0001.pas svneol=native#text/plain
 tests/webtbs/tw17907/unit1/unit0001.pas svneol=native#text/plain
@@ -13817,6 +13879,7 @@ tests/webtbs/tw19651.pp svneol=native#text/plain
 tests/webtbs/tw19697.pp svneol=native#text/pascal
 tests/webtbs/tw19697.pp svneol=native#text/pascal
 tests/webtbs/tw19700.pp svneol=native#text/plain
 tests/webtbs/tw19700.pp svneol=native#text/plain
 tests/webtbs/tw19701.pas svneol=native#text/plain
 tests/webtbs/tw19701.pas svneol=native#text/plain
+tests/webtbs/tw19701.pp svneol=native#text/plain
 tests/webtbs/tw19851a.pp svneol=native#text/pascal
 tests/webtbs/tw19851a.pp svneol=native#text/pascal
 tests/webtbs/tw19851b.pp svneol=native#text/pascal
 tests/webtbs/tw19851b.pp svneol=native#text/pascal
 tests/webtbs/tw19864.pp svneol=native#text/pascal
 tests/webtbs/tw19864.pp svneol=native#text/pascal
@@ -14253,6 +14316,7 @@ tests/webtbs/tw2737.pp svneol=native#text/plain
 tests/webtbs/tw2738.pp svneol=native#text/plain
 tests/webtbs/tw2738.pp svneol=native#text/plain
 tests/webtbs/tw2739.pp svneol=native#text/plain
 tests/webtbs/tw2739.pp svneol=native#text/plain
 tests/webtbs/tw27424.pp svneol=native#text/pascal
 tests/webtbs/tw27424.pp svneol=native#text/pascal
+tests/webtbs/tw27515.pp svneol=native#text/pascal
 tests/webtbs/tw2758.pp svneol=native#text/plain
 tests/webtbs/tw2758.pp svneol=native#text/plain
 tests/webtbs/tw2763.pp svneol=native#text/plain
 tests/webtbs/tw2763.pp svneol=native#text/plain
 tests/webtbs/tw2765.pp svneol=native#text/plain
 tests/webtbs/tw2765.pp svneol=native#text/plain

+ 28 - 3
Makefile

@@ -1,8 +1,8 @@
 #
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-01-04 rev 29399]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-02-23 rev 29972]
 #
 #
 default: help
 default: help
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos aarch64-darwin
 BSDs = freebsd netbsd openbsd darwin dragonfly
 BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
 LIMIT83fs = go32v2 os2 emx watcom msdos
@@ -379,6 +379,9 @@ endif
 ifeq ($(CPU_TARGET),avr)
 ifeq ($(CPU_TARGET),avr)
 PPSUF=avr
 PPSUF=avr
 endif
 endif
+ifeq ($(CPU_TARGET),aarch64)
+PPSUF=a64
+endif
 ifdef CROSSCOMPILE
 ifdef CROSSCOMPILE
 ifneq ($(CPU_TARGET),jvm)
 ifneq ($(CPU_TARGET),jvm)
 PPPRE=ppcross
 PPPRE=ppcross
@@ -467,7 +470,7 @@ endif
 endif
 endif
 BuildOnlyBaseCPUs=jvm
 BuildOnlyBaseCPUs=jvm
 ifneq ($(wildcard utils),)
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba msdos $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 ifdef BUILDFULLNATIVE
 UTILS=1
 UTILS=1
@@ -633,6 +636,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
 endif
@@ -705,6 +711,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 override INSTALL_FPCPACKAGE=y
 override INSTALL_FPCPACKAGE=y
 ifdef REQUIRE_UNITSDIR
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
@@ -2280,6 +2289,14 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 TARGET_DIRS_INSTALLER=1
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
 TARGET_DIRS_RTL=1
@@ -2472,6 +2489,14 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 TARGET_DIRS_INSTALLER=1
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifdef TARGET_DIRS_COMPILER
 ifdef TARGET_DIRS_COMPILER
 compiler_all:
 compiler_all:
 	$(MAKE) -C compiler all
 	$(MAKE) -C compiler all

+ 4 - 1
Makefile.fpc

@@ -79,6 +79,9 @@ endif
 ifeq ($(CPU_TARGET),avr)
 ifeq ($(CPU_TARGET),avr)
 PPSUF=avr
 PPSUF=avr
 endif
 endif
+ifeq ($(CPU_TARGET),aarch64)
+PPSUF=a64
+endif
 
 
 # cross compilers uses full cpu_target, not just ppc-suffix
 # cross compilers uses full cpu_target, not just ppc-suffix
 # (except if the target cannot run a native compiler)
 # (except if the target cannot run a native compiler)
@@ -204,7 +207,7 @@ endif
 BuildOnlyBaseCPUs=jvm
 BuildOnlyBaseCPUs=jvm
 
 
 ifneq ($(wildcard utils),)
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba msdos $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 ifdef BUILDFULLNATIVE
 UTILS=1
 UTILS=1

+ 65 - 10
compiler/Makefile

@@ -1,8 +1,8 @@
 #
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2014-12-07 rev 29213]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-02-23 rev 29972]
 #
 #
 default: all
 default: all
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos aarch64-darwin
 BSDs = freebsd netbsd openbsd darwin dragonfly
 BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
 LIMIT83fs = go32v2 os2 emx watcom msdos
@@ -328,7 +328,7 @@ endif
 override PACKAGE_NAME=compiler
 override PACKAGE_NAME=compiler
 override PACKAGE_VERSION=3.1.1
 override PACKAGE_VERSION=3.1.1
 unexport FPC_VERSION FPC_COMPILERINFO
 unexport FPC_VERSION FPC_COMPILERINFO
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
 ALLTARGETS=$(CYCLETARGETS)
 ALLTARGETS=$(CYCLETARGETS)
 ifdef ALPHA
 ifdef ALPHA
 PPC_TARGET=alpha
 PPC_TARGET=alpha
@@ -372,6 +372,9 @@ endif
 ifdef I8086
 ifdef I8086
 PPC_TARGET=i8086
 PPC_TARGET=i8086
 endif
 endif
+ifdef AARCH64
+PPC_TARGET=aarch64
+endif
 ifndef PPC_TARGET
 ifndef PPC_TARGET
 PPC_TARGET=$(CPU_TARGET)
 PPC_TARGET=$(CPU_TARGET)
 endif
 endif
@@ -469,6 +472,9 @@ endif
 ifeq ($(CPC_TARGET),i8086)
 ifeq ($(CPC_TARGET),i8086)
 CPUSUF=8086
 CPUSUF=8086
 endif
 endif
+ifeq ($(CPC_TARGET),aarch64)
+CPUSUF=a64
+endif
 NOCPUDEF=1
 NOCPUDEF=1
 MSGFILE=msg/error$(FPCLANG).msg
 MSGFILE=msg/error$(FPCLANG).msg
 SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
 SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
@@ -545,6 +551,9 @@ endif
 ifeq ($(OS_TARGET),msdos)
 ifeq ($(OS_TARGET),msdos)
 NoNativeBinaries=1
 NoNativeBinaries=1
 endif
 endif
+ifeq ($(OS_TARGET),nds)
+NoNativeBinaries=1
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
@@ -704,6 +713,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
@@ -776,6 +788,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_DIRS+=utils
 override TARGET_DIRS+=utils
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
@@ -935,6 +950,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_PROGRAMS+=pp
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
@@ -1007,6 +1025,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_PROGRAMS+=pp
 override TARGET_PROGRAMS+=pp
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_PROGRAMS+=pp
+endif
 override INSTALL_FPCPACKAGE=y
 override INSTALL_FPCPACKAGE=y
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
@@ -1167,6 +1188,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
 endif
@@ -1239,6 +1263,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
@@ -1398,6 +1425,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
@@ -1470,6 +1500,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
@@ -1629,6 +1662,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
@@ -1701,6 +1737,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_TARGETDIR+=.
 override COMPILER_TARGETDIR+=.
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
@@ -1860,6 +1899,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
@@ -1932,6 +1974,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifdef REQUIRE_UNITSDIR
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 endif
 endif
@@ -2741,6 +2786,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
@@ -2813,6 +2861,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 REQUIRE_PACKAGES_RTL=1
 REQUIRE_PACKAGES_RTL=1
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifdef REQUIRE_PACKAGES_RTL
 ifdef REQUIRE_PACKAGES_RTL
 PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
 PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
 ifneq ($(PACKAGEDIR_RTL),)
 ifneq ($(PACKAGEDIR_RTL),)
@@ -2965,7 +3016,7 @@ ifdef CREATESHARED
 override FPCOPT+=-Cg
 override FPCOPT+=-Cg
 endif
 endif
 ifneq ($(findstring $(OS_TARGET),dragonfly freebsd openbsd netbsd linux solaris),)
 ifneq ($(findstring $(OS_TARGET),dragonfly freebsd openbsd netbsd linux solaris),)
-ifeq ($(CPU_TARGET),x86_64)
+ifneq ($(findstring $(CPU_TARGET),x86_64 mips mipsel),)
 override FPCOPT+=-Cg
 override FPCOPT+=-Cg
 endif
 endif
 endif
 endif
@@ -3604,6 +3655,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 ifeq ($(FULL_TARGET),x86_64-embedded)
 TARGET_DIRS_UTILS=1
 TARGET_DIRS_UTILS=1
 endif
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+TARGET_DIRS_UTILS=1
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 TARGET_DIRS_UTILS=1
 TARGET_DIRS_UTILS=1
 endif
 endif
@@ -3676,6 +3730,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 ifeq ($(FULL_TARGET),i8086-msdos)
 TARGET_DIRS_UTILS=1
 TARGET_DIRS_UTILS=1
 endif
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+TARGET_DIRS_UTILS=1
+endif
 ifdef TARGET_DIRS_UTILS
 ifdef TARGET_DIRS_UTILS
 utils_all:
 utils_all:
 	$(MAKE) -C utils all
 	$(MAKE) -C utils all
@@ -3804,7 +3861,7 @@ INSTALLEXEFILE=$(PPCROSSNAME)
 else
 else
 INSTALLEXEFILE=$(EXENAME)
 INSTALLEXEFILE=$(EXENAME)
 endif
 endif
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086 aarch64
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
 $(PPC_TARGETS):
 $(PPC_TARGETS):
@@ -3839,11 +3896,11 @@ ppuclean:
 tempclean:
 tempclean:
 	-$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 	-$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 execlean :
 execlean :
-	-$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+	-$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
 $(addsuffix _clean,$(ALLTARGETS)):
 $(addsuffix _clean,$(ALLTARGETS)):
 	-$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
 	-$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
 	-$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
 	-$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-	-$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME))
+	-$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
 	-$(DEL) $(EXENAME)
 	-$(DEL) $(EXENAME)
 clean: tempclean execlean cleanall $(addsuffix _clean,$(CPC_TARGET)) $(addsuffix _clean,$(TARGET_DIRS))
 clean: tempclean execlean cleanall $(addsuffix _clean,$(CPC_TARGET)) $(addsuffix _clean,$(TARGET_DIRS))
@@ -3995,13 +4052,11 @@ cycle:
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
-ifneq ($(OS_TARGET),embedded)
-ifneq ($(OS_TARGET),gba)
+ifndef NoNativeBinaries
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
 endif
 endif
 endif
 endif
 endif
 endif
-endif
 else
 else
 cycle: override FPC=
 cycle: override FPC=
 cycle:
 cycle:

+ 14 - 10
compiler/Makefile.fpc

@@ -32,7 +32,7 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 unexport FPC_VERSION FPC_COMPILERINFO
 
 
 # Which platforms are ready for inclusion in the cycle
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
 
 
 # All supported targets used for clean
 # All supported targets used for clean
 ALLTARGETS=$(CYCLETARGETS)
 ALLTARGETS=$(CYCLETARGETS)
@@ -80,6 +80,9 @@ endif
 ifdef I8086
 ifdef I8086
 PPC_TARGET=i8086
 PPC_TARGET=i8086
 endif
 endif
+ifdef AARCH64
+PPC_TARGET=aarch64
+endif
 
 
 # Default is to generate a compiler for the same
 # Default is to generate a compiler for the same
 # platform as CPU_TARGET (a native compiler)
 # platform as CPU_TARGET (a native compiler)
@@ -205,6 +208,9 @@ endif
 ifeq ($(CPC_TARGET),i8086)
 ifeq ($(CPC_TARGET),i8086)
 CPUSUF=8086
 CPUSUF=8086
 endif
 endif
+ifeq ($(CPC_TARGET),aarch64)
+CPUSUF=a64
+endif
 
 
 # Do not define the default -d$(CPU_TARGET) because that
 # Do not define the default -d$(CPU_TARGET) because that
 # will conflict with our -d$(CPC_TARGET)
 # will conflict with our -d$(CPC_TARGET)
@@ -323,6 +329,9 @@ endif
 ifeq ($(OS_TARGET),msdos)
 ifeq ($(OS_TARGET),msdos)
 NoNativeBinaries=1
 NoNativeBinaries=1
 endif
 endif
+ifeq ($(OS_TARGET),nds)
+NoNativeBinaries=1
+endif
 
 
 [rules]
 [rules]
 #####################################################################
 #####################################################################
@@ -397,7 +406,7 @@ endif
 # CPU targets
 # CPU targets
 #####################################################################
 #####################################################################
 
 
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086 aarch64
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 
 
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
@@ -452,12 +461,12 @@ tempclean:
         -$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
         -$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 
 
 execlean :
 execlean :
-        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
 
 
 $(addsuffix _clean,$(ALLTARGETS)):
 $(addsuffix _clean,$(ALLTARGETS)):
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME))
+        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
 
 
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
         -$(DEL) $(EXENAME)
         -$(DEL) $(EXENAME)
@@ -682,14 +691,10 @@ cycle:
 # ppc<ARCH> (target native)
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
-# building a native compiler for embedded targets is not possible
-ifneq ($(OS_TARGET),embedded)
-# building a native compiler for the arm-gba target is not possible
-ifneq ($(OS_TARGET),gba)
+ifndef NoNativeBinaries
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
 endif
 endif
 endif
 endif
-endif
 
 
 endif
 endif
 
 
@@ -715,7 +720,6 @@ cycle:
 # ppc<ARCH> (target native)
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
 ifndef CROSSINSTALL
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
-# building a native compiler for JVM and embedded targets is not possible
 ifndef NoNativeBinaries
 ifndef NoNativeBinaries
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
 endif
 endif

+ 93 - 36
compiler/aarch64/a64att.inc

@@ -1,15 +1,37 @@
 { don't edit, this file is generated from armins.dat }
 { don't edit, this file is generated from armins.dat }
 (
 (
-'nop',
+'none',
 'b',
 'b',
-'cb',
-'tb',
+'cbz',
+'cbnz',
+'tbz',
+'tbnz',
 'bl',
 'bl',
 'blr',
 'blr',
 'br',
 'br',
 'ret',
 'ret',
+'brk',
+'hlt',
+'hvc',
+'smc',
+'svc',
+'eret',
+'dcps1',
+'dcps2',
+'dcps3',
+'drps',
+'dc',
+'at',
+'tlbi',
+'hint',
+'clrex',
+'dsb',
+'dmb',
+'isb',
 'ldr',
 'ldr',
 'str',
 'str',
+'ldur',
+'stur',
 'ldp',
 'ldp',
 'stp',
 'stp',
 'ldnp',
 'ldnp',
@@ -17,40 +39,66 @@
 'ldtr',
 'ldtr',
 'sttr',
 'sttr',
 'ldxr',
 'ldxr',
+'ldxp',
 'stxr',
 'stxr',
+'stxp',
 'ldar',
 'ldar',
 'stlr',
 'stlr',
 'ldaxr',
 'ldaxr',
 'stlxr',
 'stlxr',
+'stlxp',
+'ld1',
+'ld2',
+'ld3',
+'ld4',
+'st1',
+'st2',
+'st3',
+'st4',
+'ld1r',
+'ld2r',
+'ld3r',
+'ld4r',
 'prfm',
 'prfm',
+'prfum',
 'add',
 'add',
-'adc',
 'sub',
 'sub',
-'sbc',
 'cmp',
 'cmp',
 'cmn',
 'cmn',
-'mov',
 'and',
 'and',
-'bic',
 'eor',
 'eor',
-'eon',
 'orr',
 'orr',
 'orn',
 'orn',
 'tst',
 'tst',
-'mvn',
+'movz',
+'movn',
 'movk',
 'movk',
+'mrs',
+'msr',
 'adrp',
 'adrp',
 'adr',
 'adr',
 'bfm',
 'bfm',
 'sbfm',
 'sbfm',
 'ubfm',
 'ubfm',
 'extr',
 'extr',
-'sxt',
-'uxt',
+'adc',
+'sbc',
+'bic',
+'eon',
 'asrv',
 'asrv',
-'llslv',
+'lslv',
 'lsrv',
 'lsrv',
 'rorv',
 'rorv',
+'madd',
+'msub',
+'smaddl',
+'smsubl',
+'smulh',
+'umaddl',
+'umsubl',
+'umulh',
+'sdiv',
+'udiv',
 'cls',
 'cls',
 'clz',
 'clz',
 'rbit',
 'rbit',
@@ -63,33 +111,39 @@
 'csneg',
 'csneg',
 'ccmn',
 'ccmn',
 'ccmp',
 'ccmp',
-'madd',
-'msub',
-'smaddl',
-'smsubl',
-'smulh',
-'umaddl',
-'umsubl',
-'umulh',
-'sdiv',
-'udiv',
-'neg',
+'nop',
+'yield',
+'wfe',
+'wfi',
+'sev',
+'sevl',
+'mov',
+'bfi',
+'bfxil',
+'sbfiz',
+'sbfx',
+'ubfiz',
+'ubfx',
 'asr',
 'asr',
 'lsl',
 'lsl',
 'lsr',
 'lsr',
 'ror',
 'ror',
-'cset',
-'csetm',
-'cinc',
-'cinv',
-'cneg',
+'sxt',
+'uxt',
+'neg',
 'ngc',
 'ngc',
+'mvn',
 'mneg',
 'mneg',
 'mul',
 'mul',
 'smnegl',
 'smnegl',
 'smull',
 'smull',
 'umnegl',
 'umnegl',
 'umull',
 'umull',
+'cset',
+'csetm',
+'cinc',
+'cinv',
+'cneg',
 'fmov',
 'fmov',
 'fcvt',
 'fcvt',
 'fcvtas',
 'fcvtas',
@@ -104,13 +158,13 @@
 'fcvtzu',
 'fcvtzu',
 'scvtf',
 'scvtf',
 'ucvtf',
 'ucvtf',
-'fprinta',
-'fprinti',
-'fprintm',
-'fprintn',
-'fprintp',
-'fprintx',
-'fprintz',
+'frinta',
+'frinti',
+'frintm',
+'frintn',
+'frintp',
+'frintx',
+'frintz',
 'fabs',
 'fabs',
 'fneg',
 'fneg',
 'fsqrt',
 'fsqrt',
@@ -130,5 +184,8 @@
 'fcmpe',
 'fcmpe',
 'fccmp',
 'fccmp',
 'fcmmpe',
 'fcmmpe',
-'fcsel'
+'fcsel',
+'umov',
+'ins',
+'movi'
 );
 );

+ 57 - 0
compiler/aarch64/a64atts.inc

@@ -130,5 +130,62 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 158 - 44
compiler/aarch64/a64ins.dat

@@ -1,10 +1,15 @@
-[NOP]
+; invalid
+[NONE]
 
 
 [B]
 [B]
 
 
-[CB]
+[CBZ]
+
+[CBNZ]
 
 
-[TB]
+[TBZ]
+
+[TBNZ]
 
 
 [BL]
 [BL]
 
 
@@ -14,10 +19,50 @@
 
 
 [RET]
 [RET]
 
 
+[BRK]
+
+[HLT]
+
+[HVC]
+
+[SMC]
+
+[SVC]
+
+[ERET]
+
+[DCPS1]
+
+[DCPS2]
+
+[DCPS3]
+
+[DRPS]
+
+[DC]
+
+[AT]
+
+[TLBI]
+
+[HINT]
+
+[CLREX]
+
+[DSB]
+
+[DMB]
+
+[ISB]
+
 [LDR]
 [LDR]
 
 
 [STR]
 [STR]
 
 
+[LDUR]
+
+[STUR]
+
 [LDP]
 [LDP]
 
 
 [STP]
 [STP]
@@ -32,8 +77,12 @@
 
 
 [LDXR]
 [LDXR]
 
 
+[LDXP]
+
 [STXR]
 [STXR]
 
 
+[STXP]
+
 [LDAR]
 [LDAR]
 
 
 [STLR]
 [STLR]
@@ -42,40 +91,64 @@
 
 
 [STLXR]
 [STLXR]
 
 
+[STLXP]
+
+[LD1]
+
+[LD2]
+
+[LD3]
+
+[LD4]
+
+[ST1]
+
+[ST2]
+
+[ST3]
+
+[ST4]
+
+[LD1R]
+
+[LD2R]
+
+[LD3R]
+
+[LD4R]
+
 [PRFM]
 [PRFM]
 
 
-[ADD]
+[PRFUM]
 
 
-[ADC]
+[ADD]
 
 
 [SUB]
 [SUB]
 
 
-[SBC]
-
 [CMP]
 [CMP]
 
 
 [CMN]
 [CMN]
 
 
-[MOV]
-
 [AND]
 [AND]
 
 
-[BIC]
-
 [EOR]
 [EOR]
 
 
-[EON]
-
 [ORR]
 [ORR]
 
 
 [ORN]
 [ORN]
 
 
 [TST]
 [TST]
 
 
-[MVN]
+[MOVZ]
+
+[MOVN]
 
 
 [MOVK]
 [MOVK]
 
 
+[MRS]
+
+[MSR]
+
 [ADRP]
 [ADRP]
 
 
 [ADR]
 [ADR]
@@ -88,18 +161,42 @@
 
 
 [EXTR]
 [EXTR]
 
 
-[SXT]
+[ADC]
 
 
-[UXT]
+[SBC]
+
+[BIC]
+
+[EON]
 
 
 [ASRV]
 [ASRV]
 
 
-[LLSLV]
+[LSLV]
 
 
 [LSRV]
 [LSRV]
 
 
 [RORV]
 [RORV]
 
 
+[MADD]
+
+[MSUB]
+
+[SMADDL]
+
+[SMSUBL]
+
+[SMULH]
+
+[UMADDL]
+
+[UMSUBL]
+
+[UMULH]
+
+[SDIV]
+
+[UDIV]
+
 [CLS]
 [CLS]
 
 
 [CLZ]
 [CLZ]
@@ -124,29 +221,33 @@
 
 
 [CCMP]
 [CCMP]
 
 
-[MADD]
+; Aliases
+; they are not generated by the compiler, they are only used for inline assembler
+[NOP]
 
 
-[MSUB]
+[YIELD]
 
 
-[SMADDL]
+[WFE]
 
 
-[SMSUBL]
+[WFI]
 
 
-[SMULH]
+[SEV]
 
 
-[UMADDL]
+[SEVL]
 
 
-[UMSUBL]
+[MOV]
 
 
-[UMULH]
+[BFI]
 
 
-[SDIV]
+[BFXIL]
 
 
-[UDIV]
+[SBFIZ]
 
 
-; Aliases
-; they are not generated by the compiler, they are only used for inline assembler
-[NEG]
+[SBFX]
+
+[UBFIZ]
+
+[UBFX]
 
 
 [ASR]
 [ASR]
 
 
@@ -156,18 +257,16 @@
 
 
 [ROR]
 [ROR]
 
 
-[CSET]
-
-[CSETM]
-
-[CINC]
+[SXT]
 
 
-[CINV]
+[UXT]
 
 
-[CNEG]
+[NEG]
 
 
 [NGC]
 [NGC]
 
 
+[MVN]
+
 [MNEG]
 [MNEG]
 
 
 [MUL]
 [MUL]
@@ -180,6 +279,16 @@
 
 
 [UMULL]
 [UMULL]
 
 
+[CSET]
+
+[CSETM]
+
+[CINC]
+
+[CINV]
+
+[CNEG]
+
 [FMOV]
 [FMOV]
 
 
 [FCVT]
 [FCVT]
@@ -208,19 +317,19 @@
 
 
 [UCVTF]
 [UCVTF]
 
 
-[FPRINTA]
+[FRINTA]
 
 
-[FPRINTI]
+[FRINTI]
 
 
-[FPRINTM]
+[FRINTM]
 
 
-[FPRINTN]
+[FRINTN]
 
 
-[FPRINTP]
+[FRINTP]
 
 
-[FPRINTX]
+[FRINTX]
 
 
-[FPRINTZ]
+[FRINTZ]
 
 
 [FABS]
 [FABS]
 
 
@@ -262,3 +371,8 @@
 
 
 [FCSEL]
 [FCSEL]
 
 
+[UMOV]
+
+[INS]
+
+[MOVI]

+ 93 - 36
compiler/aarch64/a64op.inc

@@ -1,15 +1,37 @@
 { don't edit, this file is generated from armins.dat }
 { don't edit, this file is generated from armins.dat }
 (
 (
-A_NOP,
+A_NONE,
 A_B,
 A_B,
-A_CB,
-A_TB,
+A_CBZ,
+A_CBNZ,
+A_TBZ,
+A_TBNZ,
 A_BL,
 A_BL,
 A_BLR,
 A_BLR,
 A_BR,
 A_BR,
 A_RET,
 A_RET,
+A_BRK,
+A_HLT,
+A_HVC,
+A_SMC,
+A_SVC,
+A_ERET,
+A_DCPS1,
+A_DCPS2,
+A_DCPS3,
+A_DRPS,
+A_DC,
+A_AT,
+A_TLBI,
+A_HINT,
+A_CLREX,
+A_DSB,
+A_DMB,
+A_ISB,
 A_LDR,
 A_LDR,
 A_STR,
 A_STR,
+A_LDUR,
+A_STUR,
 A_LDP,
 A_LDP,
 A_STP,
 A_STP,
 A_LDNP,
 A_LDNP,
@@ -17,40 +39,66 @@ A_STNP,
 A_LDTR,
 A_LDTR,
 A_STTR,
 A_STTR,
 A_LDXR,
 A_LDXR,
+A_LDXP,
 A_STXR,
 A_STXR,
+A_STXP,
 A_LDAR,
 A_LDAR,
 A_STLR,
 A_STLR,
 A_LDAXR,
 A_LDAXR,
 A_STLXR,
 A_STLXR,
+A_STLXP,
+A_LD1,
+A_LD2,
+A_LD3,
+A_LD4,
+A_ST1,
+A_ST2,
+A_ST3,
+A_ST4,
+A_LD1R,
+A_LD2R,
+A_LD3R,
+A_LD4R,
 A_PRFM,
 A_PRFM,
+A_PRFUM,
 A_ADD,
 A_ADD,
-A_ADC,
 A_SUB,
 A_SUB,
-A_SBC,
 A_CMP,
 A_CMP,
 A_CMN,
 A_CMN,
-A_MOV,
 A_AND,
 A_AND,
-A_BIC,
 A_EOR,
 A_EOR,
-A_EON,
 A_ORR,
 A_ORR,
 A_ORN,
 A_ORN,
 A_TST,
 A_TST,
-A_MVN,
+A_MOVZ,
+A_MOVN,
 A_MOVK,
 A_MOVK,
+A_MRS,
+A_MSR,
 A_ADRP,
 A_ADRP,
 A_ADR,
 A_ADR,
 A_BFM,
 A_BFM,
 A_SBFM,
 A_SBFM,
 A_UBFM,
 A_UBFM,
 A_EXTR,
 A_EXTR,
-A_SXT,
-A_UXT,
+A_ADC,
+A_SBC,
+A_BIC,
+A_EON,
 A_ASRV,
 A_ASRV,
-A_LLSLV,
+A_LSLV,
 A_LSRV,
 A_LSRV,
 A_RORV,
 A_RORV,
+A_MADD,
+A_MSUB,
+A_SMADDL,
+A_SMSUBL,
+A_SMULH,
+A_UMADDL,
+A_UMSUBL,
+A_UMULH,
+A_SDIV,
+A_UDIV,
 A_CLS,
 A_CLS,
 A_CLZ,
 A_CLZ,
 A_RBIT,
 A_RBIT,
@@ -63,33 +111,39 @@ A_CSINV,
 A_CSNEG,
 A_CSNEG,
 A_CCMN,
 A_CCMN,
 A_CCMP,
 A_CCMP,
-A_MADD,
-A_MSUB,
-A_SMADDL,
-A_SMSUBL,
-A_SMULH,
-A_UMADDL,
-A_UMSUBL,
-A_UMULH,
-A_SDIV,
-A_UDIV,
-A_NEG,
+A_NOP,
+A_YIELD,
+A_WFE,
+A_WFI,
+A_SEV,
+A_SEVL,
+A_MOV,
+A_BFI,
+A_BFXIL,
+A_SBFIZ,
+A_SBFX,
+A_UBFIZ,
+A_UBFX,
 A_ASR,
 A_ASR,
 A_LSL,
 A_LSL,
 A_LSR,
 A_LSR,
 A_ROR,
 A_ROR,
-A_CSET,
-A_CSETM,
-A_CINC,
-A_CINV,
-A_CNEG,
+A_SXT,
+A_UXT,
+A_NEG,
 A_NGC,
 A_NGC,
+A_MVN,
 A_MNEG,
 A_MNEG,
 A_MUL,
 A_MUL,
 A_SMNEGL,
 A_SMNEGL,
 A_SMULL,
 A_SMULL,
 A_UMNEGL,
 A_UMNEGL,
 A_UMULL,
 A_UMULL,
+A_CSET,
+A_CSETM,
+A_CINC,
+A_CINV,
+A_CNEG,
 A_FMOV,
 A_FMOV,
 A_FCVT,
 A_FCVT,
 A_FCVTAS,
 A_FCVTAS,
@@ -104,13 +158,13 @@ A_FCVTZS,
 A_FCVTZU,
 A_FCVTZU,
 A_SCVTF,
 A_SCVTF,
 A_UCVTF,
 A_UCVTF,
-A_FPRINTA,
-A_FPRINTI,
-A_FPRINTM,
-A_FPRINTN,
-A_FPRINTP,
-A_FPRINTX,
-A_FPRINTZ,
+A_FRINTA,
+A_FRINTI,
+A_FRINTM,
+A_FRINTN,
+A_FRINTP,
+A_FRINTX,
+A_FRINTZ,
 A_FABS,
 A_FABS,
 A_FNEG,
 A_FNEG,
 A_FSQRT,
 A_FSQRT,
@@ -130,5 +184,8 @@ A_FCMP,
 A_FCMPE,
 A_FCMPE,
 A_FCCMP,
 A_FCCMP,
 A_FCMMPE,
 A_FCMMPE,
-A_FCSEL
+A_FCSEL,
+A_UMOV,
+A_INS,
+A_MOVI
 );
 );

+ 168 - 161
compiler/aarch64/a64reg.dat

@@ -70,169 +70,176 @@ W30,$01,$04,$1E,w30,30,30
 X30,$01,$05,$1E,x30,30,30
 X30,$01,$05,$1E,x30,30,30
 WZR,$01,$04,$1F,wzr,31,31
 WZR,$01,$04,$1F,wzr,31,31
 XZR,$01,$05,$1F,xzr,31,31
 XZR,$01,$05,$1F,xzr,31,31
+; sp and zr share the same register number, but we still have to be able to
+; differentiate them because some instructions can be encoded with both ->
+; use a different superregister after all
+WSP,$01,$04,$20,wsp,31,31
+SP,$01,$05,$20,sp,31,31
 
 
 
 
 ; vfp registers
 ; vfp registers
-B0,$04,$01,$00,b0,0,0
-H0,$04,$03,$00,h0,0,0
-S0,$04,$09,$00,s0,0,0
-D0,$04,$0a,$00,d0,0,0
-Q0,$04,$05,$00,q0,0,0
-B1,$04,$01,$01,b1,1,1
-H1,$04,$03,$01,h1,1,1
-S1,$04,$09,$01,s1,1,1
-D1,$04,$0a,$01,d1,1,1
-Q1,$04,$05,$01,q1,1,1
-B2,$04,$01,$02,b2,2,2
-H2,$04,$03,$02,h2,2,2
-S2,$04,$09,$02,s2,2,2
-D2,$04,$0a,$02,d2,2,2
-Q2,$04,$05,$02,q2,2,2
-B3,$04,$01,$03,b3,3,3
-H3,$04,$03,$03,h3,3,3
-S3,$04,$09,$03,s3,3,3
-D3,$04,$0a,$03,d3,3,3
-Q3,$04,$05,$03,q3,3,3
-B4,$04,$01,$04,b4,4,4
-H4,$04,$03,$04,h4,4,4
-S4,$04,$09,$04,s4,4,4
-D4,$04,$0a,$04,d4,4,4
-Q4,$04,$05,$04,q4,4,4
-B5,$04,$01,$05,b5,5,5
-H5,$04,$03,$05,h5,5,5
-S5,$04,$09,$05,s5,5,5
-D5,$04,$0a,$05,d5,5,5
-Q5,$04,$05,$05,q5,5,5
-B6,$04,$01,$06,b6,6,6
-H6,$04,$03,$06,h6,6,6
-S6,$04,$09,$06,s6,6,6
-D6,$04,$0a,$06,d6,6,6
-Q6,$04,$05,$06,q6,6,6
-B7,$04,$01,$07,b7,7,7
-H7,$04,$03,$07,h7,7,7
-S7,$04,$09,$07,s7,7,7
-D7,$04,$0a,$07,d7,7,7
-Q7,$04,$05,$07,q7,7,7
-B8,$04,$01,$08,b8,8,8
-H8,$04,$03,$08,h8,8,8
-S8,$04,$09,$08,s8,8,8
-D8,$04,$0a,$08,d8,8,8
-Q8,$04,$05,$08,q8,8,8
-B9,$04,$01,$09,b9,9,9
-H9,$04,$03,$09,h9,9,9
-S9,$04,$09,$09,s9,9,9
-D9,$04,$0a,$09,d9,9,9
-Q9,$04,$05,$09,q9,9,9
-B10,$04,$01,$0A,b10,10,10
-H10,$04,$03,$0A,h10,10,10
-S10,$04,$09,$0A,s10,10,10
-D10,$04,$0a,$0A,d10,10,10
-Q10,$04,$05,$0A,q10,10,10
-B11,$04,$01,$0B,b11,11,11
-H11,$04,$03,$0B,h11,11,11
-S11,$04,$09,$0B,s11,11,11
-D11,$04,$0a,$0B,d11,11,11
-Q11,$04,$05,$0B,q11,11,11
-B12,$04,$01,$0C,b12,12,12
-H12,$04,$03,$0C,h12,12,12
-S12,$04,$09,$0C,s12,12,12
-D12,$04,$0a,$0C,d12,12,12
-Q12,$04,$05,$0C,q12,12,12
-B13,$04,$01,$0D,b13,13,13
-H13,$04,$03,$0D,h13,13,13
-S13,$04,$09,$0D,s13,13,13
-D13,$04,$0a,$0D,d13,13,13
-Q13,$04,$05,$0D,q13,13,13
-B14,$04,$01,$0E,b14,14,14
-H14,$04,$03,$0E,h14,14,14
-S14,$04,$09,$0E,s14,14,14
-D14,$04,$0a,$0E,d14,14,14
-Q14,$04,$05,$0E,q14,14,14
-B15,$04,$01,$0F,b15,15,15
-H15,$04,$03,$0F,h15,15,15
-S15,$04,$09,$0F,s15,15,15
-D15,$04,$0a,$0F,d15,15,15
-Q15,$04,$05,$0F,q15,15,15
-B16,$04,$01,$10,b16,16,16
-H16,$04,$03,$10,h16,16,16
-S16,$04,$09,$10,s16,16,16
-D16,$04,$0a,$10,d16,16,16
-Q16,$04,$05,$10,q16,16,16
-B17,$04,$01,$11,b17,17,17
-H17,$04,$03,$11,h17,17,17
-S17,$04,$09,$11,s17,17,17
-D17,$04,$0a,$11,d17,17,17
-Q17,$04,$05,$11,q17,17,17
-B18,$04,$01,$12,b18,18,18
-H18,$04,$03,$12,h18,18,18
-S18,$04,$09,$12,s18,18,18
-D18,$04,$0a,$12,d18,18,18
-Q18,$04,$05,$12,q18,18,18
-B19,$04,$01,$13,b19,19,19
-H19,$04,$03,$13,h19,19,19
-S19,$04,$09,$13,s19,19,19
-D19,$04,$0a,$13,d19,19,19
-Q19,$04,$05,$13,q19,19,19
-B20,$04,$01,$14,b20,20,20
-H20,$04,$03,$14,h20,20,20
-S20,$04,$09,$14,s20,20,20
-D20,$04,$0a,$14,d20,20,20
-Q20,$04,$05,$14,q20,20,20
-B21,$04,$01,$15,b21,21,21
-H21,$04,$03,$15,h21,21,21
-S21,$04,$09,$15,s21,21,21
-D21,$04,$0a,$15,d21,21,21
-Q21,$04,$05,$15,q21,21,21
-B22,$04,$01,$16,b22,22,22
-H22,$04,$03,$16,h22,22,22
-S22,$04,$09,$16,s22,22,22
-D22,$04,$0a,$16,d22,22,22
-Q22,$04,$05,$16,q22,22,22
-B23,$04,$01,$17,b23,23,23
-H23,$04,$03,$17,h23,23,23
-S23,$04,$09,$17,s23,23,23
-D23,$04,$0a,$17,d23,23,23
-Q23,$04,$05,$17,q23,23,23
-B24,$04,$01,$18,b24,24,24
-H24,$04,$03,$18,h24,24,24
-S24,$04,$09,$18,s24,24,24
-D24,$04,$0a,$18,d24,24,24
-Q24,$04,$05,$18,q24,24,24
-B25,$04,$01,$19,b25,25,25
-H25,$04,$03,$19,h25,25,25
-S25,$04,$09,$19,s25,25,25
-D25,$04,$0a,$19,d25,25,25
-Q25,$04,$05,$19,q25,25,25
-B26,$04,$01,$1A,b26,26,26
-H26,$04,$03,$1A,h26,26,26
-S26,$04,$09,$1A,s26,26,26
-D26,$04,$0a,$1A,d26,26,26
-Q26,$04,$05,$1A,q26,26,26
-B27,$04,$01,$1B,b27,27,27
-H27,$04,$03,$1B,h27,27,27
-S27,$04,$09,$1B,s27,27,27
-D27,$04,$0a,$1B,d27,27,27
-Q27,$04,$05,$1B,q27,27,27
-B28,$04,$01,$1C,b28,28,28
-H28,$04,$03,$1C,h28,28,28
-S28,$04,$09,$1C,s28,28,28
-D28,$04,$0a,$1C,d28,28,28
-Q28,$04,$05,$1C,q28,28,28
-B29,$04,$01,$1D,b29,29,29
-H29,$04,$03,$1D,h29,29,29
-S29,$04,$09,$1D,s29,29,29
-D29,$04,$0a,$1D,d29,29,29
-Q29,$04,$05,$1D,q29,29,29
-B30,$04,$01,$1E,b30,30,30
-H30,$04,$03,$1E,h30,30,30
-S30,$04,$09,$1E,s30,30,30
-D30,$04,$0a,$1E,d30,30,30
-Q30,$04,$05,$1E,q30,30,30
-B31,$04,$01,$1F,b31,31,31
-H31,$04,$03,$1F,h31,31,31
-S31,$04,$09,$1F,s31,31,31
-D31,$04,$0a,$1F,d31,31,31
-Q31,$04,$05,$1F,q31,31,31
+B0,$04,$01,$00,b0,64,64
+H0,$04,$03,$00,h0,64,64
+S0,$04,$09,$00,s0,64,64
+D0,$04,$0a,$00,d0,64,64
+Q0,$04,$05,$00,q0,64,64
+B1,$04,$01,$01,b1,65,65
+H1,$04,$03,$01,h1,65,65
+S1,$04,$09,$01,s1,65,65
+D1,$04,$0a,$01,d1,65,65
+Q1,$04,$05,$01,q1,65,65
+B2,$04,$01,$02,b2,66,66
+H2,$04,$03,$02,h2,66,66
+S2,$04,$09,$02,s2,66,66
+D2,$04,$0a,$02,d2,66,66
+Q2,$04,$05,$02,q2,66,66
+B3,$04,$01,$03,b3,67,67
+H3,$04,$03,$03,h3,67,67
+S3,$04,$09,$03,s3,67,67
+D3,$04,$0a,$03,d3,67,67
+Q3,$04,$05,$03,q3,67,67
+B4,$04,$01,$04,b4,68,68
+H4,$04,$03,$04,h4,68,68
+S4,$04,$09,$04,s4,68,68
+D4,$04,$0a,$04,d4,68,68
+Q4,$04,$05,$04,q4,68,68
+B5,$04,$01,$05,b5,69,69
+H5,$04,$03,$05,h5,69,69
+S5,$04,$09,$05,s5,69,69
+D5,$04,$0a,$05,d5,69,69
+Q5,$04,$05,$05,q5,69,69
+B6,$04,$01,$06,b6,70,70
+H6,$04,$03,$06,h6,70,70
+S6,$04,$09,$06,s6,70,70
+D6,$04,$0a,$06,d6,70,70
+Q6,$04,$05,$06,q6,70,70
+B7,$04,$01,$07,b7,71,71
+H7,$04,$03,$07,h7,71,71
+S7,$04,$09,$07,s7,71,71
+D7,$04,$0a,$07,d7,71,71
+Q7,$04,$05,$07,q7,71,71
+B8,$04,$01,$08,b8,72,72
+H8,$04,$03,$08,h8,72,72
+S8,$04,$09,$08,s8,72,72
+D8,$04,$0a,$08,d8,72,72
+Q8,$04,$05,$08,q8,72,72
+B9,$04,$01,$09,b9,73,73
+H9,$04,$03,$09,h9,73,73
+S9,$04,$09,$09,s9,73,73
+D9,$04,$0a,$09,d9,73,73
+Q9,$04,$05,$09,q9,73,73
+B10,$04,$01,$0A,b10,74,74
+H10,$04,$03,$0A,h10,74,74
+S10,$04,$09,$0A,s10,74,74
+D10,$04,$0a,$0A,d10,74,74
+Q10,$04,$05,$0A,q10,74,74
+B11,$04,$01,$0B,b11,75,75
+H11,$04,$03,$0B,h11,75,75
+S11,$04,$09,$0B,s11,75,75
+D11,$04,$0a,$0B,d11,75,75
+Q11,$04,$05,$0B,q11,75,75
+B12,$04,$01,$0C,b12,76,76
+H12,$04,$03,$0C,h12,76,76
+S12,$04,$09,$0C,s12,76,76
+D12,$04,$0a,$0C,d12,76,76
+Q12,$04,$05,$0C,q12,76,76
+B13,$04,$01,$0D,b13,77,77
+H13,$04,$03,$0D,h13,77,77
+S13,$04,$09,$0D,s13,77,77
+D13,$04,$0a,$0D,d13,77,77
+Q13,$04,$05,$0D,q13,77,77
+B14,$04,$01,$0E,b14,78,78
+H14,$04,$03,$0E,h14,78,78
+S14,$04,$09,$0E,s14,78,78
+D14,$04,$0a,$0E,d14,78,78
+Q14,$04,$05,$0E,q14,78,78
+B15,$04,$01,$0F,b15,79,79
+H15,$04,$03,$0F,h15,79,79
+S15,$04,$09,$0F,s15,79,79
+D15,$04,$0a,$0F,d15,79,79
+Q15,$04,$05,$0F,q15,79,79
+B16,$04,$01,$10,b16,80,80
+H16,$04,$03,$10,h16,80,80
+S16,$04,$09,$10,s16,80,80
+D16,$04,$0a,$10,d16,80,80
+Q16,$04,$05,$10,q16,80,80
+B17,$04,$01,$11,b17,81,81
+H17,$04,$03,$11,h17,81,81
+S17,$04,$09,$11,s17,81,81
+D17,$04,$0a,$11,d17,81,81
+Q17,$04,$05,$11,q17,81,81
+B18,$04,$01,$12,b18,82,82
+H18,$04,$03,$12,h18,82,82
+S18,$04,$09,$12,s18,82,82
+D18,$04,$0a,$12,d18,82,82
+Q18,$04,$05,$12,q18,82,82
+B19,$04,$01,$13,b19,83,83
+H19,$04,$03,$13,h19,83,83
+S19,$04,$09,$13,s19,83,83
+D19,$04,$0a,$13,d19,83,83
+Q19,$04,$05,$13,q19,83,83
+B20,$04,$01,$14,b20,84,84
+H20,$04,$03,$14,h20,84,84
+S20,$04,$09,$14,s20,84,84
+D20,$04,$0a,$14,d20,84,84
+Q20,$04,$05,$14,q20,84,84
+B21,$04,$01,$15,b21,85,85
+H21,$04,$03,$15,h21,85,85
+S21,$04,$09,$15,s21,85,85
+D21,$04,$0a,$15,d21,85,85
+Q21,$04,$05,$15,q21,85,85
+B22,$04,$01,$16,b22,86,86
+H22,$04,$03,$16,h22,86,86
+S22,$04,$09,$16,s22,86,86
+D22,$04,$0a,$16,d22,86,86
+Q22,$04,$05,$16,q22,86,86
+B23,$04,$01,$17,b23,87,87
+H23,$04,$03,$17,h23,87,87
+S23,$04,$09,$17,s23,87,87
+D23,$04,$0a,$17,d23,87,87
+Q23,$04,$05,$17,q23,87,87
+B24,$04,$01,$18,b24,88,88
+H24,$04,$03,$18,h24,88,88
+S24,$04,$09,$18,s24,88,88
+D24,$04,$0a,$18,d24,88,88
+Q24,$04,$05,$18,q24,88,88
+B25,$04,$01,$19,b25,89,89
+H25,$04,$03,$19,h25,89,89
+S25,$04,$09,$19,s25,89,89
+D25,$04,$0a,$19,d25,89,89
+Q25,$04,$05,$19,q25,89,89
+B26,$04,$01,$1A,b26,90,90
+H26,$04,$03,$1A,h26,90,90
+S26,$04,$09,$1A,s26,90,90
+D26,$04,$0a,$1A,d26,90,90
+Q26,$04,$05,$1A,q26,90,90
+B27,$04,$01,$1B,b27,91,91
+H27,$04,$03,$1B,h27,91,91
+S27,$04,$09,$1B,s27,91,91
+D27,$04,$0a,$1B,d27,91,91
+Q27,$04,$05,$1B,q27,91,91
+B28,$04,$01,$1C,b28,92,92
+H28,$04,$03,$1C,h28,92,92
+S28,$04,$09,$1C,s28,92,92
+D28,$04,$0a,$1C,d28,92,92
+Q28,$04,$05,$1C,q28,92,92
+B29,$04,$01,$1D,b29,93,93
+H29,$04,$03,$1D,h29,93,93
+S29,$04,$09,$1D,s29,93,93
+D29,$04,$0a,$1D,d29,93,93
+Q29,$04,$05,$1D,q29,93,93
+B30,$04,$01,$1E,b30,94,94
+H30,$04,$03,$1E,h30,94,94
+S30,$04,$09,$1E,s30,94,94
+D30,$04,$0a,$1E,d30,94,94
+Q30,$04,$05,$1E,q30,94,94
+B31,$04,$01,$1F,b31,95,95
+H31,$04,$03,$1F,h31,95,95
+S31,$04,$09,$1F,s31,95,95
+D31,$04,$0a,$1F,d31,95,95
+Q31,$04,$05,$1F,q31,95,95
 
 
 NZCV,$05,$00,$00,nzcv,0,0
 NZCV,$05,$00,$00,nzcv,0,0
-
+FPCR,$05,$00,$01,fpcr,0,0
+FPSR,$05,$00,$02,fpsr,0,0
+TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0

+ 462 - 1125
compiler/aarch64/aasmcpu.pas

@@ -1,7 +1,7 @@
 {
 {
     Copyright (c) 2003-2012 by Florian Klaempfl and others
     Copyright (c) 2003-2012 by Florian Klaempfl and others
 
 
-    Contains the assembler object for ARM64
+    Contains the assembler object for Aarch64
 
 
     This program is free software; you can redistribute it and/or modify
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     it under the terms of the GNU General Public License as published by
@@ -149,9 +149,6 @@ uses
 
 
       pinsentry=^tinsentry;
       pinsentry=^tinsentry;
 
 
-{    const
-      InsTab : array[0..instabentries-1] of TInsEntry={$i a64tab.inc} }
-
     var
     var
       InsTabCache : PInsTabCache;
       InsTabCache : PInsTabCache;
 
 
@@ -159,6 +156,7 @@ uses
       taicpu = class(tai_cpu_abstract_sym)
       taicpu = class(tai_cpu_abstract_sym)
          oppostfix : TOpPostfix;
          oppostfix : TOpPostfix;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
          procedure loadshifterop(opidx:longint;const so:tshifterop);
+         procedure loadconditioncode(opidx: longint; const c: tasmcond);
          constructor op_none(op : tasmop);
          constructor op_none(op : tasmop);
 
 
          constructor op_reg(op : tasmop;_op1 : tregister);
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -167,15 +165,21 @@ uses
 
 
          constructor op_reg_reg(op : tasmop;_op1,_op2 : tregister);
          constructor op_reg_reg(op : tasmop;_op1,_op2 : tregister);
          constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
          constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
+         constructor op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
+         constructor op_reg_const_shifterop(op : tasmop;_op1: tregister; _op2: aint;_op3 : tshifterop);
 
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
          constructor op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
          constructor op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
+         constructor op_reg_reg_const_const(op : tasmop;_op1,_op2 : tregister; _op3, _op4: aint);
+         constructor op_reg_reg_const_shifterop(op : tasmop;_op1,_op2 : tregister; _op3: aint; const _op4 : tshifterop);
          constructor op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
          constructor op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
          constructor op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
          constructor op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
          constructor op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
          constructor op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
-         constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+         constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
+         constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
+
 
 
          { this is for Jmp instructions }
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
@@ -188,6 +192,7 @@ uses
          function is_same_reg_move(regtype: Tregistertype):boolean; override;
          function is_same_reg_move(regtype: Tregistertype):boolean; override;
 
 
          function spilling_get_operation_type(opnr: longint): topertype;override;
          function spilling_get_operation_type(opnr: longint): topertype;override;
+         function spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;override;
 
 
          { assembler }
          { assembler }
       public
       public
@@ -203,28 +208,29 @@ uses
          procedure ppuwriteoper(ppufile:tcompilerppufile;const o:toper);override;
          procedure ppuwriteoper(ppufile:tcompilerppufile;const o:toper);override;
          procedure ppubuildderefimploper(var o:toper);override;
          procedure ppubuildderefimploper(var o:toper);override;
          procedure ppuderefoper(var o:toper);override;
          procedure ppuderefoper(var o:toper);override;
-      private
-         { next fields are filled in pass1, so pass2 is faster }
-         inssize   : shortint;
-         insoffset : longint;
-         LastInsOffset : longint; { need to be public to be reset }
-         insentry  : PInsEntry;
-         function  InsEnd:longint;
-         procedure create_ot(objdata:TObjData);
-         function  Matches(p:PInsEntry):longint;
-         function  calcsize(p:PInsEntry):shortint;
-         procedure gencode(objdata:TObjData);
-         function  NeedAddrPrefix(opidx:byte):boolean;
-         procedure Swapoperands;
-         function  FindInsentry(objdata:TObjData):boolean;
       end;
       end;
 
 
       tai_align = class(tai_align_abstract)
       tai_align = class(tai_align_abstract)
         { nothing to add }
         { nothing to add }
       end;
       end;
 
 
-    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
-    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+    type
+      tsimplereftype =
+         { valid reference }
+        (sr_simple,
+         { invalid reference, should not be generated by the code generator (but
+           can be encountered via inline assembly, where it must be rejected) }
+         sr_internal_illegal,
+         { invalid reference, may be generated by the code generator and then
+           must be simplified (also rejected in inline assembly) }
+         sr_complex);
+
+    function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
+    function can_be_shifter_operand(opc: tasmop; opnr: longint): boolean;
+    function valid_shifter_operand(opc: tasmop; useszr, usessp, is64bit: boolean; sm: tshiftmode; shiftimm: longint): boolean;
+
+    function spilling_create_load(const ref: treference; r: tregister): taicpu;
+    function spilling_create_store(r: tregister; const ref: treference): taicpu;
 
 
     function setoppostfix(i : taicpu;pf : toppostfix) : taicpu;
     function setoppostfix(i : taicpu;pf : toppostfix) : taicpu;
     function setcondition(i : taicpu;c : tasmcond) : taicpu;
     function setcondition(i : taicpu;c : tasmcond) : taicpu;
@@ -261,6 +267,21 @@ implementation
       end;
       end;
 
 
 
 
+    procedure taicpu.loadconditioncode(opidx: longint; const c: tasmcond);
+      begin
+        allocate_oper(opidx+1);
+        with oper[opidx]^ do
+          begin
+            if typ<>top_conditioncode then
+              begin
+                clearop(opidx);
+              end;
+            cc:=c;
+            typ:=top_conditioncode;
+          end;
+      end;
+
+
 {*****************************************************************************
 {*****************************************************************************
                                  taicpu Constructors
                                  taicpu Constructors
 *****************************************************************************}
 *****************************************************************************}
@@ -314,6 +335,16 @@ implementation
       end;
       end;
 
 
 
 
+    constructor taicpu.op_reg_const_shifterop(op: tasmop; _op1: tregister; _op2: aint; _op3: tshifterop);
+      begin
+        inherited create(op);
+        ops:=3;
+        loadreg(0,_op1);
+        loadconst(1,_op2);
+        loadshifterop(2,_op3);
+      end;
+
+
     constructor taicpu.op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
     constructor taicpu.op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
       begin
       begin
          inherited create(op);
          inherited create(op);
@@ -323,6 +354,15 @@ implementation
       end;
       end;
 
 
 
 
+    constructor taicpu.op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
+      begin
+        inherited create(op);
+        ops:=2;
+        loadreg(0,_op1);
+        loadconditioncode(1,_op2);
+      end;
+
+
     constructor taicpu.op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
     constructor taicpu.op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
       begin
       begin
          inherited create(op);
          inherited create(op);
@@ -354,6 +394,28 @@ implementation
       end;
       end;
 
 
 
 
+     constructor taicpu.op_reg_reg_const_const(op: tasmop; _op1, _op2: tregister; _op3, _op4: aint);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,aint(_op3));
+         loadconst(3,aint(_op4));
+       end;
+
+
+     constructor taicpu.op_reg_reg_const_shifterop(op: tasmop; _op1, _op2: tregister; _op3: aint; const _op4: tshifterop);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,aint(_op3));
+         loadshifterop(3,_op4);
+       end;
+
+
      constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
      constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
        begin
        begin
          inherited create(op);
          inherited create(op);
@@ -384,7 +446,7 @@ implementation
       end;
       end;
 
 
 
 
-     constructor taicpu.op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+     constructor taicpu.op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
       begin
       begin
          inherited create(op);
          inherited create(op);
          ops:=4;
          ops:=4;
@@ -394,6 +456,16 @@ implementation
          loadshifterop(3,_op4);
          loadshifterop(3,_op4);
       end;
       end;
 
 
+     constructor taicpu.op_reg_reg_reg_cond(op: tasmop; _op1, _op2, _op3: tregister; const _op4: tasmcond);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+         loadconditioncode(3,_op4);
+       end;
+
 
 
     constructor taicpu.op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
     constructor taicpu.op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
       begin
       begin
@@ -454,85 +526,403 @@ implementation
       end;
       end;
 
 
 
 
-    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
+    function spilling_create_op(op: tasmop; const ref: treference; r: tregister): taicpu;
+      const
+        { invalid sizes for aarch64 are 0 }
+        subreg2bytesize: array[TSubRegister] of byte =
+          (0,0,0,0,4,8,0,0,0,4,8,0,0,0);
       var
       var
-        op: tasmop;
+        scalefactor: byte;
       begin
       begin
+        scalefactor:=subreg2bytesize[getsubreg(r)];
+        if scalefactor=0 then
+          internalerror(2014120301);
+        if (ref.offset>4095*scalefactor) or
+           ((ref.offset>255) and
+            ((ref.offset mod scalefactor)<>0)) or
+           (ref.offset<-256) then
+          internalerror(2014120302);
         case getregtype(r) of
         case getregtype(r) of
-          R_INTREGISTER :
-            result:=taicpu.op_reg_ref(A_LDR,r,ref);
-          R_MMREGISTER :
-            begin
-              case getsubreg(r) of
-                R_SUBFD:
-                  op:=A_LDR;
-                R_SUBFS:
-                  op:=A_LDR;
-                else
-                  internalerror(2009112905);
-              end;
-              result:=taicpu.op_reg_ref(op,r,ref);
-            end;
+          R_INTREGISTER,
+          R_MMREGISTER:
+            result:=taicpu.op_reg_ref(op,r,ref);
           else
           else
             internalerror(200401041);
             internalerror(200401041);
         end;
         end;
       end;
       end;
 
 
 
 
-    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+    function is_valid_load_symbol(op: tasmop; oppostfix: toppostfix; const ref: treference): tsimplereftype;
+      begin
+        result:=sr_complex;
+        if not assigned(ref.symboldata) and
+           not(ref.refaddr in [addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
+          exit;
+        { can't use pre-/post-indexed mode here (makes no sense either) }
+        if ref.addressmode<>AM_OFFSET then
+          exit;
+        { "ldr literal" must be a 32/64 bit LDR and have a symbol }
+        if assigned(ref.symboldata) and
+           ((op<>A_LDR) or
+            not(oppostfix in [PF_NONE,PF_W,PF_SW]) or
+            not assigned(ref.symbol)) then
+          exit;
+        { if this is a (got) page offset load, we must have a base register and a
+          symbol }
+        if (ref.refaddr in [addr_gotpageoffset,addr_pageoffset]) and
+           (not assigned(ref.symbol) or
+            (ref.base=NR_NO) or
+            (ref.index<>NR_NO) or
+            (ref.offset<>0)) then
+          begin
+            result:=sr_internal_illegal;
+            exit;
+          end;
+        { cannot have base or index register (we generate these kind of
+          references internally, they should never end up here with an
+          extra base or offset) }
+        if (ref.refaddr in [addr_gotpage,addr_page]) and
+           (ref.base<>NR_NO) or
+           (ref.index<>NR_NO) then
+          begin
+            result:=sr_internal_illegal;
+            exit;
+          end;
+        result:=sr_simple;
+      end;
+
+
+    function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
       var
       var
-        op: tasmop;
+        maxoffs: asizeint;
+        accesssize: longint;
       begin
       begin
-        case getregtype(r) of
-          R_INTREGISTER :
-            result:=taicpu.op_reg_ref(A_STR,r,ref);
-          R_MMREGISTER :
-            begin
-              case getsubreg(r) of
-                R_SUBFD:
-                  op:=A_STR;
-                R_SUBFS:
-                  op:=A_STR;
+        result:=sr_internal_illegal;
+        { post-indexed is only allowed for vector and immediate loads/stores }
+        if (ref.addressmode=AM_POSTINDEXED) and
+           not(op in [A_LD1,A_LD2,A_LD3,A_LD4,A_ST1,A_ST2,A_ST3,A_ST4]) and
+           (not(op in [A_LDR,A_STR,A_LDP,A_STP]) or
+            (ref.base=NR_NO) or
+            (ref.index<>NR_NO)) then
+          exit;
+
+        { can only have a shift mode if we have an index }
+        if (ref.index=NR_NO) and
+           (ref.shiftmode<>SM_None) then
+          exit;
+
+        { the index can never be the stack pointer }
+        if ref.index=NR_SP then
+          exit;
+
+        { no instruction supports an index without a base }
+        if (ref.base=NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            result:=sr_complex;
+            exit;
+          end;
+
+        { LDR literal or GOT entry: 32 or 64 bit, label }
+        if assigned(ref.symboldata) or
+           assigned(ref.symbol) then
+          begin
+            { we generate these kind of references internally; at least for now,
+              they should never end up here with an extra base or offset or so }
+            result:=is_valid_load_symbol(op,oppostfix,ref);
+            exit;
+          end;
+
+        { any other reference cannot be gotpage/gotpageoffset/pic }
+        if ref.refaddr in [addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset,addr_pic] then
+          exit;
+
+        { base & index:
+            * index cannot be the stack pointer
+            * offset must be 0
+            * can scale with the size of the access
+            * can zero/sign extend 32 bit index register, and/or multiple by
+              access size
+            * no pre/post-indexing
+        }
+        if (ref.base<>NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+              exit;
+            case op of
+              { this holds for both integer and fpu/vector loads }
+              A_LDR,A_STR:
+                if (ref.offset=0) and
+                   (((ref.shiftmode=SM_None) and
+                     (ref.shiftimm=0)) or
+                    ((ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                     (ref.shiftimm=tcgsizep2size[size]))) then
+                  result:=sr_simple
                 else
                 else
-                  internalerror(2009112904);
-              end;
-              result:=taicpu.op_reg_ref(op,r,ref);
+                  result:=sr_complex;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110704);
+              { these don't support base+index }
+              A_LDUR,A_STUR,
+              A_LDP,A_STP:
+                result:=sr_complex;
+              else
+                { nothing: result is already sr_internal_illegal };
             end;
             end;
+            exit;
+          end;
+
+        { base + immediate offset. Variants:
+            * LDR*/STR*:
+              - pre- or post-indexed with signed 9 bit immediate
+              - regular with unsiged scaled immediate (multiple of access
+                size), in the range 0 to (12 bit * access_size)-1
+            * LDP/STP
+              - pre- or post-indexed with signed 9 bit immediate
+              - regular with signed 9 bit immediate
+            * LDUR*/STUR*:
+              - regular with signed 9 bit immediate
+        }
+        if ref.base<>NR_NO then
+          begin
+            accesssize:=1 shl tcgsizep2size[size];
+            case op of
+              A_LDR,A_STR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset>=0) and
+                     (ref.offset<(((1 shl 12)-1)*accesssize)) and
+                     ((ref.offset mod accesssize)=0) then
+                    result:=sr_simple
+                  else if (ref.offset>=-256) and
+                     (ref.offset<=255) then
+                    begin
+                      { non pre-/post-indexed regular loads/stores can only be
+                        performed using LDUR/STUR }
+                      if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+                        result:=sr_simple
+                      else
+                        result:=sr_complex
+                    end
+                  else
+                    result:=sr_complex;
+                end;
+              A_LDP,A_LDNP,
+              A_STP,A_STNP:
+                begin
+                  { only supported for 32/64 bit }
+                  if not(oppostfix in [PF_W,PF_SW,PF_None]) then
+                    exit;
+                  { offset must be a multple of the access size }
+                  if (ref.offset mod accesssize)<>0 then
+                    exit;
+                  { offset must fit in a signed 7 bit offset }
+                  if (ref.offset>=-(1 shl (6+tcgsizep2size[size]))) and
+                     (ref.offset<=(1 shl (6+tcgsizep2size[size]))-1) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
+              A_LDUR,A_STUR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset>=-256) and
+                     (ref.offset<=255) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110907);
+              A_LDAR,
+              A_LDAXR,
+              A_LDXR,
+              A_LDXP,
+              A_STLR,
+              A_STLXR,
+              A_STLXP,
+              A_STXP,
+              A_STXR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset=0) then
+                    result:=sr_simple;
+                end
+              else
+                { nothing: result is already sr_internal_illegal };
+            end;
+            exit;
+          end;
+        { absolute addresses are not supported, have to load them first into
+          a register }
+        result:=sr_complex;
+      end;
+
+
+    function can_be_shifter_operand(opc: tasmop; opnr: longint): boolean;
+      begin
+        case opc of
+          A_ADD,
+          A_AND,
+          A_EON,
+          A_EOR,
+          A_ORN,
+          A_ORR,
+          A_SUB:
+            result:=opnr=3;
+          A_BIC,
+          A_CMN,
+          A_CMP,
+          A_MOVK,
+          A_MOVZ,
+          A_MOVN,
+          A_MVN,
+          A_NEG,
+          A_TST:
+            result:=opnr=2;
           else
           else
-            internalerror(200401041);
+            result:=false;
         end;
         end;
       end;
       end;
 
 
 
 
+    function valid_shifter_operand(opc: tasmop; useszr, usessp, is64bit: boolean; sm: tshiftmode; shiftimm: longint): boolean;
+      begin
+        case opc of
+          A_ADD,
+          A_SUB,
+          A_NEG,
+          A_AND,
+          A_TST,
+          A_CMN,
+          A_CMP:
+            begin
+              result:=false;
+              if not useszr then
+                result:=
+                  (sm in shiftedregmodes) and
+                  ((shiftimm in [0..31]) or
+                   (is64bit and
+                    (shiftimm in [32..63])));
+              if not usessp then
+                result:=
+                  result or
+                  ((sm in extendedregmodes) and
+                   (shiftimm in [0..4]));
+            end;
+          A_BIC,
+          A_EON,
+          A_EOR,
+          A_MVN,
+          A_ORN,
+          A_ORR:
+            result:=
+              (sm in shiftedregmodes) and
+              (shiftimm in [0..31*(ord(is64bit)+1)+ord(is64bit)]);
+          A_MOVK,
+          A_MOVZ,
+          A_MOVN:
+            result:=
+              (sm=SM_LSL) and
+              ((shiftimm in [0,16]) or
+               (is64bit and
+                (shiftimm in [32,48])));
+          else
+            result:=false;
+        end;
+      end;
+
+
+    function spilling_create_load(const ref: treference; r: tregister): taicpu;
+      var
+        op: tasmop;
+      begin
+        if (ref.index<>NR_NO) or
+           (ref.offset<-256) or
+           (ref.offset>255) then
+          op:=A_LDR
+        else
+          op:=A_LDUR;
+        result:=spilling_create_op(op,ref,r);
+      end;
+
+
+    function spilling_create_store(r: tregister; const ref: treference): taicpu;
+      var
+        op: tasmop;
+      begin
+        if (ref.index<>NR_NO) or
+           (ref.offset<-256) or
+           (ref.offset>255) then
+          op:=A_STR
+        else
+          op:=A_STUR;
+        result:=spilling_create_op(op,ref,r);
+      end;
+
+
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
       begin
       begin
         case opcode of
         case opcode of
-          A_ADC,A_ADD,A_AND,A_BIC,
-          A_EOR,A_CLZ,A_RBIT,
-          A_LDR,
-          A_MOV,A_MVN,A_MUL,
-          A_ORR,A_SBC,A_SUB,
-          A_UXT,A_SXT:
+          A_B,A_BL,
+          A_CMN,A_CMP,
+          A_CCMN,A_CCMP,
+          A_TST:
+            result:=operand_read;
+          A_STR,A_STUR:
             if opnr=0 then
             if opnr=0 then
-              result:=operand_write
+              result:=operand_read
             else
             else
+              { check for pre/post indexed in spilling_get_operation_type_ref }
               result:=operand_read;
               result:=operand_read;
-          A_B,A_BL,
-          A_CMN,A_CMP,A_TST:
-            result:=operand_read;
-          A_STR:
-            { important is what happens with the involved registers }
+          A_STLXP,
+          A_STLXR,
+          A_STXP,
+          A_STXR:
             if opnr=0 then
             if opnr=0 then
-              result := operand_read
+              result:=operand_write
             else
             else
-              { check for pre/post indexed }
-              result := operand_read;
-          else
-            internalerror(200403151);
+              result:=operand_read;
+          A_STP:
+            begin
+              if opnr in [0,1] then
+                result:=operand_read
+              else
+                { check for pre/post indexed in spilling_get_operation_type_ref }
+                result:=operand_read;
+            end;
+           A_LDP,
+           A_LDXP:
+             begin
+               if opnr in [0,1] then
+                 result:=operand_write
+               else
+                 { check for pre/post indexed in spilling_get_operation_type_ref }
+                 result:=operand_read;
+             end;
+           else
+             if opnr=0 then
+               result:=operand_write
+             else
+               result:=operand_read;
         end;
         end;
       end;
       end;
 
 
 
 
+    function taicpu.spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;
+      begin
+        result:=operand_read;
+        if (oper[opnr]^.ref^.base = reg) and
+          (oper[opnr]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) then
+           result:=operand_readwrite;
+      end;
+
+
     procedure BuildInsTabCache;
     procedure BuildInsTabCache;
       var
       var
         i : longint;
         i : longint;
@@ -1069,22 +1459,12 @@ implementation
         { we need to reset everything here, because the choosen insentry
         { we need to reset everything here, because the choosen insentry
           can be invalid for a new situation where the previously optimized
           can be invalid for a new situation where the previously optimized
           insentry is not correct }
           insentry is not correct }
-        InsEntry:=nil;
-        InsSize:=0;
-        LastInsOffset:=-1;
       end;
       end;
 
 
 
 
     procedure taicpu.ResetPass2;
     procedure taicpu.ResetPass2;
       begin
       begin
         { we are here in a second pass, check if the instruction can be optimized }
         { we are here in a second pass, check if the instruction can be optimized }
-        if assigned(InsEntry) and
-           ((InsEntry^.flags and IF_PASS2)<>0) then
-         begin
-           InsEntry:=nil;
-           InsSize:=0;
-         end;
-        LastInsOffset:=-1;
       end;
       end;
 
 
 
 
@@ -1097,18 +1477,15 @@ implementation
     function taicpu.Pass1(objdata:TObjData):longint;
     function taicpu.Pass1(objdata:TObjData):longint;
       begin
       begin
         Pass1:=0;
         Pass1:=0;
-        LastInsOffset:=-1;
       end;
       end;
 
 
 
 
     procedure taicpu.Pass2(objdata:TObjData);
     procedure taicpu.Pass2(objdata:TObjData);
       begin
       begin
         { error in pass1 ? }
         { error in pass1 ? }
-        if insentry=nil then
-         exit;
         current_filepos:=fileinfo;
         current_filepos:=fileinfo;
         { Generate the instruction }
         { Generate the instruction }
-        GenCode(objdata);
+        { GenCode(objdata); }
       end;
       end;
 
 
 
 
@@ -1132,1046 +1509,6 @@ implementation
       end;
       end;
 
 
 
 
-    function  taicpu.InsEnd:longint;
-      begin
-        Result:=0; { unimplemented }
-      end;
-
-
-    procedure taicpu.create_ot(objdata:TObjData);
-      begin
-      end;
-
-
-    function taicpu.Matches(p:PInsEntry):longint;
-      begin
-      end;
-
-
-    function  taicpu.calcsize(p:PInsEntry):shortint;
-      begin
-        result:=4;
-      end;
-
-
-    function  taicpu.NeedAddrPrefix(opidx:byte):boolean;
-      begin
-        Result:=False; { unimplemented }
-      end;
-
-
-    procedure taicpu.Swapoperands;
-      begin
-      end;
-
-
-    function taicpu.FindInsentry(objdata:TObjData):boolean;
-      begin
-      end;
-
-
-    procedure taicpu.gencode(objdata:TObjData);
-      var
-        bytes : dword;
-        i_field : byte;
-
-      procedure setshifterop(op : byte);
-        begin
-          case oper[op]^.typ of
-            top_const:
-              begin
-                i_field:=1;
-                bytes:=bytes or dword(oper[op]^.val and $fff);
-              end;
-            top_reg:
-              begin
-                i_field:=0;
-                bytes:=bytes or (getsupreg(oper[op]^.reg) shl 16);
-
-                { does a real shifter op follow? }
-                if (op+1<=op) and (oper[op+1]^.typ=top_shifterop) then
-                  begin
-                  end;
-              end;
-          else
-            internalerror(2005091103);
-          end;
-        end;
-
-      begin
-        bytes:=$0;
-        { evaluate and set condition code }
-
-        { condition code allowed? }
-
-        { setup rest of the instruction }
-        case insentry^.code[0] of
-          #$08:
-            begin
-              { set instruction code }
-              bytes:=bytes or (ord(insentry^.code[1]) shl 26);
-              bytes:=bytes or (ord(insentry^.code[2]) shl 21);
-
-              { set destination }
-              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
-
-              { create shifter op }
-              setshifterop(1);
-
-              { set i field }
-              bytes:=bytes or (i_field shl 25);
-
-              { set s if necessary }
-              if oppostfix=PF_S then
-                bytes:=bytes or (1 shl 20);
-            end;
-          #$ff:
-            internalerror(2005091101);
-          else
-            internalerror(2005091102);
-        end;
-        { we're finished, write code }
-        objdata.writebytes(bytes,sizeof(bytes));
-      end;
-
-
-{$ifdef dummy}
-(*
-static void gencode (long segment, long offset, int bits,
-                     insn *ins, char *codes, long insn_end)
-{
-    int has_S_code;             /* S - setflag */
-    int has_B_code;             /* B - setflag */
-    int has_T_code;             /* T - setflag */
-    int has_W_code;             /* ! => W flag */
-    int has_F_code;             /* ^ => S flag */
-    int keep;
-    unsigned char c;
-    unsigned char bytes[4];
-    long          data, size;
-    static int cc_code[] =      /* bit pattern of cc */
-  {                             /* order as enum in  */
-    0x0E, 0x03, 0x02, 0x00,     /* nasm.h            */
-    0x0A, 0x0C, 0x08, 0x0D,
-    0x09, 0x0B, 0x04, 0x01,
-    0x05, 0x07, 0x06,
-  };
-
-
-#ifdef DEBUG
-static char *CC[] =
-  {                                    /* condition code names */
-    "AL", "CC", "CS", "EQ",
-    "GE", "GT", "HI", "LE",
-    "LS", "LT", "MI", "NE",
-    "PL", "VC", "VS", "",
-    "S"
-};
-
-
-    has_S_code = (ins->condition & C_SSETFLAG);
-    has_B_code = (ins->condition & C_BSETFLAG);
-    has_T_code = (ins->condition & C_TSETFLAG);
-    has_W_code = (ins->condition & C_EXSETFLAG);
-    has_F_code = (ins->condition & C_FSETFLAG);
-    ins->condition = (ins->condition & 0x0F);
-
-
-    if (rt_debug)
-      {
-    printf ("gencode: instruction: %s%s", insn_names[ins->opcode],
-            CC[ins->condition & 0x0F]);
-    if (has_S_code)
-      printf ("S");
-    if (has_B_code)
-      printf ("B");
-    if (has_T_code)
-      printf ("T");
-    if (has_W_code)
-      printf ("!");
-    if (has_F_code)
-      printf ("^");
-
-    printf ("\n");
-
-    c = *codes;
-
-    printf ("   (%d)  decode - '0x%02X'\n", ins->operands, c);
-
-
-    bytes[0] = 0xB;
-    bytes[1] = 0xE;
-    bytes[2] = 0xE;
-    bytes[3] = 0xF;
-      }
-
-    // First condition code in upper nibble
-    if (ins->condition < C_NONE)
-      {
-        c = cc_code[ins->condition] << 4;
-      }
-    else
-      {
-        c = cc_code[C_AL] << 4; // is often ALWAYS but not always
-      }
-
-
-    switch (keep = *codes)
-      {
-        case 1:
-          // B, BL
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-
-          if (ins->oprs[0].segment != segment)
-            {
-              // fais une relocation
-              c = 1;
-              data = 0; // Let the linker locate ??
-            }
-          else
-            {
-              c = 0;
-              data = ins->oprs[0].offset - (offset + 8);
-
-              if (data % 4)
-                {
-                  errfunc (ERR_NONFATAL, "offset not aligned on 4 bytes");
-                }
-            }
-
-          if (data >= 0x1000)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-
-          data = data >> 2;
-          bytes[1] = (data >> 16) & 0xFF;
-          bytes[2] = (data >> 8)  & 0xFF;
-          bytes[3] = (data )      & 0xFF;
-
-          if (c == 1)
-            {
-//            out (offset, segment, &bytes[0], OUT_RAWDATA+1, NO_SEG, NO_SEG);
-              out (offset, segment, &bytes[0], OUT_REL3ADR+4, ins->oprs[0].segment, NO_SEG);
-            }
-          else
-            {
-              out (offset, segment, &bytes[0], OUT_RAWDATA+4, NO_SEG, NO_SEG);
-            }
-          return;
-
-        case 2:
-          // SWI
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-          data = ins->oprs[0].offset;
-          bytes[1] = (data >> 16) & 0xFF;
-          bytes[2] = (data >> 8) & 0xFF;
-          bytes[3] = (data) & 0xFF;
-          out (offset, segment, &bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-        case 3:
-          // BX
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-          bytes[1] = *codes++;
-          bytes[2] = *codes++;
-          bytes[3] = *codes++;
-          c = regval (&ins->oprs[0],1);
-          if (c == 15)  // PC
-            {
-              errfunc (ERR_WARNING, "'BX' with R15 has undefined behaviour");
-            }
-          else if (c > 15)
-            {
-              errfunc (ERR_NONFATAL, "Illegal register specified for 'BX'");
-            }
-
-          bytes[3] |= (c & 0x0F);
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 4:         // AND Rd,Rn,Rm
-        case 5:         // AND Rd,Rn,Rm,<shift>Rs
-        case 6:         // AND Rd,Rn,Rm,<shift>imm
-        case 7:         // AND Rd,Rn,<shift>imm
-          ++codes;
-#ifdef DEBUG
-          if (rt_debug)
-            {
-              printf ("         decode - '0x%02X'\n", keep);
-              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
-            }
-#endif
-          bytes[0] = c | *codes;
-          ++codes;
-
-          bytes[1] = *codes;
-          if (has_S_code)
-            bytes[1] |= 0x10;
-          c = regval (&ins->oprs[1],1);
-          // Rn in low nibble
-          bytes[1] |= c;
-
-          // Rd in high nibble
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          if (keep != 7)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[2],1);
-            }
-
-          // Shifts if any
-          if (keep == 5 || keep == 6)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 5)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[3],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 6)
-                {
-                  c = (ins->oprs[3].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[3]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,reg,imm
-          if (keep == 7)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[2].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 8:         // MOV Rd,Rm
-        case 9:         // MOV Rd,Rm,<shift>Rs
-        case 0xA:       // MOV Rd,Rm,<shift>imm
-        case 0xB:       // MOV Rd,<shift>imm
-          ++codes;
-#ifdef DEBUG
-          if (rt_debug)
-            {
-              printf ("         decode - '0x%02X'\n", keep);
-              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
-            }
-#endif
-          bytes[0] = c | *codes;
-          ++codes;
-
-          bytes[1] = *codes;
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // Rd in high nibble
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          if (keep != 0x0B)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[1],1);
-            }
-
-          // Shifts if any
-          if (keep == 0x09 || keep == 0x0A)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 0x09)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[2],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 0x0A)
-                {
-                  c = (ins->oprs[2].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[2]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,imm
-          if (keep == 0x0B)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-
-        case 0xC:       // CMP Rn,Rm
-        case 0xD:       // CMP Rn,Rm,<shift>Rs
-        case 0xE:       // CMP Rn,Rm,<shift>imm
-        case 0xF:       // CMP Rn,<shift>imm
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes;
-
-          // Implicit S code
-          bytes[1] |= 0x10;
-
-          c = regval (&ins->oprs[0],1);
-          // Rn in low nibble
-          bytes[1] |= c;
-
-          // No destination
-          bytes[2] = 0;
-
-          if (keep != 0x0B)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[1],1);
-            }
-
-          // Shifts if any
-          if (keep == 0x0D || keep == 0x0E)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 0x0D)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[2],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 0x0E)
-                {
-                  c = (ins->oprs[2].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[2]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,imm
-          if (keep == 0x0F)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 0x10:      // MRS Rd,<psr>
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          c = regval (&ins->oprs[0],1);
-
-          bytes[2] = c << 4;
-
-          bytes[3] = 0;
-
-          c = ins->oprs[1].basereg;
-
-          if (c == R_CPSR || c == R_SPSR)
-            {
-              if (c == R_SPSR)
-                {
-                  bytes[1] |= 0x40;
-                }
-            }
-          else
-            {
-              errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-
-          return;
-
-        case 0x11:      // MSR <psr>,Rm
-        case 0x12:      // MSR <psrf>,Rm
-        case 0x13:      // MSR <psrf>,#expression
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[2] = *codes;
-
-
-          if (keep == 0x11 || keep == 0x12)
-            {
-              // Rm
-              c = regval (&ins->oprs[1],1);
-
-              bytes[3] = c;
-            }
-          else
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          c = ins->oprs[0].basereg;
-
-          if ( keep == 0x11)
-            {
-              if ( c == R_CPSR || c == R_SPSR)
-                {
-                if ( c== R_SPSR)
-                  {
-                    bytes[1] |= 0x40;
-                  }
-                }
-            else
-              {
-                errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
-              }
-            }
-          else
-            {
-              if ( c == R_CPSR_FLG || c == R_SPSR_FLG)
-                {
-                  if ( c== R_SPSR_FLG)
-                    {
-                      bytes[1] |= 0x40;
-                    }
-                }
-              else
-                {
-                  errfunc (ERR_NONFATAL, "CPSR_flg or SPSR_flg expected");
-                }
-            }
-          break;
-
-        case 0x14:      // MUL  Rd,Rm,Rs
-        case 0x15:      // MULA Rd,Rm,Rs,Rn
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[3] = *codes;
-
-          // Rd
-          bytes[1] |= regval (&ins->oprs[0],1);
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // Rm
-          bytes[3] |= regval (&ins->oprs[1],1);
-
-          // Rs
-          bytes[2] = regval (&ins->oprs[2],1);
-
-          if (keep == 0x15)
-            {
-              bytes[2] |= regval (&ins->oprs[3],1) << 4;
-            }
-          break;
-
-        case 0x16:      // SMLAL RdHi,RdLo,Rm,Rs
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[3] = *codes;
-
-          // RdHi
-          bytes[1] |= regval (&ins->oprs[1],1);
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // RdLo
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          // Rm
-          bytes[3] |= regval (&ins->oprs[2],1);
-
-          // Rs
-          bytes[2] |= regval (&ins->oprs[3],1);
-
-          break;
-
-        case 0x17:      // LDR Rd, expression
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-          if (has_T_code)
-            {
-              errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
-            }
-          if (has_W_code)
-            {
-              errfunc (ERR_NONFATAL, "'!' not allowed");
-            }
-
-          // Rn - implicit R15
-          bytes[1] |= 0xF;
-
-          if (ins->oprs[1].segment != segment)
-            {
-              errfunc (ERR_NONFATAL, "label not in same segment");
-            }
-
-          data = ins->oprs[1].offset - (offset + 8);
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x1000)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-
-          bytes[2] |= ((data & 0xF00) >> 8);
-          bytes[3] = data & 0xFF;
-          break;
-
-        case 0x18:      // LDR Rd, [Rn]
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-          if (has_T_code)
-            {
-              bytes[1] |= 0x20;         // write-back
-            }
-          else
-            {
-              bytes[0] |= 0x01;         // implicit pre-index mode
-            }
-
-          if (has_W_code)
-            {
-              bytes[1] |= 0x20;         // write-back
-            }
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (c == 0x15)                // R15
-            data = -8;
-          else
-            data = 0;
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          bytes[2] |= ((data & 0xF00) >> 8);
-          bytes[3] = data & 0xFF;
-          break;
-
-        case 0x19:      // LDR Rd, [Rn,#expression]
-        case 0x20:      // LDR Rd, [Rn,Rm]
-        case 0x21:      // LDR Rd, [Rn,Rm,shift]
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
-            {
-              bytes[0] |= 0x01;         // pre-index mode
-              if (has_W_code)
-                {
-                  bytes[1] |= 0x20;
-                }
-              if (has_T_code)
-                {
-                  errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
-                }
-            }
-          else
-            {
-              if (has_T_code)           // Forced write-back in post-index mode
-                {
-                  bytes[1] |= 0x20;
-                }
-              if (has_W_code)
-                {
-                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
-                }
-            }
-
-          if (keep == 0x19)
-            {
-              data = ins->oprs[2].offset;
-
-              if (data < 0)
-                {
-                  data = -data;
-                }
-              else
-                {
-                  bytes[1] |= 0x80;
-                }
-
-              if (data >= 0x1000)
-                {
-                  errfunc (ERR_NONFATAL, "too long offset");
-                }
-
-              bytes[2] |= ((data & 0xF00) >> 8);
-              bytes[3] = data & 0xFF;
-            }
-          else
-            {
-              if (ins->oprs[2].minus == 0)
-                {
-                  bytes[1] |= 0x80;
-                }
-              c = regval (&ins->oprs[2],1);
-              bytes[3] = c;
-
-              if (keep == 0x21)
-                {
-                  c = ins->oprs[3].offset;
-                  if (c > 0x1F)
-                    {
-                      errfunc (ERR_NONFATAL, "too large shiftvalue");
-                      c = c & 0x1F;
-                    }
-
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  bytes[3] |= shiftval (&ins->oprs[3]) << 5;
-                }
-            }
-
-          break;
-
-        case 0x22:      // LDRH Rd, expression
-          ++codes;
-
-          bytes[0] = c | 0x01;          // Implicit pre-index
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn - implicit R15
-          bytes[1] |= 0xF;
-
-          if (ins->oprs[1].segment != segment)
-            {
-              errfunc (ERR_NONFATAL, "label not in same segment");
-            }
-
-          data = ins->oprs[1].offset - (offset + 8);
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x100)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-          bytes[3] = *codes++;
-
-          bytes[2] |= ((data & 0xF0) >> 4);
-          bytes[3] |= data & 0xF;
-          break;
-
-        case 0x23:      // LDRH Rd, Rn
-          ++codes;
-
-          bytes[0] = c | 0x01;          // Implicit pre-index
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (c == 0x15)                // R15
-            data = -8;
-          else
-            data = 0;
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x100)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-          bytes[3] = *codes++;
-
-          bytes[2] |= ((data & 0xF0) >> 4);
-          bytes[3] |= data & 0xF;
-          break;
-
-        case 0x24:      // LDRH Rd, Rn, expression
-        case 0x25:      // LDRH Rd, Rn, Rm
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
-            {
-              bytes[0] |= 0x01;         // pre-index mode
-              if (has_W_code)
-                {
-                  bytes[1] |= 0x20;
-                }
-            }
-          else
-            {
-              if (has_W_code)
-                {
-                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
-                }
-            }
-
-          bytes[3] = *codes++;
-
-          if (keep == 0x24)
-            {
-              data = ins->oprs[2].offset;
-
-              if (data < 0)
-                {
-                  data = -data;
-                }
-              else
-                {
-                  bytes[1] |= 0x80;
-                }
-
-              if (data >= 0x100)
-                {
-                  errfunc (ERR_NONFATAL, "too long offset");
-                }
-
-              bytes[2] |= ((data & 0xF0) >> 4);
-              bytes[3] |= data & 0xF;
-            }
-          else
-            {
-              if (ins->oprs[2].minus == 0)
-                {
-                  bytes[1] |= 0x80;
-                }
-              c = regval (&ins->oprs[2],1);
-              bytes[3] |= c;
-
-            }
-          break;
-
-        case 0x26:      // LDM/STM Rn, {reg-list}
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[0] |= ( *codes >> 4) & 0xF;
-          bytes[1] = ( *codes << 4) & 0xF0;
-          ++codes;
-
-          if (has_W_code)
-            {
-              bytes[1] |= 0x20;
-            }
-          if (has_F_code)
-            {
-              bytes[1] |= 0x40;
-            }
-
-          // Rn
-          bytes[1] |= regval (&ins->oprs[0],1);
-
-          data = ins->oprs[1].basereg;
-
-          bytes[2] = ((data >> 8) & 0xFF);
-          bytes[3] = (data & 0xFF);
-
-          break;
-
-        case 0x27:      // SWP Rd, Rm, [Rn]
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[0] |= *codes++;
-
-          bytes[1] = regval (&ins->oprs[2],1);
-          if (has_B_code)
-            {
-              bytes[1] |= 0x40;
-            }
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          bytes[3] = *codes++;
-          bytes[3] |= regval (&ins->oprs[1],1);
-          break;
-
-        default:
-          errfunc (ERR_FATAL, "unknown decoding of instruction");
-
-          bytes[0] = c;
-          // And a fix nibble
-          ++codes;
-          bytes[0] |= *codes++;
-
-         if ( *codes == 0x01)           // An I bit
-           {
-
-           }
-         if ( *codes == 0x02)           // An I bit
-           {
-
-           }
-         ++codes;
-      }
-    out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-}
-
-*)
-{$endif dummy}
-
 begin
 begin
   cai_align:=tai_align;
   cai_align:=tai_align;
 end.
 end.

+ 288 - 0
compiler/aarch64/agcpugas.pas

@@ -0,0 +1,288 @@
+{
+    Copyright (c) 2003,2014 by Florian Klaempfl and Jonas Maebe
+
+    This unit implements an asm for AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+{ This unit implements the GNU Assembler writer for AArch64
+}
+
+unit agcpugas;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+       globtype,
+       aasmtai,
+       aggas,
+       cpubase,cpuinfo;
+
+    type
+      TAArch64InstrWriter=class(TCPUInstrWriter)
+        procedure WriteInstruction(hp : tai);override;
+      end;
+
+      TAArch64AppleAssembler=class(TAppleGNUassembler)
+        constructor create(smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
+      end;
+
+
+    const
+      gas_shiftmode2str : array[tshiftmode] of string[4] = (
+        '','lsl','lsr','asr',
+        'uxtb','uxth','uxtw','uxtx',
+        'sxtb','sxth','sxtw','sxtx');
+
+    const 
+      cputype_to_gas_march : array[tcputype] of string = (
+        '', // cpu_none
+        'armv8'
+      );
+
+  implementation
+
+    uses
+       cutils,globals,verbose,
+       systems,
+       assemble,
+       aasmcpu,
+       itcpugas,
+       cgbase,cgutils;
+
+
+{****************************************************************************}
+{                      Apple AArch64 Assembler writer                        }
+{****************************************************************************}
+
+    constructor TAArch64AppleAssembler.create(smart: boolean);
+      begin
+        inherited create(smart);
+        InstrWriter := TAArch64InstrWriter.create(self);
+      end;
+
+    function TAArch64AppleAssembler.MakeCmdLine: TCmdStr;
+      begin
+        { 'as' calls through to clang for aarch64, and that one only supports
+          reading from standard input in case "-" is specified as input file
+          (in which case you also have to specify the language via -x) }
+        result:=inherited;
+{$ifdef hasunix}
+        if DoPipe then
+          result:=result+' -x assembler -'
+{$endif}
+      end;
+
+
+{****************************************************************************}
+{                  Helper routines for Instruction Writer                    }
+{****************************************************************************}
+
+    function getreferencestring(var ref : treference) : string;
+      const
+        darwin_addrpage2str: array[addr_page..addr_gotpageoffset] of string[11] =
+           ('@PAGE','@PAGEOFF','@GOTPAGE','@GOTPAGEOFF');
+      begin
+        if ref.base=NR_NO then
+          begin
+            case ref.refaddr of
+              addr_gotpage,
+              addr_page,
+              addr_gotpageoffset,
+              addr_pageoffset:
+                begin
+                  if not assigned(ref.symbol) or
+                     (ref.base<>NR_NO) or
+                     (ref.index<>NR_NO) or
+                     (ref.shiftmode<>SM_None) or
+                     (ref.offset<>0) then
+                    internalerror(2014121501);
+                  if target_asm.id=as_darwin then
+                    result:=ref.symbol.name+darwin_addrpage2str[ref.refaddr]
+                  else
+                    { todo }
+                    internalerror(2014121502);
+                end
+              else
+                internalerror(2015022301);
+            end
+          end
+        else
+          begin
+            result:='['+gas_regname(ref.base);
+            if ref.addressmode=AM_POSTINDEXED then
+              result:=result+']';
+            if ref.index<>NR_NO then
+              begin
+                if (ref.offset<>0) or
+                   assigned(ref.symbol) then
+                  internalerror(2014121504);
+                result:=result+', '+gas_regname(ref.index);
+                case ref.shiftmode of
+                  SM_None: ;
+                  SM_LSL,
+                  SM_UXTW, SM_UXTX, SM_SXTW, SM_SXTX:
+                    begin
+                      result:=result+', '+gas_shiftmode2str[ref.shiftmode];
+                      if (ref.shiftmode=SM_LSL) or
+                         (ref.shiftimm<>0) then
+                        result:=result+' #'+tostr(ref.shiftimm);
+                    end
+                  else
+                    internalerror(2014121505);
+                end;
+              end
+            else
+              begin
+                if assigned(ref.symbol) then
+                  begin
+                    case ref.refaddr of
+                      addr_gotpageoffset,
+                      addr_pageoffset:
+                        begin
+                          if target_asm.id=as_darwin then
+                            result:=result+', '+ref.symbol.name+darwin_addrpage2str[ref.refaddr]
+                          else
+                            { todo }
+                            internalerror(2014122510);
+                        end
+                      else
+                        { todo: not yet generated/don't know syntax }
+                        internalerror(2014121506);
+                    end;
+                  end
+                else
+                  begin
+                    if ref.refaddr<>addr_no then
+                      internalerror(2014121506);
+                    if (ref.offset<>0) then
+                      result:=result+', #'+tostr(ref.offset);
+                  end;
+              end;
+            case ref.addressmode of
+              AM_OFFSET:
+                result:=result+']';
+              AM_PREINDEXED:
+                result:=result+']!';
+            end;
+          end;
+      end;
+
+
+    function getopstr(hp: taicpu; opnr: longint; const o: toper): string;
+      begin
+        case o.typ of
+          top_reg:
+            { we cannot yet represent "umov w0, v4.s[0]" or "ins v4.d[0], x1",
+              so for now we use "s4" or "d4" instead -> translate here }
+            if ((hp.opcode=A_INS) or
+                (hp.opcode=A_UMOV)) and
+               (getregtype(hp.oper[opnr]^.reg)=R_MMREGISTER) then
+              begin
+                case getsubreg(hp.oper[opnr]^.reg) of
+                  R_SUBMMS:
+                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.S[0]';
+                  R_SUBMMD:
+                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.D[0]';
+                  else
+                    internalerror(2014122907);
+                end;
+              end
+            else
+              getopstr:=gas_regname(o.reg);
+          top_shifterop:
+            begin
+              getopstr:=gas_shiftmode2str[o.shifterop^.shiftmode];
+              if o.shifterop^.shiftimm<>0 then
+                getopstr:=getopstr+' #'+tostr(o.shifterop^.shiftimm)
+            end;
+          top_const:
+            if o.val>=0 then
+              getopstr:='#'+tostr(o.val)
+            else
+              getopstr:='#0x'+hexStr(o.val,16);
+          top_conditioncode:
+            getopstr:=cond2str[o.cc];
+          top_ref:
+            if is_calljmp(hp.opcode) then
+              begin
+                if o.ref^.refaddr<>addr_full then
+                  internalerror(2014122220);
+                if not assigned(o.ref^.symbol) or
+                   assigned(o.ref^.relsymbol) or
+                   (o.ref^.base<>NR_NO) or
+                   (o.ref^.index<>NR_NO) or
+                   (o.ref^.offset<>0) then
+                  internalerror(2014122221);
+                getopstr:=o.ref^.symbol.name;
+              end
+            else
+              getopstr:=getreferencestring(o.ref^);
+          else
+            internalerror(2014121507);
+        end;
+      end;
+
+
+    procedure TAArch64InstrWriter.WriteInstruction(hp : tai);
+      var
+        op: TAsmOp;
+        s: string;
+        i: byte;
+        sep: string[3];
+      begin
+        op:=taicpu(hp).opcode;
+        s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix];
+        if taicpu(hp).condition<>C_NONE then
+          s:=s+'.'+cond2str[taicpu(hp).condition];
+        if taicpu(hp).ops<>0 then
+          begin
+            sep:=#9;
+            for i:=0 to taicpu(hp).ops-1 do
+              begin
+                 // debug code
+                 // writeln(s);
+                 // writeln(taicpu(hp).fileinfo.line);
+                 s:=s+sep+getopstr(taicpu(hp),i,taicpu(hp).oper[i]^);
+                 sep:=',';
+              end;
+          end;
+        owner.AsmWriteLn(s);
+      end;
+
+
+    const
+       as_aarch64_gas_darwin_info : tasminfo =
+          (
+            id     : as_darwin;
+            idtxt  : 'AS-Darwin';
+            asmbin : 'as';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM -arch arm64';
+            supported_targets : [system_aarch64_darwin];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
+            labelprefix : 'L';
+            comment : '# ';
+            dollarsign: '$';
+          );
+
+
+begin
+  RegisterAssembler(as_aarch64_gas_darwin_info,TAArch64AppleAssembler);
+end.

+ 25 - 7
compiler/aarch64/aoptcpub.pas

@@ -118,17 +118,35 @@ Implementation
     End;
     End;
 
 
 
 
-  function TAoptBaseCpu.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
+  function TAoptBaseCpu.RegModifiedByInstruction(reg: tregister; p1: tai): boolean;
     var
     var
-      i : Longint;
+      i: longint;
+      preg: tregister;
     begin
     begin
       result:=false;
       result:=false;
       for i:=0 to taicpu(p1).ops-1 do
       for i:=0 to taicpu(p1).ops-1 do
-        if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
-          begin
-            result:=true;
-            exit;
-          end;
+        case taicpu(p1).oper[i]^.typ of
+          top_reg:
+            begin
+              preg:=taicpu(p1).oper[i]^.reg;
+              if (getregtype(preg)=getregtype(reg)) and
+                 (getsupreg(preg)=getsupreg(reg)) and
+                 (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
+                begin
+                  result:=true;
+                  exit;
+                end;
+            end;
+          top_ref:
+            begin
+              if (taicpu(p1).oper[i]^.ref^.addressmode<>am_offset) and
+                 (reg=taicpu(p1).oper[i]^.ref^.base) then
+                begin
+                  result:=true;
+                  exit
+                end;
+            end;
+        end;
     end;
     end;
 
 
 End.
 End.

+ 2284 - 0
compiler/aarch64/cgcpu.pas

@@ -0,0 +1,2284 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    This unit implements the code generator for AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       globtype,parabase,
+       cgbase,cgutils,cgobj,
+       aasmbase,aasmtai,aasmdata,aasmcpu,
+       cpubase,cpuinfo,
+       node,symconst,SymType,symdef,
+       rgcpu;
+
+    type
+      tcgaarch64=class(tcg)
+       protected
+        { simplifies "ref" so it can be used with "op". If "ref" can be used
+          with a different load/Store operation that has the same meaning as the
+          original one, "op" will be replaced with the alternative }
+        procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+        { changes register size without adding register allocation info }
+        function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
+       public
+        function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
+        procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+        procedure init_register_allocators;override;
+        procedure done_register_allocators;override;
+        function  getmmregister(list:TAsmList;size:tcgsize):tregister;override;
+        function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+        procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
+        procedure a_call_reg(list:TAsmList;Reg:tregister);override;
+        { General purpose instructions }
+        procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+        procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
+        procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
+        procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
+        procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        { move instructions }
+        procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
+        procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
+        procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
+        procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
+        procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
+        procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
+        procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
+        { fpu move instructions (not used, all floating point is vector unit-based) }
+        procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
+        procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
+        procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
+        procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
+        procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
+        procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
+
+        procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+        procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
+        { comparison operations }
+        procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
+        procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
+        procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
+        procedure a_jmp_name(list: TAsmList; const s: string);override;
+        procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
+        procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
+        procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
+        procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
+        procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
+        procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
+        procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
+        procedure g_maybe_got_init(list: TAsmList); override;
+        procedure g_restore_registers(list: TAsmList);override;
+        procedure g_save_registers(list: TAsmList);override;
+        procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
+        procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
+        procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
+        procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
+       private
+        function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+        procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+      end;
+
+    procedure create_codegen;
+
+    const
+      TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
+      );
+      TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
+      );
+      TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
+        C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
+      );
+
+
+implementation
+
+  uses
+    globals,verbose,systems,cutils,
+    paramgr,fmodule,
+    symtable,symsym,
+    tgobj,
+    procinfo,cpupi;
+
+
+    procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+      var
+        href: treference;
+        so: tshifterop;
+        accesssize: longint;
+      begin
+        if (ref.base=NR_NO) then
+          begin
+            if ref.shiftmode<>SM_None then
+              internalerror(2014110701);
+            ref.base:=ref.index;
+            ref.index:=NR_NO;
+          end;
+        { no abitrary scale factor support (the generic code doesn't set it,
+          AArch-specific code shouldn't either) }
+        if not(ref.scalefactor in [0,1]) then
+          internalerror(2014111002);
+
+        case simple_ref_type(op,size,oppostfix,ref) of
+          sr_simple:
+            exit;
+          sr_internal_illegal:
+            internalerror(2014121702);
+          sr_complex:
+            { continue } ;
+        end;
+
+        if assigned(ref.symbol) then
+          begin
+            { internal "load symbol" instructions should already be valid }
+            if assigned(ref.symboldata) or
+               (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
+              internalerror(2014110802);
+            { no relative symbol support (needed) yet }
+            if assigned(ref.relsymbol) then
+              internalerror(2014111001);
+            { on Darwin: load the address from the GOT. There does not appear to
+              be a non-GOT variant. This consists of first loading the address
+              of the page containing the GOT entry for this variable, and then
+              the address of the entry itself from that page (can be relaxed by
+              the linker in case the variable itself can be stored directly in
+              the GOT) }
+            if target_info.system in systems_darwin then
+              begin
+                if (preferred_newbasereg=NR_NO) or
+                   (ref.base=preferred_newbasereg) or
+                   (ref.index=preferred_newbasereg) then
+                  preferred_newbasereg:=getaddressregister(list);
+                { load the (GOT) page }
+                reference_reset_symbol(href,ref.symbol,0,8);
+                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+                   ((ref.symbol.typ=AT_DATA) and
+                    (ref.symbol.bind=AB_LOCAL)) then
+                  href.refaddr:=addr_page
+                else
+                  href.refaddr:=addr_gotpage;
+                list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
+                { load the GOT entry (= address of the variable) }
+                reference_reset_base(href,preferred_newbasereg,0,sizeof(pint));
+                href.symbol:=ref.symbol;
+                { code symbols defined in the current compilation unit do not
+                  have to be accessed via the GOT }
+                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+                   ((ref.symbol.typ=AT_DATA) and
+                    (ref.symbol.bind=AB_LOCAL)) then
+                  begin
+                    href.base:=NR_NO;
+                    href.refaddr:=addr_pageoffset;
+                    list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
+                  end
+                else
+                  begin
+                    href.refaddr:=addr_gotpageoffset;
+                    { use a_load_ref_reg() rather than directly encoding the LDR,
+                      so that we'll check the validity of the reference }
+                    a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
+                  end;
+                { set as new base register }
+                if ref.base=NR_NO then
+                  ref.base:=preferred_newbasereg
+                else if ref.index=NR_NO then
+                  ref.index:=preferred_newbasereg
+                else
+                  begin
+                    { make sure it's valid in case ref.base is SP -> make it
+                      the second operand}
+                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
+                    ref.base:=preferred_newbasereg
+                  end;
+                ref.symbol:=nil;
+              end
+            else
+              { todo }
+              internalerror(2014111003);
+          end;
+
+        { base & index }
+        if (ref.base<>NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            case op of
+              A_LDR, A_STR:
+                begin
+                  if (ref.shiftmode=SM_None) and
+                     (ref.shiftimm<>0) then
+                    internalerror(2014110805);
+                  { wrong shift? (possible in case of something like
+                     array_of_2byte_rec[x].bytefield -> shift will be set 1, but
+                     the final load is a 1 byte -> can't use shift after all }
+                  if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                     ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
+                      (ref.offset<>0)) then
+                    begin
+                      if preferred_newbasereg=NR_NO then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                      { possibly still an invalid offset -> fall through }
+                    end
+                  else if ref.offset<>0 then
+                    begin
+                      if (preferred_newbasereg=NR_NO) or
+                         { we keep ref.index, so it must not be overwritten }
+                         (ref.index=preferred_newbasereg) then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { add to the base and not to the index, because the index
+                        may be scaled; this works even if the base is SP }
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      ref.offset:=0;
+                      ref.base:=preferred_newbasereg;
+                      { finished }
+                      exit;
+                    end
+                  else
+                    { valid -> exit }
+                    exit;
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110704);
+              { these don't support base+index }
+              A_LDUR,A_STUR,
+              A_LDP,A_STP:
+                begin
+                  { these either don't support pre-/post-indexing, or don't
+                    support it with base+index }
+                  if ref.addressmode<>AM_OFFSET then
+                    internalerror(2014110911);
+                  if preferred_newbasereg=NR_NO then
+                    preferred_newbasereg:=getaddressregister(list);
+                  if ref.shiftmode<>SM_None then
+                    begin
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                    end
+                  else
+                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
+                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                  { fall through to the handling of base + offset, since the
+                    offset may still be too big }
+                end;
+              else
+                internalerror(2014110901);
+            end;
+          end;
+
+        { base + offset }
+        if ref.base<>NR_NO then
+          begin
+            { valid offset for LDUR/STUR -> use that }
+            if (ref.addressmode=AM_OFFSET) and
+               (op in [A_LDR,A_STR]) and
+               (ref.offset>=-256) and
+               (ref.offset<=255) then
+              begin
+                if op=A_LDR then
+                  op:=A_LDUR
+                else
+                  op:=A_STUR
+              end
+            { if it's not a valid LDUR/STUR, use LDR/STR }
+            else if (op in [A_LDUR,A_STUR]) and
+               ((ref.offset<-256) or
+                (ref.offset>255) or
+                (ref.addressmode<>AM_OFFSET)) then
+              begin
+                if op=A_LDUR then
+                  op:=A_LDR
+                else
+                  op:=A_STR
+              end;
+            case op of
+              A_LDR,A_STR:
+                begin
+                  case ref.addressmode of
+                    AM_PREINDEXED:
+                      begin
+                        { since the loaded/stored register cannot be the same
+                          as the base register, we can safely add the
+                          offset to the base if it doesn't fit}
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          begin
+                            a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
+                            ref.offset:=0;
+                          end;
+                      end;
+                    AM_POSTINDEXED:
+                      begin
+                        { cannot emulate post-indexing if we have to fold the
+                          offset into the base register }
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          internalerror(2014110909);
+                        { ok }
+                      end;
+                    AM_OFFSET:
+                      begin
+                        { unsupported offset -> fold into base register }
+                        accesssize:=1 shl tcgsizep2size[size];
+                        if (ref.offset<0) or
+                           (ref.offset>(((1 shl 12)-1)*accesssize)) or
+                           ((ref.offset mod accesssize)<>0) then
+                          begin
+                            if preferred_newbasereg=NR_NO then
+                              preferred_newbasereg:=getaddressregister(list);
+                            { can we split the offset beween an
+                              "add/sub (imm12 shl 12)" and the load (also an
+                              imm12)?
+                              -- the offset from the load will always be added,
+                                that's why the lower bound has a smaller range
+                                than the upper bound; it must also be a multiple
+                                of the access size }
+                            if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
+                               (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
+                               ((ref.offset mod accesssize)=0) then
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
+                                ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
+                              end
+                            else
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                                ref.offset:=0;
+                              end;
+                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                          end;
+                      end
+                    else
+                      internalerror(2014110904);
+                  end;
+                end;
+              A_LDP,A_STP:
+                begin
+                  { unsupported offset -> fold into base register (these
+                    instructions support all addressmodes) }
+                  if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
+                     (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
+                    begin
+                      case ref.addressmode of
+                        AM_POSTINDEXED:
+                          { don't emulate post-indexing if we have to fold the
+                            offset into the base register }
+                          internalerror(2014110910);
+                        AM_PREINDEXED:
+                          { this means the offset must be added to the current
+                            base register }
+                          preferred_newbasereg:=ref.base;
+                        AM_OFFSET:
+                          if preferred_newbasereg=NR_NO then
+                            preferred_newbasereg:=getaddressregister(list);
+                      end;
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      reference_reset_base(ref,preferred_newbasereg,0,ref.alignment);
+                    end
+                end;
+              A_LDUR,A_STUR:
+                begin
+                  { valid, checked above }
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110908);
+              else
+                internalerror(2014110708);
+            end;
+            { done }
+            exit;
+          end;
+
+        { only an offset -> change to base (+ offset 0) }
+        if preferred_newbasereg=NR_NO then
+          preferred_newbasereg:=getaddressregister(list);
+        a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
+        reference_reset_base(ref,preferred_newbasereg,0,newalignment(8,ref.offset));
+      end;
+
+
+    function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
+      var
+        subreg:Tsubregister;
+      begin
+        subreg:=cgsize2subreg(getregtype(reg),size);
+        result:=reg;
+        setsubreg(result,subreg);
+      end;
+
+
+    function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
+      begin
+        internalerror(2014122110);
+        { squash warning }
+        result:=NR_NO;
+      end;
+
+
+    function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+      begin
+        make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
+        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
+        result:=ref;
+      end;
+
+
+    procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+      var
+        instr: taicpu;
+        so: tshifterop;
+        hadtmpreg: boolean;
+      begin
+        { imm12 }
+        if (a>=0) and
+           (a<=((1 shl 12)-1)) then
+          if usedest then
+            instr:=taicpu.op_reg_reg_const(op,dst,src,a)
+          else
+            instr:=taicpu.op_reg_const(op,src,a)
+        { imm12 lsl 12 }
+        else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
+          begin
+            so.shiftmode:=SM_LSL;
+            so.shiftimm:=12;
+            if usedest then
+              instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
+            else
+              instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
+          end
+        else
+          begin
+            { todo: other possible optimizations (e.g. load 16 bit constant in
+                register and then add/sub/cmp/cmn shifted the rest) }
+            if tmpreg=NR_NO then
+              begin
+                hadtmpreg:=false;
+                tmpreg:=getintregister(list,size);
+              end
+            else
+              begin
+                hadtmpreg:=true;
+                getcpuregister(list,tmpreg);
+              end;
+            a_load_const_reg(list,size,a,tmpreg);
+            if usedest then
+              instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
+            else
+              instr:=taicpu.op_reg_reg(op,src,tmpreg);
+            if hadtmpreg then
+              ungetcpuregister(list,tmpreg);
+          end;
+        if setflags then
+          setoppostfix(instr,PF_S);
+        list.concat(instr);
+      end;
+
+
+{****************************************************************************
+                              Assembler code
+****************************************************************************}
+
+    procedure tcgaarch64.init_register_allocators;
+      begin
+        inherited init_register_allocators;
+
+        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
+            [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
+             RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
+             RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
+             { maybe we can enable this in the future for leaf functions (it's
+               the frame pointer)
+             ,RS_X29 }],
+            first_int_imreg,[]);
+
+        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
+            [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
+             RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
+             RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
+             RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
+            first_mm_imreg,[]);
+      end;
+
+
+    procedure tcgaarch64.done_register_allocators;
+      begin
+        rg[R_INTREGISTER].free;
+        rg[R_FPUREGISTER].free;
+        rg[R_MMREGISTER].free;
+        inherited done_register_allocators;
+      end;
+
+
+    function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
+      begin
+        case size of
+          OS_F32:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
+          OS_F64:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
+          else
+            internalerror(2014102701);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
+      begin
+        if not weak then
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s)))
+        else
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s)));
+      end;
+
+
+    procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
+      begin
+        list.concat(taicpu.op_reg(A_BLR,reg));
+      end;
+
+
+    {********************** load instructions ********************}
+
+    procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
+      var
+        preva: tcgint;
+        opc: tasmop;
+        shift,maxshift: byte;
+        so: tshifterop;
+        reginited: boolean;
+        mask: tcgint;
+      begin
+        { if we load a value into a 32 bit register, it is automatically
+          zero-extended to 64 bit }
+        if (high(a)=0) and
+           (size in [OS_64,OS_S64]) then
+          begin
+            size:=OS_32;
+            reg:=makeregsize(reg,size);
+          end;
+        { values <= 32 bit are stored in a 32 bit register }
+        if not(size in [OS_64,OS_S64]) then
+          a:=cardinal(a);
+
+        if size in [OS_64,OS_S64] then
+          begin
+            mask:=-1;
+            maxshift:=64;
+          end
+        else
+          begin
+            mask:=$ffffffff;
+            maxshift:=32;
+          end;
+        { single movn enough? (to be extended) }
+        shift:=16;
+        preva:=a;
+        repeat
+          if (a shr shift)=(mask shr shift) then
+            begin
+              if shift=16 then
+                list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
+              else
+                begin
+                  shifterop_reset(so);
+                  so.shiftmode:=SM_LSL;
+                  so.shiftimm:=shift-16;
+                  list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
+                end;
+              exit;
+            end;
+          { only try the next 16 bits if the current one is all 1 bits, since
+            the movn will set all lower bits to 1 }
+          if word(a shr (shift-16))<>$ffff then
+            break;
+          inc(shift,16);
+        until shift=maxshift;
+        reginited:=false;
+        shift:=0;
+        { can be optimized later to use more movn }
+        repeat
+          { leftover is shifterconst? (don't check if we can represent it just
+            as effectively with movz/movk, as this check is expensive) }
+          if ((shift<tcgsize2size[size]*(8 div 2)) and
+              (word(a)<>0) and
+              ((a shr 16)<>0)) and
+             is_shifter_const(a shl shift,size) then
+            begin
+              if reginited then
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
+              else
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
+              exit;
+            end;
+          { set all 16 bit parts <> 0 }
+          if (word(a)<>0) or
+             ((shift=0) and
+              (a=0)) then
+            if shift=0 then
+              begin
+                list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
+                reginited:=true;
+              end
+            else
+              begin
+                shifterop_reset(so);
+                so.shiftmode:=SM_LSL;
+                so.shiftimm:=shift;
+                if not reginited then
+                  begin
+                    opc:=A_MOVZ;
+                    reginited:=true;
+                  end
+                else
+                  opc:=A_MOVK;
+                list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
+              end;
+            preva:=a;
+            a:=a shr 16;
+           inc(shift,16);
+        until word(preva)=preva;
+        if not reginited then
+          internalerror(2014102702);
+      end;
+
+
+    procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
+      var
+        reg: tregister;
+      begin
+        { use the zero register if possible }
+        if a=0 then
+          begin
+            if size in [OS_64,OS_S64] then
+              reg:=NR_XZR
+            else
+              reg:=NR_WZR;
+            a_load_reg_ref(list,size,size,reg,ref);
+          end
+        else
+          inherited;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      var
+        oppostfix:toppostfix;
+        hreg: tregister;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          fromsize:=tosize
+        { have a 32 bit register but need a 64 bit one? }
+        else if tosize in [OS_64,OS_S64] then
+          begin
+            { sign extend if necessary }
+            if fromsize in [OS_S8,OS_S16,OS_S32] then
+              begin
+                { can't overwrite reg, may be a constant reg }
+                hreg:=getintregister(list,tosize);
+                a_load_reg_reg(list,fromsize,tosize,reg,hreg);
+                reg:=hreg;
+              end
+            else
+              { top 32 bit are zero by default }
+              reg:=makeregsize(reg,OS_64);
+            fromsize:=tosize;
+          end;
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[tosize]) then
+          begin
+            a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
+          end
+        else
+          begin
+            case tosize of
+              { signed integer registers }
+              OS_8,
+              OS_S8:
+                oppostfix:=PF_B;
+              OS_16,
+              OS_S16:
+                oppostfix:=PF_H;
+              OS_32,
+              OS_S32,
+              OS_64,
+              OS_S64:
+                oppostfix:=PF_None;
+              else
+                InternalError(200308299);
+            end;
+            handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      var
+        oppostfix:toppostfix;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          fromsize:=tosize;
+        { ensure that all bits of the 32/64 register are always correctly set:
+           * default behaviour is always to zero-extend to the entire (64 bit)
+             register -> unsigned 8/16/32 bit loads only exist with a 32 bit
+             target register, as the upper 32 bit will be zeroed implicitly
+             -> always make target register 32 bit
+           * signed loads exist both with 32 and 64 bit target registers,
+             depending on whether the value should be sign extended to 32 or
+             to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
+             corresponding 64 bit register are again zeroed) -> no need to
+             change anything (we only have 32 and 64 bit registers), except that
+             when loading an OS_S32 to a 32 bit register, we don't need/can't
+             use sign extension
+        }
+        if fromsize in [OS_8,OS_16,OS_32] then
+          reg:=makeregsize(reg,OS_32);
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[fromsize]) then
+          begin
+            a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
+            exit;
+          end;
+        case fromsize of
+          { signed integer registers }
+          OS_8:
+            oppostfix:=PF_B;
+          OS_S8:
+            oppostfix:=PF_SB;
+          OS_16:
+            oppostfix:=PF_H;
+          OS_S16:
+            oppostfix:=PF_SH;
+          OS_S32:
+            if getsubreg(reg)=R_SUBD then
+              oppostfix:=PF_NONE
+            else
+              oppostfix:=PF_SW;
+          OS_32,
+          OS_64,
+          OS_S64:
+            oppostfix:=PF_None;
+          else
+            InternalError(200308297);
+        end;
+        handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
+
+        { clear upper 16 bits if the value was negative }
+        if (fromsize=OS_S8) and (tosize=OS_16) then
+          a_load_reg_reg(list,fromsize,tosize,reg,reg);
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg: tregister;
+      begin
+        if fromsize in [OS_64,OS_S64] then
+          begin
+            { split into two 32 bit loads }
+            hreg1:=makeregsize(register,OS_32);
+            hreg2:=getintregister(list,OS_32);
+            if target_info.endian=endian_big then
+              begin
+                tmpreg:=hreg1;
+                hreg1:=hreg2;
+                hreg2:=tmpreg;
+              end;
+            { can we use LDP? }
+            if (ref.alignment=4) and
+               (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
+              list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
+            else
+              begin
+                a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
+                href:=ref;
+                inc(href.offset,4);
+                a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
+              end;
+            list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
+          end
+       else
+         inherited;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
+      var
+        instr: taicpu;
+      begin
+        { we use both 32 and 64 bit registers -> insert conversion when when
+          we have to truncate/sign extend inside the (32 or 64 bit) register
+          holding the value, and when we sign extend from a 32 to a 64 bit
+          register }
+        if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
+           ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
+            (fromsize<>tosize) and
+            not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
+           ((fromsize in [OS_S8,OS_S16,OS_S32]) and
+            (tosize in [OS_64,OS_S64])) or
+           { needs to mask out the sign in the top 16 bits }
+           ((fromsize=OS_S8) and
+            (tosize=OS_16)) then
+          begin
+            case tosize of
+              OS_8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              OS_S8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_S16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              { while "mov wN, wM" automatically inserts a zero-extension and
+                hence we could encode a 64->32 bit move like that, the problem
+                is that we then can't distinguish 64->32 from 32->32 moves, and
+                the 64->32 truncation could be removed altogether... So use a
+                different instruction }
+              OS_32,
+              OS_S32:
+                { in theory, reg1 should be 64 bit here (since fromsize>tosize),
+                  but because of the way location_force_register() tries to
+                  avoid superfluous zero/sign extensions, it's not always the
+                  case -> also force reg1 to to 64 bit }
+                list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
+              OS_64,
+              OS_S64:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
+              else
+                internalerror(2002090901);
+            end;
+          end
+        else
+          begin
+            { 32 -> 32 bit move implies zero extension (sign extensions have
+              been handled above) -> also use for 32 <-> 64 bit moves }
+            if not(fromsize in [OS_64,OS_S64]) or
+               not(tosize in [OS_64,OS_S64]) then
+              instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
+            else
+              instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
+            list.Concat(instr);
+            { Notify the register allocator that we have written a move instruction so
+             it can try to eliminate it. }
+            add_move_instruction(instr);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
+      var
+         href: treference;
+         so: tshifterop;
+         op: tasmop;
+      begin
+        op:=A_LDR;
+        href:=ref;
+        { simplify as if we're going to perform a regular 64 bit load, using
+          "r" as the new base register if possible/necessary }
+        make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
+        { load literal? }
+        if assigned(href.symbol) then
+          begin
+            if (href.base<>NR_NO) or
+               (href.index<>NR_NO) or
+               not assigned(href.symboldata) then
+              internalerror(2014110912);
+            list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
+          end
+        else
+          begin
+            if href.index<>NR_NO then
+              begin
+                if href.shiftmode<>SM_None then
+                  begin
+                    { "add" supports a supperset of the shift modes supported by
+                      load/store instructions }
+                    shifterop_reset(so);
+                    so.shiftmode:=href.shiftmode;
+                    so.shiftimm:=href.shiftimm;
+                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
+                  end
+                else
+                  a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
+              end
+            else if href.offset<>0 then
+              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
+            else
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
+      begin
+        internalerror(2014122107)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      begin
+        internalerror(2014122108)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      begin
+        internalerror(2014122109)
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
+      var
+        instr: taicpu;
+      begin
+        if assigned(shuffle) and
+           not shufflescalar(shuffle) then
+          internalerror(2014122104);
+        if fromsize=tosize then
+          begin
+            instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
+            { Notify the register allocator that we have written a move
+              instruction so it can try to eliminate it. }
+            add_move_instruction(instr);
+          end
+        else
+          begin
+            if (reg_cgsize(reg1)<>fromsize) or
+               (reg_cgsize(reg2)<>tosize) then
+              internalerror(2014110913);
+            instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
+          end;
+        list.Concat(instr);
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122105);
+         tmpreg:=NR_NO;
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=reg;
+             reg:=getmmregister(list,fromsize);
+           end;
+         handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
+         if (fromsize<>tosize) then
+           a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122106);
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=getmmregister(list,tosize);
+             a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+             reg:=tmpreg;
+           end;
+         handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122801);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+           internalerror(2014122803);
+         list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122802);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+           internalerror(2014122804);
+         list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
+       end;
+
+
+    procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
+      begin
+        case op of
+          { "xor Vx,Vx" is used to initialize global regvars to 0 }
+          OP_XOR:
+            begin
+              if (src<>dst) or
+                 (reg_cgsize(src)<>size) or
+                 assigned(shuffle) then
+                internalerror(2015011401);
+              case size of
+                OS_F32,
+                OS_F64:
+                  list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
+                else
+                  internalerror(2015011402);
+              end;
+            end
+          else
+            internalerror(2015011403);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
+      var
+        bitsize,
+        signbit: longint;
+      begin
+        if srcsize in [OS_64,OS_S64] then
+          begin
+            bitsize:=64;
+            signbit:=6;
+          end
+        else
+          begin
+            bitsize:=32;
+            signbit:=5;
+          end;
+        { source is 0 -> dst will have to become 255 }
+        list.concat(taicpu.op_reg_const(A_CMP,src,0));
+        if reverse then
+          begin
+            list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
+            { xor 31/63 is the same as setting the lower 5/6 bits to
+              "31/63-(lower 5/6 bits of dst)" }
+            list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
+          end
+        else
+          begin
+            list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
+            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
+          end;
+        { set dst to -1 if src was 0 }
+        list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
+        { mask the -1 to 255 if src was 0 (anyone find a two-instruction
+          branch-free version? All of mine are 3...) }
+        list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,dst,dst),PF_B));
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg: tregister;
+      begin
+        if fromsize in [OS_64,OS_S64] then
+          begin
+            { split into two 32 bit stores }
+            hreg1:=makeregsize(register,OS_32);
+            hreg2:=getintregister(list,OS_32);
+            a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
+            if target_info.endian=endian_big then
+              begin
+                tmpreg:=hreg1;
+                hreg1:=hreg2;
+                hreg2:=tmpreg;
+              end;
+            { can we use STP? }
+            if (ref.alignment=4) and
+               (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
+              list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
+            else
+              begin
+                a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
+                href:=ref;
+                inc(href.offset,4);
+                a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
+              end;
+          end
+       else
+         inherited;
+      end;
+
+
+    procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+      const
+        overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
+      begin
+        if (op in overflowops) and
+           (size in [OS_8,OS_S8,OS_16,OS_S16]) then
+          a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
+      begin
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            exit;
+          OP_MOVE:
+            a_load_const_reg(list,size,a,reg);
+          OP_NEG,OP_NOT:
+            internalerror(200306011);
+          else
+            a_op_const_reg_reg(list,op,size,a,reg,reg);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
+      begin
+        Case op of
+          OP_NEG,
+          OP_NOT:
+            begin
+              list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
+              maybeadjustresult(list,op,size,dst);
+            end
+          else
+            a_op_reg_reg_reg(list,op,size,src,dst,dst);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
+      var
+        l: tlocation;
+      begin
+        a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
+      var
+        hreg: tregister;
+      begin
+        { no ROLV opcode... }
+        if op=OP_ROL then
+          begin
+            case size of
+              OS_32,OS_S32,
+              OS_64,OS_S64:
+                begin
+                  hreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
+                  a_op_reg_reg(list,OP_SUB,size,src1,hreg);
+                  a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
+                  exit;
+                end;
+              else
+                internalerror(2014111005);
+            end;
+          end
+        else if (op=OP_ROR) and
+           not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
+          internalerror(2014111006);
+        if TOpCG2AsmOpReg[op]=A_NONE then
+          internalerror(2014111007);
+        list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        shiftcountmask: longint;
+        constreg: tregister;
+      begin
+        { add/sub instructions have only positive immediate operands }
+        if (op in [OP_ADD,OP_SUB]) and
+           (a<0) then
+          begin
+            if op=OP_ADD then
+              op:=op_SUB
+            else
+              op:=OP_ADD;
+            { avoid range/overflow error in case a = low(tcgint) }
+{$push}{$r-}{$q-}
+            a:=-a;
+{$pop}
+          end;
+        ovloc.loc:=LOC_VOID;
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            begin
+              a_load_reg_reg(list,size,size,src,dst);
+              exit;
+            end;
+          OP_MOVE:
+            begin
+              a_load_const_reg(list,size,a,dst);
+              exit;
+            end;
+        end;
+        case op of
+          OP_ADD,
+          OP_SUB:
+            begin
+              handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
+              { on a 64 bit target, overflows with smaller data types
+                are handled via range errors }
+              if setflags and
+                 (size in [OS_64,OS_S64]) then
+                begin
+                  location_reset(ovloc,LOC_FLAGS,OS_8);
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                end;
+            end;
+          OP_OR,
+          OP_AND,
+          OP_XOR:
+            begin
+              if not(size in [OS_64,OS_S64]) then
+                a:=cardinal(a);
+              if is_shifter_const(a,size) then
+                list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
+              else
+                begin
+                  constreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,a,constreg);
+                  a_op_reg_reg_reg(list,op,size,constreg,src,dst);
+                end;
+            end;
+          OP_SHL,
+          OP_SHR,
+          OP_SAR:
+            begin
+              if size in [OS_64,OS_S64] then
+                shiftcountmask:=63
+              else
+                shiftcountmask:=31;
+              if (a and shiftcountmask)<>0 Then
+                list.concat(taicpu.op_reg_reg_const(
+                  TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
+              else
+                a_load_reg_reg(list,size,size,src,dst);
+              if (a and not(tcgint(shiftcountmask)))<>0 then
+                internalError(2014112101);
+            end;
+          OP_ROL,
+          OP_ROR:
+            begin
+              case size of
+                OS_32,OS_S32:
+                  if (a and not(tcgint(31)))<>0 then
+                    internalError(2014112102);
+                OS_64,OS_S64:
+                  if (a and not(tcgint(63)))<>0 then
+                    internalError(2014112103);
+                else
+                  internalError(2014112104);
+              end;
+              { there's only a ror opcode }
+              if op=OP_ROL then
+                a:=(tcgsize2size[size]*8)-a;
+              list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
+            end;
+          OP_MUL,
+          OP_IMUL,
+          OP_DIV,
+          OP_IDIV:
+            begin
+              constreg:=getintregister(list,size);
+              a_load_const_reg(list,size,a,constreg);
+              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
+            end;
+          else
+            internalerror(2014111403);
+        end;
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        tmpreg1: tregister;
+      begin
+        ovloc.loc:=LOC_VOID;
+        { overflow can only occur with 64 bit calculations on 64 bit cpus }
+        if setflags and
+           (size in [OS_64,OS_S64]) then
+          begin
+            case op of
+              OP_ADD,
+              OP_SUB:
+                begin
+                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
+                  ovloc.loc:=LOC_FLAGS;
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                  { finished; since we won't call through to a_op_reg_reg_reg,
+                    adjust the result here if necessary }
+                  maybeadjustresult(list,op,size,dst);
+                  exit;
+                end;
+              OP_MUL:
+                begin
+                  { check whether the upper 64 bit of the 128 bit product is 0 }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
+                  list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { still have to perform the actual multiplication  }
+                end;
+              OP_IMUL:
+                begin
+                  { check whether the sign bit of the (128 bit) result is the
+                    same as "sign bit of src1" xor "signbit of src2" (if so, no
+                    overflow and the xor-product of all sign bits is 0) }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src1));
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src2));
+                  list.concat(taicpu.op_reg_const(A_TST,tmpreg1,$80000000));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { still have to perform the actual multiplication }
+                end;
+              OP_IDIV,
+              OP_DIV:
+                begin
+                  { not handled here, needs div-by-zero check (dividing by zero
+                    just gives a 0 result on aarch64), and low(int64) div -1
+                    check for overflow) }
+                  internalerror(2014122101);
+                end;
+            end;
+          end;
+        a_op_reg_reg_reg(list,op,size,src1,src2,dst);
+      end;
+
+
+
+  {*************** compare instructructions ****************}
+
+    procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
+      var
+        op: tasmop;
+      begin
+        if a>=0 then
+          op:=A_CMP
+        else
+          op:=A_CMN;
+        { avoid range/overflow error in case a=low(tcgint) }
+{$push}{$r-}{$q-}
+        handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
+{$pop}
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
+      begin
+        list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(TOpCmp2AsmCond[cond]);
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
+      var
+        ai : taicpu;
+      begin
+        ai:=Taicpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(flags_to_cond(f));
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
+      begin
+        list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
+      begin
+        { we need an explicit overflow location, because there are many
+          possibilities (not just the overflow flag, which is only used for
+          signed add/sub) }
+        internalerror(2014112303);
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
+      var
+        hl : tasmlabel;
+        hflags : tresflags;
+      begin
+        if not(cs_check_overflow in current_settings.localswitches) then
+          exit;
+        current_asmdata.getjumplabel(hl);
+        case ovloc.loc of
+          LOC_FLAGS:
+            begin
+              hflags:=ovloc.resflags;
+              inverse_flags(hflags);
+              cg.a_jmp_flags(list,hflags,hl);
+            end;
+          else
+            internalerror(2014112304);
+        end;
+        a_call_name(list,'FPC_OVERFLOW',false);
+        a_label(list,hl);
+      end;
+
+  { *********** entry/exit code and address loading ************ }
+
+    function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+      var
+        ref: treference;
+        sr: tsuperregister;
+        pairreg: tregister;
+      begin
+        result:=0;
+        reference_reset_base(ref,NR_SP,-16,16);
+        ref.addressmode:=AM_PREINDEXED;
+        pairreg:=NR_NO;
+        { store all used registers pairwise }
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                inc(result,16);
+                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                pairreg:=NR_NO
+              end;
+        { one left -> store twice (stack must be 16 bytes aligned) }
+        if pairreg<>NR_NO then
+          begin
+            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
+            inc(result,16);
+          end;
+      end;
+
+
+    procedure FixupOffsets(p:TObject;arg:pointer);
+      var
+        sym: tabstractnormalvarsym absolute p;
+      begin
+        if (tsym(p).typ in [paravarsym,localvarsym]) and
+          (sym.localloc.loc=LOC_REFERENCE) and
+          (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
+          begin
+            sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
+            dec(sym.localloc.reference.offset,PLongint(arg)^);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
+      var
+        ref: treference;
+        totalstackframesize: longint;
+      begin
+        if nostackframe then
+          exit;
+        { stack pointer has to be aligned to 16 bytes at all times }
+        localsize:=align(localsize,16);
+
+        { save stack pointer and return address }
+        reference_reset_base(ref,NR_SP,-16,16);
+        ref.addressmode:=AM_PREINDEXED;
+        list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
+        { initialise frame pointer }
+        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
+
+        totalstackframesize:=localsize;
+        { save modified integer registers }
+        inc(totalstackframesize,
+          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
+        { only the lower 64 bits of the modified vector registers need to be
+          saved; if the caller needs the upper 64 bits, it has to save them
+          itself }
+        inc(totalstackframesize,
+          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
+
+        { allocate stack space }
+        if localsize<>0 then
+          begin
+            localsize:=align(localsize,16);
+            current_procinfo.final_localsize:=localsize;
+            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+          end;
+        { By default, we use the frame pointer to access parameters passed via
+          the stack and the stack pointer to address local variables and temps
+          because
+           a) we can use bigger positive than negative offsets (so accessing
+              locals via negative offsets from the frame pointer would be less
+              efficient)
+           b) we don't know the local size while generating the code, so
+              accessing the parameters via the stack pointer is not possible
+              without copying them
+          The problem with this is the get_frame() intrinsic:
+           a) it must return the same value as what we pass as parentfp
+              parameter, since that's how it's used in the TP-style objects unit
+           b) its return value must usable to access all local data from a
+              routine (locals and parameters), since it's all the nested
+              routines have access to
+           c) its return value must be usable to construct a backtrace, as it's
+              also used by the exception handling routines
+
+          The solution we use here, based on something similar that's done in
+          the MIPS port, is to generate all accesses to locals in the routine
+          itself SP-relative, and then after the code is generated and the local
+          size is known (namely, here), we change all SP-relative variables/
+          parameters into FP-relative ones. This means that they'll be accessed
+          less efficiently from nested routines, but those accesses are indirect
+          anyway and at least this way they can be accessed at all
+        }
+        if current_procinfo.has_nestedprocs then
+          begin
+            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
+      begin
+        { nothing to do on Darwin; check on ELF targets }
+        if not(target_info.system in systems_darwin) then
+          internalerror(2014112601);
+      end;
+
+
+    procedure tcgaarch64.g_restore_registers(list:TAsmList);
+      begin
+        { done in g_proc_exit }
+      end;
+
+
+    procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+      var
+        ref: treference;
+        sr, highestsetsr: tsuperregister;
+        pairreg: tregister;
+        regcount: longint;
+      begin
+        reference_reset_base(ref,NR_SP,16,16);
+        ref.addressmode:=AM_POSTINDEXED;
+        { highest reg stored twice? }
+        regcount:=0;
+        highestsetsr:=RS_NO;
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            begin
+              inc(regcount);
+              highestsetsr:=sr;
+            end;
+        if odd(regcount) then
+          begin
+            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
+            highestsetsr:=pred(highestsetsr);
+          end;
+        { load all (other) used registers pairwise }
+        pairreg:=NR_NO;
+        for sr:=highestsetsr downto lowsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
+                pairreg:=NR_NO
+              end;
+        { There can't be any register left }
+        if pairreg<>NR_NO then
+          internalerror(2014112602);
+      end;
+
+
+
+    procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
+      var
+        ref: treference;
+        regsstored: boolean;
+        sr: tsuperregister;
+      begin
+        if not nostackframe then
+          begin
+            { if no registers have been stored, we don't have to subtract the
+              allocated temp space from the stack pointer }
+            regsstored:=false;
+            for sr:=RS_X19 to RS_X28 do
+              if sr in rg[R_INTREGISTER].used_in_proc then
+                begin
+                  regsstored:=true;
+                  break;
+                end;
+            if not regsstored then
+              for sr:=RS_D8 to RS_D15 do
+                if sr in rg[R_MMREGISTER].used_in_proc then
+                  begin
+                    regsstored:=true;
+                    break;
+                  end;
+            { restore registers (and stack pointer) }
+            if regsstored then
+              begin
+                if current_procinfo.final_localsize<>0 then
+                  handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
+                load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
+                load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
+              end
+            else if current_procinfo.final_localsize<>0 then
+              { restore stack pointer }
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
+
+            { restore framepointer and return address }
+            reference_reset_base(ref,NR_SP,16,16);
+            ref.addressmode:=AM_POSTINDEXED;
+            list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
+          end;
+
+        { return }
+        list.concat(taicpu.op_none(A_RET));
+      end;
+
+
+    procedure tcgaarch64.g_save_registers(list : TAsmList);
+      begin
+        { done in g_proc_entry }
+      end;
+
+
+    { ************* concatcopy ************ }
+
+    procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
+      var
+        paraloc1,paraloc2,paraloc3 : TCGPara;
+        pd : tprocdef;
+      begin
+        pd:=search_system_proc('MOVE');
+        paraloc1.init;
+        paraloc2.init;
+        paraloc3.init;
+        paramanager.getintparaloc(pd,1,paraloc1);
+        paramanager.getintparaloc(pd,2,paraloc2);
+        paramanager.getintparaloc(pd,3,paraloc3);
+        a_load_const_cgpara(list,OS_SINT,len,paraloc3);
+        a_loadaddr_ref_cgpara(list,dest,paraloc2);
+        a_loadaddr_ref_cgpara(list,source,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
+        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        a_call_name(list,'FPC_MOVE',false);
+        dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        paraloc3.done;
+        paraloc2.done;
+        paraloc1.done;
+      end;
+
+
+    procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
+
+      var
+        sourcebasereplaced, destbasereplaced: boolean;
+
+      { get optimal memory operation to use for loading/storing data
+        in an unrolled loop }
+      procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
+        begin
+          if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=true;
+            end
+          else if (unscaledop<>A_NONE) and
+             (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=false;
+            end
+          else
+            begin
+              memop:=scaledop;
+              needsimplify:=true;
+            end;
+        end;
+
+      { adjust the offset and/or addressing mode after a load/store so it's
+        correct for the next one of the same size }
+      procedure updaterefafterloadstore(var ref: treference; oplen: longint);
+        begin
+          case ref.addressmode of
+            AM_OFFSET:
+              inc(ref.offset,oplen);
+            AM_POSTINDEXED:
+              { base register updated by instruction, next offset can remain
+                the same }
+              ;
+            AM_PREINDEXED:
+              begin
+                { base register updated by instruction -> next instruction can
+                  use post-indexing with offset = sizeof(operation) }
+                ref.offset:=0;
+                ref.addressmode:=AM_OFFSET;
+              end;
+          end;
+        end;
+
+      { generate a load/store and adjust the reference offset to the next
+        memory location if necessary }
+      procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]);
+        end;
+
+      { generate a dual load/store (ldp/stp) and adjust the reference offset to
+        the next memory location if necessary }
+      procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
+        end;
+
+      { turn a reference into a pre- or post-indexed reference for use in a
+        load/store of a particular size }
+      procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
+        var
+          tmpreg: tregister;
+          scaledoffset: longint;
+          orgaddressmode: taddressmode;
+        begin
+          scaledoffset:=tcgsize2size[opsize];
+          if scaledop in [A_LDP,A_STP] then
+            scaledoffset:=scaledoffset*2;
+          { can we use the reference as post-indexed without changes? }
+          if forcepostindexing then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_POSTINDEXED;
+              if (orgaddressmode=AM_POSTINDEXED) or
+                 ((ref.offset=0) and
+                  (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
+                begin
+                  { just change the post-indexed offset to the access size }
+                  ref.offset:=scaledoffset;
+                  { and replace the base register if that didn't happen yet
+                    (could be sp or a regvar) }
+                  if not basereplaced then
+                    begin
+                      tmpreg:=getaddressregister(list);
+                      a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                      ref.base:=tmpreg;
+                      basereplaced:=true;
+                    end;
+                  exit;
+                end;
+              ref.addressmode:=orgaddressmode;
+            end;
+{$ifdef dummy}
+          This could in theory be useful in case you have a concatcopy from
+          e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
+          very unlikely. Disabled because it still needs fixes, as it
+          also generates pre-indexed loads right now at the very end for the
+          left-over gencopies
+
+          { can we turn it into a pre-indexed reference for free? (after the
+            first operation, it will be turned into an offset one) }
+          if not forcepostindexing and
+             (ref.offset<>0) then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_PREINDEXED;
+              tmpreg:=ref.base;
+              if not basereplaced and
+                 (ref.base=tmpreg) then
+                begin
+                  tmpreg:=getaddressregister(list);
+                  a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                  ref.base:=tmpreg;
+                  basereplaced:=true;
+                end;
+              if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
+                make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              exit;
+            end;
+{$endif dummy}
+          if not forcepostindexing then
+            begin
+              ref.addressmode:=AM_OFFSET;
+              make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              { this may still cause problems if the final offset is no longer
+                a simple ref; it's a bit complicated to pass all information
+                through at all places and check that here, so play safe: we
+                currently never generate unrolled copies for more than 64
+                bytes (32 with non-double-register copies) }
+              if ref.index=NR_NO then
+                begin
+                  if ((scaledop in [A_LDP,A_STP]) and
+                      (ref.offset<((64-8)*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDUR,A_STUR]) and
+                      (ref.offset<(255-8*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDR,A_STR]) and
+                      (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
+                    exit;
+                end;
+            end;
+          tmpreg:=getaddressregister(list);
+          a_loadaddr_ref_reg(list,ref,tmpreg);
+          basereplaced:=true;
+          if forcepostindexing then
+            begin
+              reference_reset_base(ref,tmpreg,scaledoffset,ref.alignment);
+              ref.addressmode:=AM_POSTINDEXED;
+            end
+          else
+            begin
+              reference_reset_base(ref,tmpreg,0,ref.alignment);
+              ref.addressmode:=AM_OFFSET;
+            end
+        end;
+
+      { prepare a reference for use by gencopy. This is done both after the
+        unrolled and regular copy loop -> get rid of post-indexing mode, make
+        sure ref is valid }
+      procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
+        var
+          simplify: boolean;
+        begin
+          if ref.addressmode=AM_POSTINDEXED then
+            ref.offset:=tcgsize2size[opsize];
+          getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
+          if simplify then
+            begin
+              makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
+              op:=scaledop;
+            end;
+        end;
+
+      { generate a copy from source to dest of size opsize/postfix }
+      procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
+        var
+          reg: tregister;
+          loadop, storeop: tasmop;
+        begin
+          preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
+          preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
+          reg:=getintregister(list,opsize);
+          genloadstore(list,loadop,reg,source,postfix,opsize);
+          genloadstore(list,storeop,reg,dest,postfix,opsize);
+        end;
+
+
+      { copy the leftovers after an unrolled or regular copy loop }
+      procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
+        begin
+          { stop post-indexing if we did so in the loop, since in that case all
+            offsets definitely can be represented now }
+          if source.addressmode=AM_POSTINDEXED then
+            begin
+              source.addressmode:=AM_OFFSET;
+              source.offset:=0;
+            end;
+          if dest.addressmode=AM_POSTINDEXED then
+            begin
+              dest.addressmode:=AM_OFFSET;
+              dest.offset:=0;
+            end;
+          { transfer the leftovers }
+          if len>=8 then
+            begin
+              dec(len,8);
+              gencopy(list,source,dest,PF_NONE,OS_64);
+            end;
+          if len>=4 then
+            begin
+              dec(len,4);
+              gencopy(list,source,dest,PF_NONE,OS_32);
+            end;
+          if len>=2 then
+            begin
+              dec(len,2);
+              gencopy(list,source,dest,PF_H,OS_16);
+            end;
+          if len>=1 then
+            begin
+              dec(len);
+              gencopy(list,source,dest,PF_B,OS_8);
+            end;
+        end;
+
+
+      const
+        { load_length + loop dec + cbnz }
+        loopoverhead=12;
+        { loop overhead + load + store }
+        totallooplen=loopoverhead + 8;
+      var
+        totalalign: longint;
+        maxlenunrolled: tcgint;
+        loadop, storeop: tasmop;
+        opsize: tcgsize;
+        postfix: toppostfix;
+        tmpsource, tmpdest: treference;
+        scaledstoreop, unscaledstoreop,
+        scaledloadop, unscaledloadop: tasmop;
+        regs: array[1..8] of tregister;
+        countreg: tregister;
+        i, regcount: longint;
+        hl: tasmlabel;
+        simplifysource, simplifydest: boolean;
+      begin
+        if len=0 then
+          exit;
+        sourcebasereplaced:=false;
+        destbasereplaced:=false;
+        { maximum common alignment }
+        totalalign:=max(1,newalignment(source.alignment,dest.alignment));
+        { use a simple load/store? }
+        if (len in [1,2,4,8]) and
+           ((totalalign>=(len div 2)) or
+            (source.alignment=len) or
+            (dest.alignment=len)) then
+          begin
+            opsize:=int_cgsize(len);
+            a_load_ref_ref(list,opsize,opsize,source,dest);
+            exit;
+          end;
+
+        { alignment > length is not useful, and would break some checks below }
+        while totalalign>len do
+          totalalign:=totalalign div 2;
+
+        { operation sizes to use based on common alignment }
+        case totalalign of
+          1:
+            begin
+              postfix:=PF_B;
+              opsize:=OS_8;
+            end;
+          2:
+            begin
+              postfix:=PF_H;
+              opsize:=OS_16;
+            end;
+          4:
+            begin
+              postfix:=PF_None;
+              opsize:=OS_32;
+            end
+          else
+            begin
+              totalalign:=8;
+              postfix:=PF_None;
+              opsize:=OS_64;
+            end;
+        end;
+        { maximum length to handled with an unrolled loop (4 loads + 4 stores) }
+        maxlenunrolled:=min(totalalign,8)*4;
+        { ldp/stp -> 2 registers per instruction }
+        if (totalalign>=4) and
+           (len>=totalalign*2) then
+          begin
+            maxlenunrolled:=maxlenunrolled*2;
+            scaledstoreop:=A_STP;
+            scaledloadop:=A_LDP;
+            unscaledstoreop:=A_NONE;
+            unscaledloadop:=A_NONE;
+          end
+        else
+          begin
+            scaledstoreop:=A_STR;
+            scaledloadop:=A_LDR;
+            unscaledstoreop:=A_STUR;
+            unscaledloadop:=A_LDUR;
+          end;
+        { we only need 4 instructions extra to call FPC_MOVE }
+        if cs_opt_size in current_settings.optimizerswitches then
+          maxlenunrolled:=maxlenunrolled div 2;
+        if (len>maxlenunrolled) and
+           (len>totalalign*8) then
+          begin
+            g_concatcopy_move(list,source,dest,len);
+            exit;
+          end;
+
+        simplifysource:=true;
+        simplifydest:=true;
+        tmpsource:=source;
+        tmpdest:=dest;
+        { can we directly encode all offsets in an unrolled loop? }
+        if len<=maxlenunrolled then
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { the leftovers will be handled separately -> -(len mod opsize) }
+            inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
+            { additionally, the last regular load/store will be at
+              offset+len-opsize (if len-(len mod opsize)>len) }
+            if tmpsource.offset>source.offset then
+              dec(tmpsource.offset,tcgsize2size[opsize]);
+            getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
+            inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
+            if tmpdest.offset>dest.offset then
+              dec(tmpdest.offset,tcgsize2size[opsize]);
+            getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
+            tmpsource:=source;
+            tmpdest:=dest;
+            { if we can't directly encode all offsets, simplify }
+            if simplifysource then
+              begin
+                loadop:=scaledloadop;
+                makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
+              end;
+            if simplifydest then
+              begin
+                storeop:=scaledstoreop;
+                makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
+              end;
+            regcount:=len div tcgsize2size[opsize];
+            { in case we transfer two registers at a time, we copy an even
+              number of registers }
+            if loadop=A_LDP then
+              regcount:=regcount and not(1);
+            { initialise for dfa }
+            regs[low(regs)]:=NR_NO;
+            { max 4 loads/stores -> max 8 registers (in case of ldp/stdp) }
+            for i:=1 to regcount do
+              regs[i]:=getintregister(list,opsize);
+            if loadop=A_LDP then
+              begin
+                { load registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
+                { store registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                for i:=1 to regcount do
+                  genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
+                for i:=1 to regcount do
+                  genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
+              end;
+            { leftover }
+            len:=len-regcount*tcgsize2size[opsize];
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
+{$endif extdebug}
+          end
+        else
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { regular loop -> definitely use post-indexing }
+            loadop:=scaledloadop;
+            makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
+            storeop:=scaledstoreop;
+            makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
+            current_asmdata.getjumplabel(hl);
+            countreg:=getintregister(list,OS_32);
+            if loadop=A_LDP then
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize]*2,countreg)
+            else
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
+            a_label(list,hl);
+            a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
+            if loadop=A_LDP then
+              begin
+                regs[1]:=getintregister(list,opsize);
+                regs[2]:=getintregister(list,opsize);
+                gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
+                gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                regs[1]:=getintregister(list,opsize);
+                genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
+                genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
+              end;
+            list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
+            len:=len mod tcgsize2size[opsize];
+          end;
+        gencopyleftovers(list,tmpsource,tmpdest,len);
+      end;
+
+
+    procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
+      begin
+        { This method is integrated into g_intf_wrapper and shouldn't be called separately }
+        InternalError(2013020102);
+      end;
+
+
+    procedure tcgaarch64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
+      var
+        make_global: boolean;
+        href: treference;
+        hsym: tsym;
+        paraloc: pcgparalocation;
+        op: tasmop;
+      begin
+        if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
+          Internalerror(200006137);
+        if not assigned(procdef.struct) or
+           (procdef.procoptions*[po_classmethod, po_staticmethod,
+             po_methodpointer, po_interrupt, po_iocheck]<>[]) then
+          Internalerror(200006138);
+        if procdef.owner.symtabletype<>ObjectSymtable then
+          Internalerror(200109191);
+
+        make_global:=false;
+        if (not current_module.is_unit) or create_smartlink_library or
+           (procdef.owner.defowner.owner.symtabletype=globalsymtable) then
+          make_global:=true;
+
+        if make_global then
+          list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
+        else
+          list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));
+
+        { set param1 interface to self  }
+        procdef.init_paraloc_info(callerside);
+        hsym:=tsym(procdef.parast.Find('self'));
+        if not(assigned(hsym) and
+          (hsym.typ=paravarsym)) then
+          internalerror(2010103101);
+        paraloc:=tparavarsym(hsym).paraloc[callerside].location;
+        if assigned(paraloc^.next) then
+          InternalError(2013020101);
+
+        case paraloc^.loc of
+          LOC_REGISTER:
+            handle_reg_imm12_reg(list,A_SUB,paraloc^.size,paraloc^.register,ioffset,paraloc^.register,NR_IP0,false,true);
+          else
+            internalerror(2010103102);
+        end;
+
+        if (po_virtualmethod in procdef.procoptions) and
+            not is_objectpascal_helper(procdef.struct) then
+          begin
+            if (procdef.extnumber=$ffff) then
+              Internalerror(200006139);
+            { mov  0(%rdi),%rax ; load vmt}
+            reference_reset_base(href,paraloc^.register,0,sizeof(pint));
+            getcpuregister(list,NR_IP0);
+            a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_IP0);
+            { jmp *vmtoffs(%eax) ; method offs }
+            reference_reset_base(href,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
+            op:=A_LDR;
+            make_simple_ref(list,op,OS_ADDR,PF_None,href,NR_IP0);
+            list.concat(taicpu.op_reg_ref(op,NR_IP0,href));
+            ungetcpuregister(list,NR_IP0);
+            list.concat(taicpu.op_reg(A_BR,NR_IP0));
+          end
+        else
+          a_jmp_name(list,procdef.mangledname);
+        list.concat(Tai_symbol_end.Createname(labelname));
+      end;
+
+
+    procedure create_codegen;
+      begin
+        cg:=tcgaarch64.Create;
+        cg128:=tcg128.Create;
+      end;
+
+end.

+ 200 - 38
compiler/aarch64/cpubase.pas

@@ -1,7 +1,8 @@
 {
 {
     Copyright (c) 1998-2012 by Florian Klaempfl and Peter Vreman
     Copyright (c) 1998-2012 by Florian Klaempfl and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe and Florian Klaempfl
 
 
-    Contains the base types for ARM64
+    Contains the base types for Aarch64
 
 
     This program is free software; you can redistribute it and/or modify
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
     it under the terms of the GNU General Public License as published by
@@ -68,14 +69,22 @@ unit cpubase;
       { Available Superregisters }
       { Available Superregisters }
       {$i ra64sup.inc}
       {$i ra64sup.inc}
 
 
+      RS_IP0 = RS_X16;
+      RS_IP1 = RS_X17;
+
       R_SUBWHOLE = R_SUBQ;
       R_SUBWHOLE = R_SUBQ;
 
 
       { Available Registers }
       { Available Registers }
       {$i ra64con.inc}
       {$i ra64con.inc}
 
 
+      NR_IP0 = NR_X16;
+      NR_IP1 = NR_X17;
+
       { Integer Super registers first and last }
       { Integer Super registers first and last }
       first_int_supreg = RS_X0;
       first_int_supreg = RS_X0;
-      first_int_imreg = $20;
+      { xzr and sp take up a separate super register because some instructions
+        are ambiguous otherwise }
+      first_int_imreg = $21;
 
 
       { Integer Super registers first and last }
       { Integer Super registers first and last }
       first_fpu_supreg = RS_S0;
       first_fpu_supreg = RS_S0;
@@ -92,7 +101,7 @@ unit cpubase;
         The value of this constant is equal to the constant
         The value of this constant is equal to the constant
         PARM_BOUNDARY / BITS_PER_UNIT in the GCC source.
         PARM_BOUNDARY / BITS_PER_UNIT in the GCC source.
       }
       }
-      std_param_align = 4;
+      std_param_align = 8;
 
 
       { TODO: Calculate bsstart}
       { TODO: Calculate bsstart}
       regnumber_count_bsstart = 128;
       regnumber_count_bsstart = 128;
@@ -109,7 +118,7 @@ unit cpubase;
         {$i ra64dwa.inc}
         {$i ra64dwa.inc}
       );
       );
       { registers which may be destroyed by calls }
       { registers which may be destroyed by calls }
-      VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X29..RS_X30];
+      VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X30];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
 
 
     type
     type
@@ -126,16 +135,23 @@ unit cpubase;
       TOpPostfix = (PF_None,
       TOpPostfix = (PF_None,
         { update condition flags }
         { update condition flags }
         PF_S,
         PF_S,
-        { load/store }
-        PF_B,PF_SB,PF_H,PF_SH
+        { load/store sizes }
+        PF_B,PF_SB,PF_H,PF_SH,PF_W,PF_SW
       );
       );
 
 
       TOpPostfixes = set of TOpPostfix;
       TOpPostfixes = set of TOpPostfix;
 
 
     const
     const
-      oppostfix2str : array[TOpPostfix] of string[2] = ('',
+      tcgsizep2size: array[OS_NO..OS_F128] of byte =
+        {OS_NO }
+        (0,
+        {OS_8,OS_16,OS_32,OS_64,OS_128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128}
+            0,    1,    2,    3,     4,    0,     1,     2,     3,      4,
+        {OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,}
+             2,      3,     0,     3,      4);
+      oppostfix2str: array[TOpPostfix] of string[2] = ('',
         's',
         's',
-        'b','sb','h','sh');
+        'b','sb','h','sh','w','sw');
 
 
 {*****************************************************************************
 {*****************************************************************************
                                 Conditions
                                 Conditions
@@ -150,13 +166,15 @@ unit cpubase;
       TAsmConds = set of TAsmCond;
       TAsmConds = set of TAsmCond;
 
 
     const
     const
+      C_CS = C_HS;
+      C_CC = C_LO;
       cond2str : array[TAsmCond] of string[2]=('',
       cond2str : array[TAsmCond] of string[2]=('',
         'eq','ne','hs','lo','mi','pl','vs','vc','hi','ls',
         'eq','ne','hs','lo','mi','pl','vs','vc','hi','ls',
         'ge','lt','gt','le','al','nv'
         'ge','lt','gt','le','al','nv'
       );
       );
 
 
       uppercond2str : array[TAsmCond] of string[2]=('',
       uppercond2str : array[TAsmCond] of string[2]=('',
-        'EQ','NE','hs','LO','MI','PL','VS','VC','HI','LS',
+        'EQ','NE','HS','LO','MI','PL','VS','VC','HI','LS',
         'GE','LT','GT','LE','AL','NV'
         'GE','LT','GT','LE','AL','NV'
       );
       );
 
 
@@ -168,12 +186,28 @@ unit cpubase;
       TResFlags = (F_EQ,F_NE,F_CS,F_CC,F_MI,F_PL,F_VS,F_VC,F_HI,F_LS,
       TResFlags = (F_EQ,F_NE,F_CS,F_CC,F_MI,F_PL,F_VS,F_VC,F_HI,F_LS,
         F_GE,F_LT,F_GT,F_LE);
         F_GE,F_LT,F_GT,F_LE);
 
 
+    const
+      F_HS = F_CS;
+      F_LO = F_CC;
+
 {*****************************************************************************
 {*****************************************************************************
                                 Operands
                                 Operands
 *****************************************************************************}
 *****************************************************************************}
 
 
+    type
       taddressmode = (AM_OFFSET,AM_PREINDEXED,AM_POSTINDEXED);
       taddressmode = (AM_OFFSET,AM_PREINDEXED,AM_POSTINDEXED);
-      tshiftmode = (SM_None,SM_LSL,SM_LSR,SM_ASR,SM_ROR);
+
+      tshiftmode = (SM_None,
+                    { shifted register instructions. LSL can also be used for
+                      the index register of certain loads/stores }
+                    SM_LSL,SM_LSR,SM_ASR,
+                    { extended register instructions: zero/sign extension +
+                        optional shift (interpreted as LSL after extension)
+                       -- the index register of certain loads/stores can be
+                          extended via (s|u)xtw with a shiftval of either 0 or
+                          log2(transfer size of the load/store)
+                    }
+                    SM_UXTB,SM_UXTH,SM_UXTW,SM_UXTX,SM_SXTB,SM_SXTH,SM_SXTW,SM_SXTX);
 
 
       tupdatereg = (UR_None,UR_Update);
       tupdatereg = (UR_None,UR_Update);
 
 
@@ -184,12 +218,6 @@ unit cpubase;
         shiftimm : byte;
         shiftimm : byte;
       end;
       end;
 
 
-      tcpumodeflag = (mfA, mfI, mfF);
-      tcpumodeflags = set of tcpumodeflag;
-
-      tspecialregflag = (srC, srX, srS, srF);
-      tspecialregflags = set of tspecialregflag;
-
 {*****************************************************************************
 {*****************************************************************************
                                  Constants
                                  Constants
 *****************************************************************************}
 *****************************************************************************}
@@ -201,6 +229,10 @@ unit cpubase;
       maxfpuregs = 32;
       maxfpuregs = 32;
       maxaddrregs = 0;
       maxaddrregs = 0;
 
 
+      shiftedregmodes = [SM_LSL,SM_UXTB,SM_UXTH,SM_UXTW,SM_UXTX,SM_SXTB,SM_SXTH,SM_SXTW,SM_SXTX];
+      extendedregmodes = [SM_LSL,SM_LSR,SM_ASR];
+
+
 {*****************************************************************************
 {*****************************************************************************
                                 Operand Sizes
                                 Operand Sizes
 *****************************************************************************}
 *****************************************************************************}
@@ -232,17 +264,23 @@ unit cpubase;
                           Generic Register names
                           Generic Register names
 *****************************************************************************}
 *****************************************************************************}
 
 
-      NR_SP = NR_XZR;
-      RS_SP = RS_XZR;
-      NR_WSP = NR_WZR;
-      RS_WSP = RS_WZR;
+
+      NR_FP = NR_X29;
+      RS_FP = RS_X29;
+      NR_WFP = NR_W29;
+      RS_WFP = RS_W29;
+
+      NR_LR = NR_X30;
+      RS_LR = RS_X30;
+      NR_WLR = NR_W30;
+      RS_WLR = RS_W30;
 
 
       { Stack pointer register }
       { Stack pointer register }
       NR_STACK_POINTER_REG = NR_SP;
       NR_STACK_POINTER_REG = NR_SP;
       RS_STACK_POINTER_REG = RS_SP;
       RS_STACK_POINTER_REG = RS_SP;
-      { Frame pointer register (initialized in tarmprocinfo.init_framepointer) }
-      RS_FRAME_POINTER_REG: tsuperregister = RS_X29;
-      NR_FRAME_POINTER_REG: tregister = NR_X29;
+      { Frame pointer register }
+      NR_FRAME_POINTER_REG = NR_X29;
+      RS_FRAME_POINTER_REG = RS_X29;
       { Register for addressing absolute data in a position independant way,
       { Register for addressing absolute data in a position independant way,
         such as in PIC code. The exact meaning is ABI specific. For
         such as in PIC code. The exact meaning is ABI specific. For
         further information look at GCC source : PIC_OFFSET_TABLE_REGNUM
         further information look at GCC source : PIC_OFFSET_TABLE_REGNUM
@@ -307,6 +345,9 @@ unit cpubase;
 
 
     function dwarf_reg(r:tregister):shortint;
     function dwarf_reg(r:tregister):shortint;
 
 
+    function is_shifter_const(d: aint; size: tcgsize): boolean;
+
+
   implementation
   implementation
 
 
     uses
     uses
@@ -329,13 +370,24 @@ unit cpubase;
     function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
     function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
       begin
       begin
         case regtype of
         case regtype of
+          R_INTREGISTER:
+            begin
+              case s of
+                { there's only Wn and Xn }
+                OS_64,
+                OS_S64:
+                  cgsize2subreg:=R_SUBWHOLE;
+                else
+                  cgsize2subreg:=R_SUBD;
+                end;
+            end;
           R_MMREGISTER:
           R_MMREGISTER:
             begin
             begin
               case s of
               case s of
                 OS_F32:
                 OS_F32:
-                  cgsize2subreg:=R_SUBFS;
+                  cgsize2subreg:=R_SUBMMS;
                 OS_F64:
                 OS_F64:
-                  cgsize2subreg:=R_SUBFD;
+                  cgsize2subreg:=R_SUBMMD;
                 else
                 else
                   internalerror(2009112701);
                   internalerror(2009112701);
               end;
               end;
@@ -349,18 +401,22 @@ unit cpubase;
     function reg_cgsize(const reg: tregister): tcgsize;
     function reg_cgsize(const reg: tregister): tcgsize;
       begin
       begin
         case getregtype(reg) of
         case getregtype(reg) of
-          R_INTREGISTER :
-            reg_cgsize:=OS_32;
-          R_FPUREGISTER :
-            reg_cgsize:=OS_F80;
+          R_INTREGISTER:
+            case getsubreg(reg) of
+              R_SUBD:
+                result:=OS_32
+              else
+                result:=OS_64;
+            end;
           R_MMREGISTER :
           R_MMREGISTER :
             begin
             begin
               case getsubreg(reg) of
               case getsubreg(reg) of
-                R_SUBFD,
-                R_SUBWHOLE:
+                R_SUBMMD:
                   result:=OS_F64;
                   result:=OS_F64;
-                R_SUBFS:
+                R_SUBMMS:
                   result:=OS_F32;
                   result:=OS_F32;
+                R_SUBMMWHOLE:
+                  result:=OS_M128;
                 else
                 else
                   internalerror(2009112903);
                   internalerror(2009112903);
               end;
               end;
@@ -373,9 +429,7 @@ unit cpubase;
 
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
       begin
       begin
-        { This isn't 100% perfect because the arm allows jumps also by writing to PC=R15.
-          To overcome this problem we simply forbid that FPC generates jumps by loading R15 }
-        is_calljmp:= o in [A_B,A_BLR,A_RET];
+        is_calljmp:=o in [A_B,A_BL,A_BLR,A_RET,A_CBNZ,A_CBZ];
       end;
       end;
 
 
 
 
@@ -391,8 +445,8 @@ unit cpubase;
 
 
     function flags_to_cond(const f: TResFlags) : TAsmCond;
     function flags_to_cond(const f: TResFlags) : TAsmCond;
       const
       const
-        flag_2_cond: array[F_EQ..F_LE] of TAsmCond =
-          (C_EQ,C_NE,C_HI,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
+        flag_2_cond: array[TResFlags] of TAsmCond =
+          (C_EQ,C_NE,C_HS,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
            C_GE,C_LT,C_GT,C_LE);
            C_GE,C_LT,C_GT,C_LE);
       begin
       begin
         if f>high(flag_2_cond) then
         if f>high(flag_2_cond) then
@@ -434,7 +488,7 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
       const
       const
         inverse: array[TAsmCond] of TAsmCond=(C_None,
         inverse: array[TAsmCond] of TAsmCond=(C_None,
-          C_NE,C_EQ,C_LO,C_HI,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
+          C_NE,C_EQ,C_LO,C_HS,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
           C_LT,C_GE,C_LE,C_GT,C_None,C_None
           C_LT,C_GE,C_LE,C_GT,C_None,C_None
         );
         );
       begin
       begin
@@ -456,4 +510,112 @@ unit cpubase;
       end;
       end;
 
 
 
 
+    function is_shifter_const(d: aint; size: tcgsize): boolean;
+      var
+         pattern, checkpattern: qword;
+         patternlen, maxbits, replicatedlen: longint;
+         rightmostone, rightmostzero, checkbit, secondrightmostbit: longint;
+      begin
+        result:=false;
+        { patterns with all bits 0 or 1 cannot be represented this way }
+        if (d=0) then
+          exit;
+        case size of
+          OS_64,
+          OS_S64:
+            begin
+              if d=-1 then
+                exit;
+              maxbits:=64;
+            end
+          else
+            begin
+              if longint(d)=-1 then
+                exit;
+              { we'll generate a 32 bit pattern -> ignore upper sign bits in
+                case of negative longint value }
+              d:=cardinal(d);
+              maxbits:=32;
+            end;
+        end;
+        { "The Logical (immediate) instructions accept a bitmask immediate value
+          that is a 32-bit pattern or a 64-bit pattern viewed as a vector of
+          identical elements of size e = 2, 4, 8, 16, 32 or, 64 bits. Each
+          element contains the same sub-pattern, that is a single run of
+          1 to (e - 1) nonzero bits from bit 0 followed by zero bits, then
+          rotated by 0 to (e - 1) bits." (ARMv8 ARM)
+
+          Rather than generating all possible patterns and checking whether they
+          match our constant, we check whether the lowest 2/4/8/... bits are
+          a valid pattern, and if so whether the constant consists of a
+          replication of this pattern. Such a valid pattern has the form of
+          either (regexp notation)
+            * 1+0+1*
+            * 0+1+0* }
+        patternlen:=2;
+        while patternlen<=maxbits do
+          begin
+            { try lowest <patternlen> bits of d as pattern }
+            if patternlen<>64 then
+              pattern:=qword(d) and ((qword(1) shl patternlen)-1)
+            else
+              pattern:=qword(d);
+            { valid pattern? If it contains too many 1<->0 transitions, larger
+              parts of d cannot be a valid pattern either }
+            rightmostone:=BsfQWord(pattern);
+            rightmostzero:=BsfQWord(not(pattern));
+            { pattern all ones or zeroes -> not a valid pattern (but larger ones
+              can still be valid, since we have too few transitions) }
+            if (rightmostone<patternlen) and
+               (rightmostzero<patternlen) then
+              begin
+                if rightmostone>rightmostzero then
+                  begin
+                    { we have .*1*0* -> check next zero position by shifting
+                      out the existing zeroes (shr rightmostone), inverting and
+                      then again looking for the rightmost one position }
+                    checkpattern:=not(pattern);
+                    checkbit:=rightmostone;
+                  end
+                else
+                  begin
+                    { same as above, but for .*0*1* }
+                    checkpattern:=pattern;
+                    checkbit:=rightmostzero;
+                  end;
+                secondrightmostbit:=BsfQWord(checkpattern shr checkbit)+checkbit;
+                { if this position is >= patternlen -> ok (1 transition),
+                  otherwise we now have 2 transitions and have to check for a
+                  third (if there is one, abort)
+
+                  bsf returns 255 if no 1 bit is found, so in that case it's
+                  also ok
+                  }
+                if secondrightmostbit<patternlen then
+                  begin
+                    secondrightmostbit:=BsfQWord(not(checkpattern) shr secondrightmostbit)+secondrightmostbit;
+                    if secondrightmostbit<patternlen then
+                      exit;
+                  end;
+                { ok, this is a valid pattern, now does d consist of a
+                  repetition of this pattern? }
+                replicatedlen:=patternlen;
+                checkpattern:=pattern;
+                while replicatedlen<maxbits do
+                  begin
+                    { douplicate current pattern }
+                    checkpattern:=checkpattern or (checkpattern shl replicatedlen);
+                    replicatedlen:=replicatedlen*2;
+                  end;
+                if qword(d)=checkpattern then
+                  begin
+                    { yes! }
+                    result:=true;
+                    exit;
+                  end;
+              end;
+            patternlen:=patternlen*2;
+          end;
+      end;
+
 end.
 end.

+ 2 - 2
compiler/aarch64/cpuinfo.pas

@@ -103,12 +103,12 @@ Const
                                  { no need to write info about those }
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
-				  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
+				  cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
-   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
+   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
 
 Implementation
 Implementation

+ 40 - 0
compiler/aarch64/cpunode.pas

@@ -0,0 +1,40 @@
+{******************************************************************************
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Includes the aarch64 code generator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ *****************************************************************************}
+unit cpunode;
+
+{$I fpcdefs.inc}
+
+interface
+{ This unit is used to define the specific CPU implementations. All needed
+actions are included in the INITALIZATION part of these units. This explains
+the behaviour of such a unit having just a USES clause! }
+
+implementation
+
+  uses
+    ncgbas,ncgflw,ncgcal,ncgcnv,ncgld,ncgmem,ncgcon,ncgset,ncgobjc,
+    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,
+    { this not really a node }
+    rgcpu,
+    { symtable }
+    symcpu;
+
+end.

+ 456 - 520
compiler/aarch64/cpupara.pas

@@ -1,5 +1,5 @@
 {
 {
-    Copyright (c) 2003-2012 by Florian Klaempfl and others
+    Copyright (c) 2013-2014 by Jonas Maebe, Florian Klaempfl and others
 
 
     AArch64 specific calling conventions
     AArch64 specific calling conventions
 
 
@@ -34,19 +34,24 @@ unit cpupara;
 
 
     type
     type
        taarch64paramanager = class(tparamanager)
        taarch64paramanager = class(tparamanager)
-          function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
-          function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
-          function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
-          function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
-          function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
-          function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+          function get_volatile_registers_int(calloption: tproccalloption): tcpuregisterset; override;
+          function get_volatile_registers_fpu(calloption: tproccalloption): tcpuregisterset; override;
+          function get_volatile_registers_mm(calloption: tproccalloption): tcpuregisterset; override;
+          function push_addr_param(varspez: tvarspez; def: tdef; calloption: tproccalloption): boolean; override;
+          function ret_in_param(def: tdef; pd: tabstractprocdef):boolean;override;
+          function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;override;
+          function create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;override;
+          function get_funcretloc(p: tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+          function param_use_paraloc(const cgpara: tcgpara): boolean; override;
          private
          private
-          procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
-          function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-            var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
+          curintreg,
+          curmmreg: tsuperregister;
+          curstackoffset: aword;
+
+          procedure init_para_alloc_values;
+          procedure alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+
+          procedure create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
        end;
        end;
 
 
   implementation
   implementation
@@ -56,6 +61,13 @@ unit cpupara;
        rgobj,
        rgobj,
        defutil,symsym,symtable;
        defutil,symsym,symtable;
 
 
+    const
+      RS_FIRST_INT_PARAM_SUPREG = RS_X0;
+      RS_LAST_INT_PARAM_SUPREG = RS_X7;
+      { Q0/D0/S0/H0/B0 all have the same superregister number }
+      RS_FIRST_MM_PARAM_SUPREG = RS_D0;
+      RS_LAST_MM_PARAM_SUPREG = RS_D7;
+
 
 
     function taarch64paramanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
     function taarch64paramanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
       begin
@@ -75,47 +87,90 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    procedure taarch64paramanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
       var
       var
-        paraloc : pcgparalocation;
-        def : tdef;
+        i: longint;
+        sym: tsym;
+        tmpelecount: longint;
       begin
       begin
-        if nr<1 then
-          internalerror(2002070801);
-        def:=tparavarsym(pd.paras[nr-1]).vardef;
-        cgpara.reset;
-        cgpara.size:=def_cgsize(def);
-        cgpara.intsize:=tcgsize2size[cgpara.size];
-        cgpara.alignment:=std_param_align;
-        cgpara.def:=def;
-        paraloc:=cgpara.add_location;
-        with paraloc^ do
-          begin
-            size:=OS_INT;
-            { the four first parameters are passed into registers }
-            if nr<=8 then
-              begin
-                loc:=LOC_REGISTER;
-                register:=newreg(R_INTREGISTER,RS_X0+nr-1,R_SUBWHOLE);
-              end
-            else
-              begin
-                { the other parameters are passed on the stack }
-                loc:=LOC_REFERENCE;
-                reference.index:=NR_STACK_POINTER_REG;
-                reference.offset:=(nr-9)*8;
-              end;
-          end;
+        result:=false;
+        case p.typ of
+          arraydef:
+            begin
+              if is_special_array(p) then
+                exit;
+              case tarraydef(p).elementdef.typ of
+                floatdef:
+                  begin
+                    { an array of empty records has no influence }
+                    if tarraydef(p).elementdef.size=0 then
+                      begin
+                        result:=true;
+                        exit
+                      end;
+                    tmpelecount:=0;
+                    if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
+                      exit;
+                    { tmpelecount now contains the number of hfa elements in a
+                      single array element (e.g. 2 if it's an array of a record
+                      containing two singles) -> multiply by number of elements
+                      in the array }
+                    inc(elecount,tarraydef(p).elecount*tmpelecount);
+                    if elecount>4 then
+                      exit;
+                  end;
+                else
+                  result:=is_hfa_internal(tarraydef(p).elementdef,basedef,elecount);
+                end;
+            end;
+          floatdef:
+            begin
+              if not assigned(basedef) then
+                basedef:=p
+              else if basedef<>p then
+                exit;
+              inc(elecount);
+              result:=true;
+            end;
+          recorddef:
+            begin
+              for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
+                begin
+                  sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
+                  if sym.typ<>fieldvarsym then
+                    continue;
+                  if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
+                    exit
+                end;
+              result:=true;
+            end;
+          else
+            exit
+        end;
       end;
       end;
 
 
 
 
-    function Is_HFA(p : tdef) : boolean;
+    { Returns whether a def is a "homogeneous float array" at the machine level.
+      This means that in the memory layout, the def only consists of maximally
+      4 floating point values that appear consecutively in memory }
+    function is_hfa(p: tdef; out basedef: tdef) : boolean;
+      var
+        elecount: longint;
       begin
       begin
         result:=false;
         result:=false;
+        basedef:=nil;
+        elecount:=0;
+        result:=is_hfa_internal(p,basedef,elecount);
+        result:=
+          result and
+          (elecount>0) and
+          (p.size=basedef.size*elecount)
       end;
       end;
 
 
 
 
-    function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+    function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
+      var
+        hfabasedef: tdef;
       begin
       begin
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
            if push_addr_param for the def is true
            if push_addr_param for the def is true
@@ -134,7 +189,10 @@ unit cpupara;
             classrefdef:
             classrefdef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             recorddef:
             recorddef:
-              getparaloc:=LOC_REGISTER;
+              if not is_hfa(p,hfabasedef) then
+                getparaloc:=LOC_REGISTER
+              else
+                getparaloc:=LOC_MMREGISTER;
             objectdef:
             objectdef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             stringdef:
             stringdef:
@@ -147,12 +205,12 @@ unit cpupara;
             filedef:
             filedef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             arraydef:
             arraydef:
-              getparaloc:=LOC_REFERENCE;
-            setdef:
-              if is_smallset(p) then
+              if not is_hfa(p,hfabasedef) then
                 getparaloc:=LOC_REGISTER
                 getparaloc:=LOC_REGISTER
               else
               else
-                getparaloc:=LOC_REFERENCE;
+                getparaloc:=LOC_MMREGISTER;
+            setdef:
+              getparaloc:=LOC_REGISTER;
             variantdef:
             variantdef:
               getparaloc:=LOC_REGISTER;
               getparaloc:=LOC_REGISTER;
             { avoid problems with errornous definitions }
             { avoid problems with errornous definitions }
@@ -164,7 +222,9 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function taarch64paramanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
+    function taarch64paramanager.push_addr_param(varspez: tvarspez; def :tdef; calloption: tproccalloption): boolean;
+      var
+        hfabasedef: tdef;
       begin
       begin
         result:=false;
         result:=false;
         if varspez in [vs_var,vs_out,vs_constref] then
         if varspez in [vs_var,vs_out,vs_constref] then
@@ -174,19 +234,36 @@ unit cpupara;
           end;
           end;
         case def.typ of
         case def.typ of
           objectdef:
           objectdef:
-            result:=not(Is_HFA(def) and (is_object(def) and ((varspez=vs_const) or (def.size=0));
+            result:=is_object(def);
           recorddef:
           recorddef:
-            { note: should this ever be changed, make sure that const records
-                are always passed by reference for calloption=pocall_mwpascal }
-            result:=(varspez=vs_const) or (def.size=0);
+            { ABI: any composite > 16 bytes that not a hfa/hva
+              Special case: MWPascal, which passes all const parameters by
+                reference for compatibility reasons
+            }
+            result:=
+              ((varspez=vs_const) and
+               (calloption=pocall_mwpascal)) or
+              (not is_hfa(def,hfabasedef) and
+               (def.size>16));
           variantdef,
           variantdef,
           formaldef:
           formaldef:
             result:=true;
             result:=true;
+          { arrays are composites and hence treated the same as records by the
+            ABI (watch out for C, where an array is a pointer)
+            Also: all other platforms pass const arrays by reference. Do the
+              same here, because there is too much hacky code out there that
+              relies on this ("array[0..0] of x" passed as const parameter and
+              then indexed beyond its bounds) }
           arraydef:
           arraydef:
-            result:=(tarraydef(def).highrange>=tarraydef(def).lowrange) or
-                             is_open_array(def) or
-                             is_array_of_const(def) or
-                             is_array_constructor(def);
+            result:=
+              (calloption in cdecl_pocalls) or
+              is_open_array(def) or
+              is_array_of_const(def) or
+              is_array_constructor(def) or
+              ((tarraydef(def).highrange>=tarraydef(def).lowrange) and
+               ((varspez=vs_const) or
+                (not is_hfa(def,hfabasedef) and
+                 (def.size>16))));
           setdef :
           setdef :
             result:=def.size>16;
             result:=def.size>16;
           stringdef :
           stringdef :
@@ -195,511 +272,370 @@ unit cpupara;
       end;
       end;
 
 
 
 
-    function taarch64paramanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
-      var
-        i: longint;
-        sym: tsym;
-        fpufield: boolean;
+    function taarch64paramanager.ret_in_param(def: tdef; pd: tabstractprocdef): boolean;
       begin
       begin
         if handle_common_ret_in_param(def,pd,result) then
         if handle_common_ret_in_param(def,pd,result) then
           exit;
           exit;
-        case def.typ of
-          recorddef:
-            begin
-              result:=def.size>4;
-              if not result and
-                 (target_info.abi in [abi_default,abi_armeb]) then
-                begin
-                  { in case of the old ARM abi (APCS), a struct is returned in
-                    a register only if it is simple. And what is a (non-)simple
-                    struct:
-
-                    "A non-simple type is any non-floating-point type of size
-                     greater than one word (including structures containing only
-                     floating-point fields), and certain single-word structured
-                     types."
-                       (-- ARM APCS documentation)
-
-                    So only floating point types or more than one word ->
-                    definitely non-simple (more than one word is already
-                    checked above). This includes unions/variant records with
-                    overlaid floating point and integer fields.
-
-                    Smaller than one word struct types are simple if they are
-                    "integer-like", and:
-
-                    "A structure is termed integer-like if its size is less than
-                    or equal to one word, and the offset of each of its
-                    addressable subfields is zero."
-                      (-- ARM APCS documentation)
-
-                    An "addressable subfield" is a field of which you can take
-                    the address, which in practive means any non-bitfield.
-                    In Pascal, there is no way to express the difference that
-                    you can have in C between "char" and "int :8". In this
-                    context, we use the fake distinction that a type defined
-                    inside the record itself (such as "a: 0..255;") indicates
-                    a bitpacked field while a field using a different type
-                    (such as "a: byte;") is not.
-                  }
-                  for i:=0 to trecorddef(def).symtable.SymList.count-1 do
-                    begin
-                      sym:=tsym(trecorddef(def).symtable.SymList[i]);
-                      if sym.typ<>fieldvarsym then
-                        continue;
-                      { bitfield -> ignore }
-                      if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
-                         (tfieldvarsym(sym).vardef.typ in [orddef,enumdef]) and
-                         (tfieldvarsym(sym).vardef.owner.defowner=def) then
-                        continue;
-                      { all other fields must be at offset zero }
-                      if tfieldvarsym(sym).fieldoffset<>0 then
-                        begin
-                          result:=true;
-                          exit;
-                        end;
-                      { floating point field -> also by reference }
-                      if tfieldvarsym(sym).vardef.typ=floatdef then
-                        begin
-                          result:=true;
-                          exit;
-                        end;
-                    end;
-                end;
-            end;
-          procvardef:
-            if not tprocvardef(def).is_addressonly then
-              result:=true
-            else
-              result:=false
-          else
-            result:=inherited ret_in_param(def,pd);
-        end;
-      end;
-
-
-    procedure taarch64paramanager.init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
-      begin
-        curintreg:=RS_R0;
-        curfloatreg:=RS_F0;
-        curmmreg:=RS_D0;
-        cur_stack_offset:=0;
-        sparesinglereg := NR_NO;
+        { ABI: if the parameter would be passed in registers, it is returned
+            in those registers; otherwise, it's returned by reference }
+        result:=push_addr_param(vs_value,def,pd.proccalloption);
       end;
       end;
 
 
 
 
-    function taarch64paramanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-        var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
-
+    procedure taarch64paramanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
       var
       var
-        nextintreg,nextfloatreg,nextmmreg : tsuperregister;
-        paradef : tdef;
-        paraloc : pcgparalocation;
-        stack_offset : aword;
-        hp : tparavarsym;
-        loc : tcgloc;
-        paracgsize   : tcgsize;
-        paralen : longint;
-        i : integer;
-        firstparaloc: boolean;
-
-      procedure assignintreg;
-        begin
-          { In case of po_delphi_nested_cc, the parent frame pointer
-            is always passed on the stack. }
-           if (nextintreg<=RS_R3) and
-              (not(vo_is_parentfp in hp.varoptions) or
-               not(po_delphi_nested_cc in p.procoptions)) then
-             begin
-               paraloc^.loc:=LOC_REGISTER;
-               paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-               inc(nextintreg);
-             end
-           else
-             begin
-               paraloc^.loc:=LOC_REFERENCE;
-               paraloc^.reference.index:=NR_STACK_POINTER_REG;
-               paraloc^.reference.offset:=stack_offset;
-               inc(stack_offset,4);
-            end;
-        end;
-
-
+        hp: tparavarsym;
+        i: longint;
       begin
       begin
-        result:=0;
-        nextintreg:=curintreg;
-        nextfloatreg:=curfloatreg;
-        nextmmreg:=curmmreg;
-        stack_offset:=cur_stack_offset;
-
         for i:=0 to paras.count-1 do
         for i:=0 to paras.count-1 do
           begin
           begin
             hp:=tparavarsym(paras[i]);
             hp:=tparavarsym(paras[i]);
-            paradef:=hp.vardef;
-
-            hp.paraloc[side].reset;
-
-            { currently only support C-style array of const,
-              there should be no location assigned to the vararg array itself }
-            if (p.proccalloption in cstylearrayofconst) and
-               is_array_of_const(paradef) then
-              begin
-                paraloc:=hp.paraloc[side].add_location;
-                { hack: the paraloc must be valid, but is not actually used }
-                paraloc^.loc:=LOC_REGISTER;
-                paraloc^.register:=NR_R0;
-                paraloc^.size:=OS_ADDR;
-                break;
-              end;
-
-            if push_addr_param(hp.varspez,paradef,p.proccalloption) then
+            { hidden function result parameter is passed in X8 (doesn't have to
+              be valid on return) according to the ABI
+
+              -- don't follow the ABI for managed types, because
+               a) they are passed in registers as parameters, so we should also
+                  return them in a register to be ABI-compliant (which we can't
+                  because the entire compiler is built around the idea that
+                  they are returned by reference, for ref-counting performance
+                  and Delphi-compatibility reasons)
+               b) there are hacks in the system unit that expect that you can
+                  call
+                    function f: com_interface;
+                  as
+                    procedure p(out o: obj);
+                  That can only work in case we do not use x8 to return them
+                  from the function, but the regular first parameter register.
+
+              As the ABI says this behaviour is ok for C++ classes with a
+              non-trivial copy constructor or destructor, it seems reasonable
+              for us to do this for managed types as well.}
+            if (vo_is_funcret in hp.varoptions) and
+               not is_managed_type(hp.vardef) then
               begin
               begin
-                paradef:=getpointerdef(paradef);
-                loc:=LOC_REGISTER;
-                paracgsize := OS_ADDR;
-                paralen := tcgsize2size[OS_ADDR];
-              end
-            else
-              begin
-                if not is_special_array(paradef) then
-                  paralen := paradef.size
-                else
-                  paralen := tcgsize2size[def_cgsize(paradef)];
-                loc := getparaloc(p.proccalloption,paradef,isvariadic);
-                if (paradef.typ in [objectdef,arraydef,recorddef]) and
-                  not is_special_array(paradef) and
-                  (hp.varspez in [vs_value,vs_const]) then
-                  paracgsize := int_cgsize(paralen)
-                else
+                hp.paraloc[side].reset;
+                hp.paraloc[side].size:=OS_ADDR;
+                hp.paraloc[side].alignment:=voidpointertype.alignment;
+                hp.paraloc[side].intsize:=voidpointertype.size;
+                hp.paraloc[side].def:=getpointerdef(hp.vardef);
+                with hp.paraloc[side].add_location^ do
                   begin
                   begin
-                    paracgsize:=def_cgsize(paradef);
-                    { for things like formaldef }
-                    if (paracgsize=OS_NO) then
-                      begin
-                        paracgsize:=OS_ADDR;
-                        paralen:=tcgsize2size[OS_ADDR];
-                        paradef:=voidpointertype;
-                      end;
+                    size:=OS_ADDR;
+                    def:=hp.paraloc[side].def;
+                    loc:=LOC_REGISTER;
+                    register:=NR_X8;
                   end
                   end
-              end;
-
-             hp.paraloc[side].size:=paracgsize;
-             hp.paraloc[side].Alignment:=std_param_align;
-             hp.paraloc[side].intsize:=paralen;
-             hp.paraloc[side].def:=paradef;
-             firstparaloc:=true;
-
-{$ifdef EXTDEBUG}
-             if paralen=0 then
-               internalerror(200410311);
-{$endif EXTDEBUG}
-             while paralen>0 do
-               begin
-                 paraloc:=hp.paraloc[side].add_location;
-
-                 if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
-                   case paracgsize of
-                     OS_F32:
-                       paraloc^.size:=OS_32;
-                     OS_F64:
-                       paraloc^.size:=OS_32;
-                     else
-                       internalerror(2005082901);
-                   end
-                 else if (paracgsize in [OS_NO,OS_64,OS_S64]) then
-                   paraloc^.size := OS_32
-                 else
-                   paraloc^.size:=paracgsize;
-                 case loc of
-                    LOC_REGISTER:
-                      begin
-                        { align registers for eabi }
-                        if (target_info.abi in [abi_eabi,abi_eabihf]) and
-                           firstparaloc and
-                           (paradef.alignment=8) then
-                          begin
-                            if (nextintreg in [RS_R1,RS_R3]) then
-                              inc(nextintreg)
-                            else if nextintreg>RS_R3 then
-                              stack_offset:=align(stack_offset,8);
-                          end;
-                        { this is not abi compliant
-                          why? (FK) }
-                        if nextintreg<=RS_R3 then
-                          begin
-                            paraloc^.loc:=LOC_REGISTER;
-                            paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-                            inc(nextintreg);
-                          end
-                        else
-                          begin
-                            { LOC_REFERENCE always contains everything that's left }
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            if (side=callerside) then
-                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            inc(stack_offset,align(paralen,4));
-                            paralen:=0;
-                         end;
-                      end;
-                    LOC_FPUREGISTER:
-                      begin
-                        if nextfloatreg<=RS_F3 then
-                          begin
-                            paraloc^.loc:=LOC_FPUREGISTER;
-                            paraloc^.register:=newreg(R_FPUREGISTER,nextfloatreg,R_SUBWHOLE);
-                            inc(nextfloatreg);
-                          end
-                        else
-                          begin
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            case paraloc^.size of
-                              OS_F32:
-                                inc(stack_offset,4);
-                              OS_F64:
-                                inc(stack_offset,8);
-                              OS_F80:
-                                inc(stack_offset,10);
-                              OS_F128:
-                                inc(stack_offset,16);
-                              else
-                                internalerror(200403201);
-                            end;
-                          end;
-                      end;
-                    LOC_MMREGISTER:
-                      begin
-                        if (nextmmreg<=RS_D7) or
-                           ((paraloc^.size = OS_F32) and
-                            (sparesinglereg<>NR_NO)) then
-                          begin
-                            paraloc^.loc:=LOC_MMREGISTER;
-                            case paraloc^.size of
-                              OS_F32:
-                                if sparesinglereg = NR_NO then 
-                                  begin     
-                                    paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFS);
-                                    sparesinglereg:=newreg(R_MMREGISTER,nextmmreg-RS_S0+RS_S1,R_SUBFS);
-                                    inc(nextmmreg);
-                                  end
-                                else
-                                  begin
-                                    paraloc^.register:=sparesinglereg;
-                                    sparesinglereg := NR_NO;
-                                  end;
-                              OS_F64:
-                                begin
-                                  paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFD);
-                                  inc(nextmmreg);
-                                end;
-                              else
-                                internalerror(2012031601);
-                            end;
-                          end
-                        else
-                          begin
-                            { once a floating point parameters has been placed
-                            on the stack we must not pass any more in vfp regs
-                            even if there is a single precision register still
-                            free}
-                            sparesinglereg := NR_NO;
-                            { LOC_REFERENCE always contains everything that's left }
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            if (side=callerside) then
-                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            inc(stack_offset,align(paralen,4));
-                            paralen:=0;
-                         end;
-                      end;
-                    LOC_REFERENCE:
-                      begin
-                        if push_addr_param(hp.varspez,paradef,p.proccalloption) then
-                          begin
-                            paraloc^.size:=OS_ADDR;
-                            assignintreg
-                          end
-                        else
-                          begin
-                            { align stack for eabi }
-                            if (target_info.abi in [abi_eabi,abi_eabihf]) and
-                               firstparaloc and
-                               (paradef.alignment=8) then
-                              stack_offset:=align(stack_offset,8);
-
-                             paraloc^.size:=paracgsize;
-                             paraloc^.loc:=LOC_REFERENCE;
-                             paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                             paraloc^.reference.offset:=stack_offset;
-                             inc(stack_offset,align(paralen,4));
-                             paralen:=0
-                          end;
-                      end;
-                    else
-                      internalerror(2002071002);
-                 end;
-                 if side=calleeside then
-                   begin
-                     if paraloc^.loc=LOC_REFERENCE then
-                       begin
-                         paraloc^.reference.index:=NR_FRAME_POINTER_REG;
-                         { on non-Darwin, the framepointer contains the value
-                           of the stack pointer on entry. On Darwin, the
-                           framepointer points to the previously saved
-                           framepointer (which is followed only by the saved
-                           return address -> framepointer + 4 = stack pointer
-                           on entry }
-                         if not(target_info.system in systems_darwin) then
-                           inc(paraloc^.reference.offset,4)
-                         else
-                           inc(paraloc^.reference.offset,8);
-                       end;
-                   end;
-                 dec(paralen,tcgsize2size[paraloc^.size]);
-                 firstparaloc:=false
-               end;
+              end
+            else
+              alloc_para(hp.paraloc[side],p,hp.varspez,side,hp.vardef,isvariadic,
+                (vo_is_parentfp in hp.varoptions) and
+                (po_delphi_nested_cc in p.procoptions));
           end;
           end;
-        curintreg:=nextintreg;
-        curfloatreg:=nextfloatreg;
-        curmmreg:=nextmmreg;
-        cur_stack_offset:=stack_offset;
-        result:=cur_stack_offset;
       end;
       end;
 
 
 
 
     function  taarch64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
     function  taarch64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       var
       var
-        paraloc : pcgparalocation;
-        retcgsize  : tcgsize;
+        retcgsize: tcgsize;
       begin
       begin
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
            exit;
            exit;
 
 
-        paraloc:=result.add_location;
-        { Return in FPU register? }
-        if result.def.typ=floatdef then
+         { in this case, it must be returned in registers as if it were passed
+           as the first parameter }
+         init_para_alloc_values;
+         alloc_para(result,p,vs_value,side,result.def,false,false);
+         { sanity check (LOC_VOID for empty records) }
+         if not assigned(result.location) or
+            not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
+           internalerror(2014113001);
+      end;
+
+    function taarch64paramanager.param_use_paraloc(const cgpara: tcgpara): boolean;
+      begin
+        { we always set up a stack frame -> we can always access the parameters
+          this way }
+        result:=
+          (cgpara.location^.loc=LOC_REFERENCE) and
+          not assigned(cgpara.location^.next);
+      end;
+
+
+    procedure taarch64paramanager.init_para_alloc_values;
+      begin
+        curintreg:=RS_FIRST_INT_PARAM_SUPREG;
+        curmmreg:=RS_FIRST_MM_PARAM_SUPREG;
+        curstackoffset:=0;
+      end;
+
+
+    procedure taarch64paramanager.alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+      var
+        hfabasedef, locdef: tdef;
+        paraloc: pcgparalocation;
+        paralen, stackslotlen: asizeint;
+        loc: tcgloc;
+        paracgsize, locsize: tcgsize;
+        firstparaloc: boolean;
+      begin
+        result.reset;
+
+        { currently only support C-style array of const,
+          there should be no location assigned to the vararg array itself }
+        if (p.proccalloption in cstylearrayofconst) and
+           is_array_of_const(paradef) then
           begin
           begin
-            if target_info.abi = abi_eabihf then 
-              begin
-                paraloc^.loc:=LOC_MMREGISTER;
-                case retcgsize of
-                  OS_64,
-                  OS_F64:
-                    begin
-                      paraloc^.register:=NR_MM_RESULT_REG;
-                    end;
-                  OS_32,
-                  OS_F32:
-                    begin
-                      paraloc^.register:=NR_S0;
-                    end;
-                  else
-                    internalerror(2012032501);
-                end;
-                paraloc^.size:=retcgsize;
-              end
-            else if (p.proccalloption in [pocall_softfloat]) or
-               (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
-              begin
-                case retcgsize of
-                  OS_64,
-                  OS_F64:
-                    begin
-                      paraloc^.loc:=LOC_REGISTER;
-                      if target_info.endian = endian_big then
-                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
-                      else
-                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
-                      paraloc^.size:=OS_32;
-                      paraloc:=result.add_location;
-                      paraloc^.loc:=LOC_REGISTER;
-                      if target_info.endian = endian_big then
-                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
-                      else
-                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
-                      paraloc^.size:=OS_32;
-                    end;
-                  OS_32,
-                  OS_F32:
-                    begin
-                      paraloc^.loc:=LOC_REGISTER;
-                      paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                      paraloc^.size:=OS_32;
-                    end;
-                  else
-                    internalerror(2005082603);
-                end;
-              end
-            else
-              begin
-                paraloc^.loc:=LOC_FPUREGISTER;
-                paraloc^.register:=NR_FPU_RESULT_REG;
-                paraloc^.size:=retcgsize;
-              end;
+            paraloc:=result.add_location;
+            { hack: the paraloc must be valid, but is not actually used }
+            paraloc^.loc:=LOC_REGISTER;
+            paraloc^.register:=NR_X0;
+            paraloc^.size:=OS_ADDR;
+            exit;
+          end;
+
+        if push_addr_param(varspez,paradef,p.proccalloption) then
+          begin
+            paradef:=getpointerdef(paradef);
+            loc:=LOC_REGISTER;
+            paracgsize:=OS_ADDR;
+            paralen:=tcgsize2size[OS_ADDR];
           end
           end
-          { Return in register }
         else
         else
           begin
           begin
-            if retcgsize in [OS_64,OS_S64] then
-              begin
-                paraloc^.loc:=LOC_REGISTER;
-                if target_info.endian = endian_big then
-                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
-                else
-                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
-                paraloc^.size:=OS_32;
-                paraloc:=result.add_location;
-                paraloc^.loc:=LOC_REGISTER;
-                if target_info.endian = endian_big then
-                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
-                else
-                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
-                paraloc^.size:=OS_32;
-              end
+            if not is_special_array(paradef) then
+              paralen:=paradef.size
+            else
+              paralen:=tcgsize2size[def_cgsize(paradef)];
+            loc:=getparaloc(p.proccalloption,paradef);
+            if (paradef.typ in [objectdef,arraydef,recorddef]) and
+               not is_special_array(paradef) and
+               (varspez in [vs_value,vs_const]) then
+              paracgsize:=int_cgsize(paralen)
             else
             else
               begin
               begin
-                paraloc^.loc:=LOC_REGISTER;
-                paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                if (result.intsize<>3) then
-                  paraloc^.size:=retcgsize
-                else
-                  paraloc^.size:=OS_32;
-              end;
+                paracgsize:=def_cgsize(paradef);
+                { for things like formaldef }
+                if paracgsize=OS_NO then
+                  begin
+                    paracgsize:=OS_ADDR;
+                    paralen:=tcgsize2size[OS_ADDR];
+                    paradef:=voidpointertype;
+                  end;
+              end
           end;
           end;
+
+          { get hfa basedef if applicable }
+          if not is_hfa(paradef,hfabasedef) then
+            hfabasedef:=nil;
+
+         result.size:=paracgsize;
+         result.alignment:=std_param_align;
+         result.intsize:=paralen;
+         result.def:=paradef;
+
+         { empty record: skipped (explicitly defined by Apple ABI, undefined
+           by general ABI; libffi also skips them in all cases) }
+         if not is_special_array(paradef) and
+            (paradef.size=0) then
+           begin
+             paraloc:=result.add_location;
+             paraloc^.loc:=LOC_VOID;
+             paraloc^.def:=paradef;
+             paraloc^.size:=OS_NO;
+             exit;
+           end;
+
+         { sufficient registers left? }
+         case loc of
+           LOC_REGISTER:
+             begin
+               { In case of po_delphi_nested_cc, the parent frame pointer
+                 is always passed on the stack. }
+               if isdelphinestedcc then
+                 loc:=LOC_REFERENCE
+               else if curintreg+((paralen-1) shr 3)>RS_LAST_INT_PARAM_SUPREG then
+                 begin
+                   { not enough integer registers left -> no more register
+                     parameters, copy all to stack
+                   }
+                   curintreg:=succ(RS_LAST_INT_PARAM_SUPREG);
+                   loc:=LOC_REFERENCE;
+                 end;
+             end;
+           LOC_MMREGISTER:
+             begin;
+               { every hfa element must be passed in a separate register }
+               if (assigned(hfabasedef) and
+                   (curmmreg+(paralen div hfabasedef.size)>RS_LAST_MM_PARAM_SUPREG)) or
+                  (curmmreg+((paralen-1) shr 3)>RS_LAST_MM_PARAM_SUPREG) then
+                 begin
+                   { not enough mm registers left -> no more register
+                     parameters, copy all to stack
+                   }
+                   curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
+                   loc:=LOC_REFERENCE;
+                 end;
+             end;
+         end;
+
+         { allocate registers/stack locations }
+         firstparaloc:=true;
+         repeat
+           paraloc:=result.add_location;
+
+           { set paraloc size/def }
+           if assigned(hfabasedef) then
+             begin
+               locsize:=def_cgsize(hfabasedef);
+               locdef:=hfabasedef;
+             end
+           { make sure we don't lose whether or not the type is signed }
+           else if (loc=LOC_REGISTER) and
+                   (paradef.typ<>orddef) then
+             begin
+               locsize:=int_cgsize(paralen);
+               locdef:=get_paraloc_def(paradef,paralen,firstparaloc);
+             end
+           else
+             begin
+               locsize:=paracgsize;
+               locdef:=paradef;
+             end;
+           if locsize in [OS_NO,OS_128,OS_S128] then
+             begin
+               if paralen>4 then
+                 begin
+                   paraloc^.size:=OS_INT;
+                   paraloc^.def:=u64inttype;
+                 end
+               else
+                 begin
+                   { for 3-byte records }
+                   paraloc^.size:=OS_32;
+                   paraloc^.def:=u32inttype;
+                 end;
+             end
+           else
+             begin
+               paraloc^.size:=locsize;
+               paraloc^.def:=locdef;
+             end;
+
+           { paraloc loc }
+           paraloc^.loc:=loc;
+
+           { assign register/stack address }
+           case loc of
+             LOC_REGISTER:
+               begin
+                 paraloc^.register:=newreg(R_INTREGISTER,curintreg,cgsize2subreg(R_INTREGISTER,paraloc^.size));
+                 inc(curintreg);
+                 dec(paralen,tcgsize2size[paraloc^.size]);
+
+                 { "The general ABI specifies that it is the callee's
+                    responsibility to sign or zero-extend arguments having fewer
+                    than 32 bits, and that unused bits in a register are
+                    unspecified. In iOS, however, the caller must perform such
+                    extensions, up to 32 bits." }
+                 if (target_info.abi=abi_aarch64_darwin) and
+                    (side=callerside) and
+                    is_ordinal(paradef) and
+                    (paradef.size<4) then
+                   paraloc^.size:=OS_32;
+
+                 { in case it's a composite, "The argument is passed as though
+                   it had been loaded into the registers from a double-word-
+                   aligned address with an appropriate sequence of LDR
+                   instructions loading consecutive registers from memory" ->
+                   in case of big endian, values in not completely filled
+                   registers must be shifted to the top bits }
+                 if (target_info.endian=endian_big) and
+                    not(paraloc^.size in [OS_64,OS_S64]) and
+                    (paradef.typ in [setdef,recorddef,arraydef,objectdef]) then
+                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size]);
+               end;
+             LOC_MMREGISTER:
+               begin
+                 paraloc^.register:=newreg(R_MMREGISTER,curmmreg,cgsize2subreg(R_MMREGISTER,paraloc^.size));
+                 inc(curmmreg);
+                 dec(paralen,tcgsize2size[paraloc^.size]);
+               end;
+             LOC_REFERENCE:
+               begin
+                  paraloc^.size:=paracgsize;
+                  paraloc^.loc:=LOC_REFERENCE;
+
+                  { the current stack offset may not be properly aligned in
+                    case we're on Darwin have allocated a non-variadic argument
+                    < 8 bytes previously }
+                  if target_info.abi=abi_aarch64_darwin then
+                    curstackoffset:=align(curstackoffset,paraloc^.def.alignment);
+
+                  { on Darwin, non-variadic arguments take up their actual size
+                    on the stack; on other platforms, they take up a multiple of
+                    8 bytes }
+                  if (target_info.abi=abi_aarch64_darwin) and
+                     not isvariadic then
+                    stackslotlen:=paralen
+                  else
+                    stackslotlen:=align(paralen,8);
+
+                  { from the ABI: if arguments occupy partial stack space, they
+                    have to occupy the lowest significant bits of a register
+                    containing that value which is then stored to memory ->
+                    in case of big endian, skip the alignment bytes (if any) }
+                  if target_info.endian=endian_little then
+                    paraloc^.reference.offset:=curstackoffset
+                  else
+                    paraloc^.reference.offset:=curstackoffset+stackslotlen-paralen;
+                  if side=callerside then
+                    paraloc^.reference.index:=NR_STACK_POINTER_REG
+                  else
+                    begin
+                      paraloc^.reference.index:=NR_FRAME_POINTER_REG;
+                      inc(paraloc^.reference.offset,16);
+                    end;
+                  inc(curstackoffset,stackslotlen);
+                  paralen:=0
+               end;
+             else
+               internalerror(2002071002);
+           end;
+         firstparaloc:=false;
+         { <=0 for sign/zero-extended locations }
+         until paralen<=0;
       end;
       end;
 
 
 
 
-    function taarch64paramanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
-      var
-        cur_stack_offset: aword;
-        curintreg, curfloatreg, curmmreg: tsuperregister;
-        sparesinglereg:tregister;
+    function taarch64paramanager.create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;
       begin
       begin
-        init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_para_alloc_values;
 
 
-        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,false);
+        create_paraloc_info_intern(p,side,p.paras,false);
+        result:=curstackoffset;
 
 
         create_funcretloc_info(p,side);
         create_funcretloc_info(p,side);
      end;
      end;
 
 
 
 
-    function taarch64paramanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
-      var
-        cur_stack_offset: aword;
-        curintreg, curfloatreg, curmmreg: tsuperregister;
-        sparesinglereg:tregister;
+    function taarch64paramanager.create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;
       begin
       begin
-        init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_para_alloc_values;
 
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
-        if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+        { non-variadic parameters }
+        create_paraloc_info_intern(p,callerside,p.paras,false);
+        if p.proccalloption in cstylearrayofconst then
+          begin
+            { on Darwin, we cannot use any registers for variadic parameters }
+            if target_info.abi=abi_aarch64_darwin then
+              begin
+                curintreg:=succ(RS_LAST_INT_PARAM_SUPREG);
+                curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
+              end;
+            { continue loading the parameters  }
+            create_paraloc_info_intern(p,callerside,varargspara,true);
+            result:=curstackoffset;
+          end
         else
         else
           internalerror(200410231);
           internalerror(200410231);
       end;
       end;

+ 68 - 0
compiler/aarch64/cpupi.pas

@@ -0,0 +1,68 @@
+{
+    Copyright (c) 2002 by Florian Klaempfl
+
+    This unit contains the CPU specific part of tprocinfo
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cpupi;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    procinfo,
+    psub;
+
+  type
+    taarch64procinfo=class(tcgprocinfo)
+      constructor create(aparent: tprocinfo); override;
+      procedure set_first_temp_offset; override;
+    end;
+
+implementation
+
+  uses
+    tgobj,
+    cpubase;
+
+  constructor taarch64procinfo.create(aparent: tprocinfo);
+    begin
+      inherited;
+      { use the stack pointer as framepointer, because
+         1) we exactly know the offsets of the temps from the stack pointer
+            after pass 1 (based on the require parameter stack size for called
+            routines), while we don't know it for the frame pointer (it depends
+            on the number of saved registers)
+         2) temp offsets from the stack pointer are positive while those from
+            the frame pointer are negative, and we can directly encode much
+            bigger positive offsets in the instructions
+      }
+      framepointer:=NR_STACK_POINTER_REG;
+    end;
+
+  procedure taarch64procinfo.set_first_temp_offset;
+    begin
+     { leave room for allocated parameters }
+     tg.setfirsttemp(align(maxpushedparasize,16));
+    end;
+
+
+begin
+  cprocinfo:=taarch64procinfo;
+end.

+ 70 - 0
compiler/aarch64/cputarg.pas

@@ -0,0 +1,70 @@
+{
+    Copyright (c) 2001-2002 by Peter Vreman
+
+    Includes the AArch64 dependent target units
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cputarg;
+
+{$i fpcdefs.inc}
+
+interface
+
+
+implementation
+
+    uses
+      systems { prevent a syntax error when nothing is included }
+
+{**************************************
+             Targets
+**************************************}
+
+    {$ifndef NOTARGETLINUX}
+//      ,t_linux
+    {$endif}
+    {$ifndef NOTARGETBSD}
+      ,t_bsd
+    {$endif}
+
+{**************************************
+             Assemblers
+**************************************}
+
+    {$ifndef NOAGCPUGAS}
+      ,agcpugas
+    {$endif}
+
+{**************************************
+        Assembler Readers
+**************************************}
+
+  {$ifndef NoRaarmgas}
+       ,racpugas
+  {$endif NoRaarmgas}
+
+{**************************************
+             Debuginfo
+**************************************}
+
+  {$ifndef NoDbgDwarf}
+      ,dbgdwarf
+  {$endif NoDbgDwarf}
+      ;
+
+end.

+ 156 - 0
compiler/aarch64/hlcgcpu.pas

@@ -0,0 +1,156 @@
+{
+    Copyright (c) 1998-2010 by Florian Klaempfl and Jonas Maebe
+    Member of the Free Pascal development team
+
+    This unit contains routines to create a pass-through high-level code
+    generator. This is used by most regular code generators.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+unit hlcgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    symtype,
+    aasmdata,
+    cgbase,cgutils,
+    hlcgobj, hlcg2ll;
+
+  type
+    thlcgaarch64 = class(thlcg2ll)
+      procedure a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister); override;
+      procedure a_load_subsetreg_subsetreg(list: TAsmlist; fromsubsetsize, tosubsetsize: tdef; const fromsreg, tosreg: tsubsetregister); override;
+     protected
+      procedure a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); override;
+    end;
+
+  procedure create_hlcodegen;
+
+implementation
+
+  uses
+    defutil,
+    cpubase,aasmcpu,
+    cgobj,cgcpu;
+
+  procedure thlcgaarch64.a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister);
+    var
+      op: tasmop;
+      tocgsize: tcgsize;
+      tmpdestreg: tregister;
+    begin
+      tocgsize:=def_cgsize(tosize);
+      if (sreg.startbit<>0) or
+         not(sreg.bitlen in [32,64]) then
+        begin
+          if is_signed(subsetsize) then
+            op:=A_SBFX
+          else
+            op:=A_UBFX;
+          { source and destination register of SBFX/UBFX have to be the same size }
+          if (sreg.subsetregsize in [OS_64,OS_S64]) and
+             not(tocgsize in [OS_64,OS_S64]) then
+            tmpdestreg:=cg.getintregister(list,OS_64)
+          else if not(sreg.subsetregsize in [OS_64,OS_S64]) and
+             (tocgsize in [OS_64,OS_S64]) then
+            tmpdestreg:=cg.getintregister(list,OS_32)
+          else
+            tmpdestreg:=destreg;
+          list.concat(taicpu.op_reg_reg_const_const(op,tmpdestreg,sreg.subsetreg,sreg.startbit,sreg.bitlen));
+          { need to sign extend further or truncate? }
+          if (sreg.subsetregsize=OS_S64) and
+             not(tocgsize in [OS_64,OS_S64]) then
+            cg.a_load_reg_reg(list,OS_S64,tocgsize,tmpdestreg,destreg)
+          else if is_signed(subsetsize) and
+             (tocgsize in [OS_8,OS_16]) then
+            cg.a_load_reg_reg(list,OS_32,tocgsize,tmpdestreg,destreg)
+          else if tmpdestreg<>destreg then
+            cg.a_load_reg_reg(list,def_cgsize(subsetsize),tocgsize,tmpdestreg,destreg)
+        end
+      else
+        cg.a_load_reg_reg(list,def_cgsize(subsetsize),tocgsize,sreg.subsetreg,destreg);
+    end;
+
+
+  procedure makeregssamesize(list: tasmlist; fromsize, tosize: tcgsize; orgfromreg, orgtoreg: tregister; out newfromreg, newtoreg: tregister);
+    begin
+      if (fromsize in [OS_S64,OS_64])<>
+         (tosize in [OS_S64,OS_64]) then
+        begin
+          newfromreg:=cg.makeregsize(list,orgfromreg,OS_64);
+          newtoreg:=cg.makeregsize(list,orgtoreg,OS_64);
+        end
+      else
+        begin
+          newfromreg:=orgfromreg;
+          newtoreg:=orgtoreg;
+        end;
+    end;
+
+
+  procedure thlcgaarch64.a_load_subsetreg_subsetreg(list: TAsmlist; fromsubsetsize, tosubsetsize: tdef; const fromsreg, tosreg: tsubsetregister);
+    var
+      fromreg, toreg: tregister;
+
+    begin
+      { BFM can only insert a bitfield that starts at position 0 in the source
+        source or destination register }
+      if (tosreg.startbit=0) and
+         (fromsreg.bitlen>=tosreg.bitlen) then
+        begin
+          makeregssamesize(list,fromsreg.subsetregsize,tosreg.subsetregsize,fromsreg.subsetreg,tosreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFXIL,toreg,fromreg,fromsreg.startbit,tosreg.bitlen))
+        end
+      else if (fromsreg.startbit=0) and
+         (fromsreg.bitlen>=tosreg.bitlen) then
+        begin
+          makeregssamesize(list,fromsreg.subsetregsize,tosreg.subsetregsize,fromsreg.subsetreg,tosreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFI,toreg,fromreg,tosreg.startbit,tosreg.bitlen))
+        end
+      else
+        inherited;
+    end;
+
+
+  procedure thlcgaarch64.a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt);
+    var
+      toreg: tregister;
+    begin
+      if slopt in [SL_SETZERO,SL_SETMAX] then
+        inherited
+      else if not(sreg.bitlen in [32,64]) then
+        begin
+          makeregssamesize(list,def_cgsize(fromsize),sreg.subsetregsize,fromreg,sreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFI,toreg,fromreg,sreg.startbit,sreg.bitlen))
+        end
+      else
+        a_load_reg_reg(list,fromsize,subsetsize,fromreg,sreg.subsetreg);
+    end;
+
+
+  procedure create_hlcodegen;
+    begin
+      hlcg:=thlcgaarch64.create;
+      create_codegen;
+    end;
+
+
+end.

+ 402 - 0
compiler/aarch64/ncpuadd.pas

@@ -0,0 +1,402 @@
+{
+    Copyright (c) 2014 Jonas Maebe
+
+    Code generation for add nodes on AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuadd;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       node,ncgadd,cpubase;
+
+    type
+       taarch64addnode = class(tcgaddnode)
+       private
+          function  GetResFlags(unsigned:Boolean):TResFlags;
+          function  GetFPUResFlags:TResFlags;
+       protected
+          procedure second_addfloat;override;
+          procedure second_cmpfloat;override;
+          procedure second_cmpboolean;override;
+          procedure second_cmpsmallset;override;
+          procedure second_cmpordinal;override;
+          procedure second_addordinal;override;
+          procedure second_add64bit; override;
+          procedure second_cmp64bit; override;
+       public
+          function use_generic_mul32to64: boolean; override;
+       end;
+
+  implementation
+
+    uses
+      systems,
+      cutils,verbose,
+      paramgr,procinfo,
+      aasmtai,aasmdata,aasmcpu,defutil,
+      cgbase,cgcpu,cgutils,
+      cpupara,
+      ncon,nset,nadd,
+      hlcgobj, ncgutil,cgobj;
+
+{*****************************************************************************
+                               taarch64addnode
+*****************************************************************************}
+
+    function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
+      begin
+        case NodeType of
+          equaln:
+            GetResFlags:=F_EQ;
+          unequaln:
+            GetResFlags:=F_NE;
+          else
+            if not(unsigned) then
+              begin
+                if nf_swapped in flags then
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_GT;
+                    lten:
+                      GetResFlags:=F_GE;
+                    gtn:
+                      GetResFlags:=F_LT;
+                    gten:
+                      GetResFlags:=F_LE;
+                    else
+                      internalerror(2014082010);
+                  end
+                else
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_LT;
+                    lten:
+                      GetResFlags:=F_LE;
+                    gtn:
+                      GetResFlags:=F_GT;
+                    gten:
+                      GetResFlags:=F_GE;
+                    else
+                      internalerror(2014082011);
+                  end;
+              end
+            else
+              begin
+                if nf_swapped in Flags then
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_HI;
+                    lten:
+                      GetResFlags:=F_HS;
+                    gtn:
+                      GetResFlags:=F_LO;
+                    gten:
+                      GetResFlags:=F_LS;
+                    else
+                      internalerror(2014082012);
+                  end
+                else
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_LO;
+                    lten:
+                      GetResFlags:=F_LS;
+                    gtn:
+                      GetResFlags:=F_HI;
+                    gten:
+                      GetResFlags:=F_HS;
+                    else
+                      internalerror(2014082013);
+                  end;
+              end;
+        end;
+      end;
+
+
+    function taarch64addnode.GetFPUResFlags:TResFlags;
+      begin
+        case NodeType of
+          equaln:
+            result:=F_EQ;
+          unequaln:
+            result:=F_NE;
+          else
+            begin
+              if nf_swapped in Flags then
+                case NodeType of
+                  ltn:
+                    result:=F_GT;
+                  lten:
+                    result:=F_GE;
+                  gtn:
+                    result:=F_LO;
+                  gten:
+                    result:=F_LS;
+                  else
+                    internalerror(2014082014);
+                end
+              else
+                case NodeType of
+                  ltn:
+                    result:=F_LO;
+                  lten:
+                    result:=F_LS;
+                  gtn:
+                    result:=F_GT;
+                  gten:
+                    result:=F_GE;
+                  else
+                    internalerror(2014082015);
+                end;
+            end;
+        end;
+      end;
+
+
+    procedure taarch64addnode.second_addfloat;
+      var
+        op : TAsmOp;
+      begin
+        pass_left_right;
+        if nf_swapped in flags then
+          swapleftright;
+
+        { force fpureg as location, left right doesn't matter
+          as both will be in a fpureg }
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+
+        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+        case nodetype of
+          addn :
+            begin
+              op:=A_FADD;
+            end;
+          muln :
+            begin
+              op:=A_FMUL;
+            end;
+          subn :
+            begin
+              op:=A_FSUB;
+            end;
+          slashn :
+            begin
+              op:=A_FDIV;
+            end;
+          else
+            internalerror(200306014);
+        end;
+
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
+           location.register,left.location.register,right.location.register));
+      end;
+
+
+    procedure taarch64addnode.second_cmpfloat;
+      begin
+        pass_left_right;
+        if nf_swapped in flags then
+          swapleftright;
+
+        { force fpureg as location, left right doesn't matter
+          as both will be in a fpureg }
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getfpuresflags;
+
+        { signalling compare so we can get exceptions }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMPE,
+             left.location.register,right.location.register));
+      end;
+
+
+    procedure taarch64addnode.second_cmpboolean;
+      begin
+        pass_left_right;
+        force_reg_left_right(true,true);
+
+        if right.location.loc=LOC_CONSTANT then
+          begin
+            if right.location.value>=0 then
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false)
+            else
+              { avoid overflow if value=low(int64) }
+{$push}{$r-}{$q-}
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
+{$pop}
+          end
+        else
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getresflags(true);
+      end;
+
+
+    procedure taarch64addnode.second_cmpsmallset;
+      var
+        tmpreg : tregister;
+        op: tasmop;
+      begin
+        pass_left_right;
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+
+        force_reg_left_right(true,true);
+
+        if right.location.loc=LOC_CONSTANT then
+          begin
+            { when doing a cmp/cmn on 32 bit, we care whether the *lower 32 bit*
+              is a positive/negative value -> sign extend }
+            if not(right.location.size in [OS_64,OS_S64]) then
+              right.location.value:=longint(right.location.value);
+            if right.location.value>=0 then
+              op:=A_CMP
+            else
+              op:=A_CMN;
+          end
+        else
+          { for DFA }
+          op:=A_NONE;
+
+        case nodetype of
+          equaln,
+          unequaln:
+            begin
+              if right.location.loc=LOC_CONSTANT then
+                tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,op,def_cgsize(resultdef),left.location.register,abs(right.location.value),NR_XZR,NR_NO,false,false)
+              else
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+              location.resflags:=getresflags(true);
+            end;
+          lten,
+          gten:
+            begin
+              if (not(nf_swapped in flags) and
+                  (nodetype=lten)) or
+                 ((nf_swapped in flags) and
+                  (nodetype=gten)) then
+                swapleftright;
+              { we can't handle left as a constant yet }
+              if left.location.loc=LOC_CONSTANT then
+                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+              tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,left.location.size);
+              if right.location.loc=LOC_CONSTANT then
+                begin
+                  hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_AND,resultdef,right.location.value,left.location.register,tmpreg);
+                  tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,op,def_cgsize(resultdef),tmpreg,abs(right.location.value),NR_XZR,NR_NO,false,false)
+                end
+              else
+                begin
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,tmpreg,left.location.register,right.location.register));
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,tmpreg,right.location.register));
+                end;
+              location.resflags:=F_EQ;
+            end;
+          else
+            internalerror(2012042701);
+        end;
+      end;
+
+
+    procedure taarch64addnode.second_cmpordinal;
+      var
+        unsigned : boolean;
+      begin
+        pass_left_right;
+        force_reg_left_right(true,true);
+
+        unsigned:=not(is_signed(left.resultdef)) or
+                  not(is_signed(right.resultdef));
+
+        if right.location.loc = LOC_CONSTANT then
+          begin
+            if right.location.value>=0 then
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false)
+            else
+{$push}{$r-}{$q-}
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
+{$pop}
+          end
+        else
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getresflags(unsigned);
+      end;
+
+
+    procedure taarch64addnode.second_addordinal;
+      const
+        multops: array[boolean] of TAsmOp = (A_SMULL,A_UMULL);
+      var
+        unsigned: boolean;
+      begin
+        { 32x32->64 multiplication }
+        if (nodetype=muln) and
+           is_32bit(left.resultdef) and
+           is_32bit(right.resultdef) and
+           is_64bit(resultdef) then
+          begin
+            unsigned:=not(is_signed(left.resultdef)) or
+                      not(is_signed(right.resultdef));
+            pass_left_right;
+            force_reg_left_right(true,true);
+            { force_reg_left_right can leave right as a LOC_CONSTANT (we can't
+              say "a constant register is okay, but an ordinal constant isn't) }
+            if right.location.loc=LOC_CONSTANT then
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+            location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+            location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+            current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg_reg(multops[unsigned],location.register,left.location.register,right.location.register));
+          end
+        else
+          inherited second_addordinal;
+      end;
+
+
+    procedure taarch64addnode.second_add64bit;
+      begin
+        second_addordinal;
+      end;
+
+
+    procedure taarch64addnode.second_cmp64bit;
+      begin
+        second_cmpordinal;
+      end;
+
+
+    function taarch64addnode.use_generic_mul32to64: boolean;
+      begin
+        result:=false;
+      end;
+
+
+begin
+  caddnode:=taarch64addnode;
+end.

+ 201 - 0
compiler/aarch64/ncpucnv.pas

@@ -0,0 +1,201 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    Generate AArch64 assembler for type converting nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit ncpucnv;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ncnv,ncgcnv;
+
+    type
+      taarch64typeconvnode = class(TCgTypeConvNode)
+        protected
+         function typecheck_int_to_real: tnode; override;
+         function first_int_to_real: tnode; override;
+
+        { procedure second_int_to_int;override; }
+        { procedure second_string_to_string;override; }
+        { procedure second_cstring_to_pchar;override; }
+        { procedure second_string_to_chararray;override; }
+        { procedure second_array_to_pointer;override; }
+        { procedure second_pointer_to_array;override; }
+        { procedure second_chararray_to_string;override; }
+        { procedure second_char_to_string;override; }
+         procedure second_int_to_real;override;
+        { procedure second_real_to_real;override; }
+        { procedure second_cord_to_pointer;override; }
+        { procedure second_proc_to_procvar;override; }
+        { procedure second_bool_to_int;override; }
+         procedure second_int_to_bool;override;
+        { procedure second_load_smallset;override;  }
+        { procedure second_ansistring_to_pchar;override; }
+        { procedure second_pchar_to_string;override; }
+        { procedure second_class_to_intf;override; }
+        { procedure second_char_to_char;override; }
+      end;
+
+implementation
+
+  uses
+    verbose,globals,
+    symdef,aasmdata,aasmbase,
+    defutil,
+    cgbase,cgutils,procinfo,
+    cpubase,aasmcpu,
+    pass_2,cgobj,
+    hlcgobj;
+
+
+{*****************************************************************************
+                             FirstTypeConv
+*****************************************************************************}
+
+  function taarch64typeconvnode.typecheck_int_to_real: tnode;
+    begin
+      { aarch64 supports converting everything to floating point, even fixed
+        point! Unfortunately, it only supports fixed point with a power-of-2
+        fraction, which is not the case for currency.
+
+        Generate the division by 10000 via nodes so the 10000.0 constant can
+        be reused. }
+      if is_currency(resultdef) and
+         not(nf_is_currency in flags) then
+        begin
+          { convert the equivalent int64 value to double without conversion
+            (internal typecast -> will set nf_is_currency flag) }
+          result:=ctypeconvnode.create_internal(left,s64floattype);
+          { turn into currency with conversion, which will divide by 10000
+            (regular typecast) }
+          result:=ctypeconvnode.create(result,s64currencytype);
+          exit;
+        end;
+      { The only other thing we have to take care of: convert values < 32 bit
+        to 32 bit }
+      if left.resultdef.size<4 then
+        begin
+          if is_signed(left.resultdef) then
+            inserttypeconv(left,s32inttype)
+          else
+            inserttypeconv(left,u32inttype)
+        end;
+      result:=inherited;
+    end;
+
+
+  function taarch64typeconvnode.first_int_to_real: tnode;
+    begin
+      result:=nil;
+      expectloc:=LOC_MMREGISTER;
+    end;
+
+
+{*****************************************************************************
+                             SecondTypeConv
+*****************************************************************************}
+
+  procedure taarch64typeconvnode.second_int_to_real;
+    var
+      op: tasmop;
+    begin
+      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
+        internalerror(2014120401);
+      case left.location.size of
+        OS_32,
+        OS_64:
+          op:=A_UCVTF;
+        OS_S32,
+        OS_S64,
+        { for currency and comp }
+        OS_F64:
+          op:=A_SCVTF;
+        else
+          internalerror(2014120402);
+      end;
+      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
+      { no scaling for currency, that's handled in pass_typecheck }
+    end;
+
+
+  procedure taarch64typeconvnode.second_int_to_bool;
+    var
+      resflags: tresflags;
+      hlabel,oldTrueLabel,oldFalseLabel : tasmlabel;
+    begin
+      if (nf_explicit in flags) and
+         not(left.expectloc in [LOC_FLAGS,LOC_JUMP]) then
+        begin
+          inherited;
+          exit;
+        end;
+
+      { can't use the generic code, as it assumes that OP_OR automatically sets
+        the flags. We can also do things more efficiently directly }
+
+      oldTrueLabel:=current_procinfo.CurrTrueLabel;
+      oldFalseLabel:=current_procinfo.CurrFalseLabel;
+      current_asmdata.getjumplabel(current_procinfo.CurrTrueLabel);
+      current_asmdata.getjumplabel(current_procinfo.CurrFalseLabel);
+      secondpass(left);
+      if codegenerror then
+       exit;
+
+      case left.location.loc of
+        LOC_CREFERENCE,
+        LOC_REFERENCE,
+        LOC_REGISTER,
+        LOC_CREGISTER,
+        LOC_JUMP:
+          begin
+             hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+             current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
+             resflags:=F_NE;
+          end;
+        LOC_FLAGS :
+          resflags:=left.location.resflags;
+        else
+          internalerror(2014122902);
+      end;
+      { load flags to register }
+      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+      if is_cbool(resultdef) then
+        begin
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_cond(A_CSETM,location.register,flags_to_cond(resflags)));
+            { truncate? (in case cbools are ever made unsigned) }
+            if resultdef.size<4 then
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,location.size,location.register,location.register);
+        end
+      else
+        cg.g_flags2reg(current_asmdata.CurrAsmList,location.size,resflags,location.register);
+      cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+      current_procinfo.CurrTrueLabel:=oldTrueLabel;
+      current_procinfo.CurrFalseLabel:=oldFalseLabel;
+    end;
+
+
+begin
+   ctypeconvnode:=taarch64typeconvnode;
+end.

+ 184 - 0
compiler/aarch64/ncpuinl.pas

@@ -0,0 +1,184 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    Generates ARM inline nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuinl;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ninl,ncginl;
+
+    type
+      taarch64inlinenode = class(tcgInlineNode)
+        function first_abs_real: tnode; override;
+        function first_sqr_real: tnode; override;
+        function first_sqrt_real: tnode; override;
+        function first_round_real: tnode; override;
+        function first_trunc_real: tnode; override;
+        procedure second_abs_real; override;
+        procedure second_sqr_real; override;
+        procedure second_sqrt_real; override;
+        procedure second_abs_long; override;
+        procedure second_round_real; override;
+        procedure second_trunc_real; override;
+        procedure second_get_frame; override;
+      private
+        procedure load_fpu_location;
+      end;
+
+
+implementation
+
+    uses
+      globtype,verbose,globals,
+      cpuinfo, defutil,symdef,aasmdata,aasmcpu,
+      cgbase,cgutils,pass_1,pass_2,
+      cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
+
+{*****************************************************************************
+                              taarch64inlinenode
+*****************************************************************************}
+
+    procedure taarch64inlinenode.load_fpu_location;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_copy(location,left.location);
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+        location.loc:=LOC_MMREGISTER;
+      end;
+
+
+    function taarch64inlinenode.first_abs_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_sqr_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_sqrt_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_round_real: tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_trunc_real: tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    procedure taarch64inlinenode.second_abs_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FABS,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_sqr_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_sqrt_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_abs_long;
+      var
+        opsize : tcgsize;
+        hp : taicpu;
+      begin
+        secondpass(left);
+        opsize:=def_cgsize(left.resultdef);
+        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+        location:=left.location;
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
+
+        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,location.register,left.location.register),PF_S));
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_CSEL,location.register,location.register,left.location.register,C_GE));
+      end;
+
+
+    procedure taarch64inlinenode.second_round_real;
+      var
+        hreg: tregister;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+        hreg:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
+        { round as floating point using current rounding mode }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTX,hreg,left.location.register));
+        { convert to signed integer rounding towards zero (there's no "round to
+          integer using current rounding mode") }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,hreg));
+      end;
+
+
+    procedure taarch64inlinenode.second_trunc_real;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+        { convert to signed integer rounding towards zero }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_get_frame;
+      begin
+        location_reset(location,LOC_CREGISTER,OS_ADDR);
+        { this routine is used to get the frame pointer for backtracing
+          purposes. current_procinfo.framepointer is set to SP because that one
+          is used to access temps. On most platforms these two frame pointers
+          are the same, but not on AArch64. }
+        location.register:=NR_FRAME_POINTER_REG;
+      end;
+
+begin
+  cinlinenode:=taarch64inlinenode;
+end.

+ 196 - 0
compiler/aarch64/ncpumat.pas

@@ -0,0 +1,196 @@
+{
+    Copyright (c) 1998-2002, 2014 by Florian Klaempfl and Jonas Maebe
+
+    Generate AArch64 assembler for math nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpumat;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,nmat,ncgmat;
+
+    type
+      taarch64moddivnode = class(tmoddivnode)
+         function pass_1: tnode; override;
+         procedure pass_generate_code;override;
+      end;
+
+      taarch64notnode = class(tcgnotnode)
+         procedure second_boolean;override;
+      end;
+
+      taarch64unaryminusnode = class(tcgunaryminusnode)
+         procedure second_float; override;
+      end;
+
+implementation
+
+    uses
+      globtype,systems,constexp,
+      cutils,verbose,globals,
+      symconst,symdef,
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      defutil,
+      cgbase,cgobj,hlcgobj,pass_2,procinfo,
+      ncon,
+      cpubase,
+      ncgutil,cgcpu,cgutils;
+
+{*****************************************************************************
+                             taarch64moddivnode
+*****************************************************************************}
+
+    function taarch64moddivnode.pass_1: tnode;
+      begin
+        result:=inherited pass_1;
+        if not assigned(result) then
+          include(current_procinfo.flags,pi_do_call);
+      end;
+
+
+    procedure taarch64moddivnode.pass_generate_code;
+      var
+         op         : tasmop;
+         tmpreg,
+         numerator,
+         divider,
+         resultreg  : tregister;
+         hl : tasmlabel;
+         overflowloc: tlocation;
+      begin
+       secondpass(left);
+       secondpass(right);
+
+       { set result location }
+       location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+       location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+       resultreg:=location.register;
+
+       { put numerator in register }
+       hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+       numerator:=left.location.register;
+
+       { load divider in a register }
+       hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+       divider:=right.location.register;
+
+       { start division }
+       if is_signed(left.resultdef) then
+         op:=A_SDIV
+       else
+         op:=A_UDIV;
+       current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+
+       { no divide-by-zero detection available in hardware, emulate (if it's a
+         constant, this will have been detected earlier already) }
+       if (right.nodetype<>ordconstn) then
+         begin
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
+             right.location.register,0));
+
+           current_asmdata.getjumplabel(hl);
+           current_asmdata.CurrAsmList.concat(taicpu.op_cond_sym(A_B,C_NE,hl));
+           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DIVBYZERO',false);
+           cg.a_label(current_asmdata.CurrAsmList,hl);
+         end;
+
+       { in case of overflow checking, also check for low(int64) div (-1)
+         (no hardware support for this either) }
+       if (cs_check_overflow in current_settings.localswitches) and
+          is_signed(left.resultdef) and
+          ((right.nodetype<>ordconstn) or
+           (tordconstnode(right).value=-1)) then
+         begin
+           { num=ffff... and div=8000... <=>
+             num xor not(div xor 8000...) = 0
+             (and we have the "eon" operation, which performs "xor not(...)" }
+           tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,left.resultdef);
+           hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.resultdef,low(int64),left.location.register,tmpreg);
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_EON,
+             tmpreg,left.location.register,tmpreg));
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,0));
+           { now the zero/equal flag is set in case we divided low(int64) by
+             (-1) }
+           location_reset(overflowloc,LOC_FLAGS,OS_NO);
+           overflowloc.resflags:=F_EQ;
+           cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,location,resultdef,overflowloc);
+         end;
+
+       { in case of modulo, multiply result again by the divider and subtract
+         from the numerator }
+       if nodetype=modn then
+         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(A_MSUB,resultreg,
+           resultreg,divider,numerator));
+    end;
+
+
+{*****************************************************************************
+                               taarch64notnode
+*****************************************************************************}
+
+    procedure taarch64notnode.second_boolean;
+      begin
+        if not handle_locjump then
+          begin
+            secondpass(left);
+            case left.location.loc of
+              LOC_FLAGS :
+                begin
+                  location_copy(location,left.location);
+                  inverse_flags(location.resflags);
+                end;
+              LOC_REGISTER, LOC_CREGISTER,
+              LOC_REFERENCE, LOC_CREFERENCE,
+              LOC_SUBSETREG, LOC_CSUBSETREG,
+              LOC_SUBSETREF, LOC_CSUBSETREF:
+                begin
+                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
+                    left.location.register,0));
+                  location_reset(location,LOC_FLAGS,OS_NO);
+                  location.resflags:=F_EQ;
+               end;
+              else
+                internalerror(2003042401);
+            end;
+          end;
+      end;
+
+
+{*****************************************************************************
+                                   taarch64unaryminusnode
+*****************************************************************************}
+
+    procedure taarch64unaryminusnode.second_float;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
+      end;
+
+begin
+   cmoddivnode:=taarch64moddivnode;
+   cnotnode:=taarch64notnode;
+   cunaryminusnode:=taarch64unaryminusnode;
+end.

+ 142 - 0
compiler/aarch64/ncpumem.pas

@@ -0,0 +1,142 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    Generate AArch64 code for in memory related nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpumem;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    globtype,
+    cgbase,
+    symtype,
+    node,nmem,ncgmem;
+
+  type
+    taarch64loadparentfpnode = class(tcgloadparentfpnode)
+      procedure pass_generate_code; override;
+    end;
+
+    taarch64vecnode = class(tcgvecnode)
+     protected
+      function valid_index_size(size: tcgsize): boolean; override;
+     public
+       procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint); override;
+    end;
+
+implementation
+
+  uses
+    cutils,verbose,
+    defutil,
+    aasmdata,cpubase,
+    cgutils,
+    cgobj;
+
+  { taarch64loadparentfpnode }
+
+  procedure taarch64loadparentfpnode.pass_generate_code;
+    begin
+      inherited pass_generate_code;
+      { see the comments in tcgaarch64.g_proc_entry }
+      if (location.loc in [LOC_REGISTER,LOC_CREGISTER]) and
+         (location.register=NR_STACK_POINTER_REG) then
+        if (kind=lpf_forpara) then
+          location.register:=NR_FRAME_POINTER_REG
+        else
+          begin
+            { load stack pointer in a different register, as many instructions
+              cannot directly work with the stack pointer. The register
+              allocator can merge them if possible }
+            location.register:=cg.getaddressregister(current_asmdata.CurrAsmList);
+            cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_STACK_POINTER_REG,location.register);
+            location.loc:=LOC_REGISTER;
+          end;
+    end;
+
+
+  { taarch64vecnode }
+
+  function taarch64vecnode.valid_index_size(size: tcgsize): boolean;
+    begin
+      { all sizes are ok if we handle the "reference reg mul", because
+         a) we use a 64 bit register for 64 bit values, and a 32 bit one (that
+            we can sign/zero-extend inside the reference) for smaller values
+         b) for values < 32 bit, the entire 32 bit register always contains the
+            sign/zero-extended version of the value }
+      result:=
+        not is_packed_array(left.resultdef) and
+        (get_mul_size in [1,2,4,8,16]);
+    end;
+
+
+  procedure taarch64vecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
+    var
+      base: tregister;
+      oldoffset: asizeint;
+      shift: byte;
+    begin
+      { we can only scale the index by shl 0..4 }
+      if not(l in [1,2,4,8,16]) then
+        begin
+          inherited;
+          exit;
+        end;
+      { we need a base set and an index available }
+      if (location.reference.base=NR_NO) or
+         (location.reference.index<>NR_NO) then
+        begin
+          { don't integrate the offset yet, make_simple_ref() may be able to
+            handle it more efficiently later (unless an offset is all we have
+            -> optimization for someone that wants to add support for AArch64
+            embedded targets) }
+          oldoffset:=location.reference.offset;
+          location.reference.offset:=0;
+          base:=cg.getaddressregister(current_asmdata.CurrAsmList);
+          cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,base);
+          reference_reset_base(location.reference,base,oldoffset,location.reference.alignment);
+        end;
+      shift:=BsfDWord(l);
+      location.reference.index:=maybe_const_reg;
+      { sign/zero-extend? }
+      if regsize.size=8 then
+        if shift<>0 then
+          location.reference.shiftmode:=SM_LSL
+        else
+          location.reference.shiftmode:=SM_NONE
+      else if is_signed(regsize) then
+        location.reference.shiftmode:=SM_SXTW
+      else if shift<>0 then
+        location.reference.shiftmode:=SM_UXTW
+      else
+        { the upper 32 bits are always already zero-extended -> just use 64 bit
+          register }
+        location.reference.index:=cg.makeregsize(current_asmdata.CurrAsmList,location.reference.index,OS_64);
+      location.reference.shiftimm:=shift;
+      location.reference.alignment:=newalignment(location.reference.alignment,l);
+    end;
+
+
+begin
+  cloadparentfpnode:=taarch64loadparentfpnode;
+  cvecnode:=taarch64vecnode;
+end.

+ 175 - 0
compiler/aarch64/ncpuset.pas

@@ -0,0 +1,175 @@
+{
+    Copyright (c) 2015 by Jonas Maebe
+
+    Generate AArch64 assembler for in set/case nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuset;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       node,nset,ncgset,cpubase,cgbase,cgobj,aasmbase,aasmtai,aasmdata,globtype;
+
+    type
+       taarch64casenode = class(tcgcasenode)
+         protected
+           procedure optimizevalues(var max_linear_list: aint; var max_dist: aword);override;
+           function  has_jumptable: boolean;override;
+           procedure genjumptable(hp: pcaselabel ;min_, max_: aint);override;
+       end;
+
+
+implementation
+
+    uses
+      systems,
+      verbose,globals,constexp,
+      symconst,symdef,defutil,
+      paramgr,
+      cpuinfo,
+      pass_2,cgcpu,
+      ncon,
+      tgobj,ncgutil,regvars,rgobj,aasmcpu,
+      procinfo,
+      cgutils;
+
+{*****************************************************************************
+                            TCGCASENODE
+*****************************************************************************}
+
+
+    procedure taarch64casenode.optimizevalues(var max_linear_list: aint; var max_dist: aword);
+      begin
+        max_linear_list:=10;
+      end;
+    
+
+    function taarch64casenode.has_jumptable: boolean;
+      begin
+        has_jumptable:=true;
+      end;
+
+
+    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: aint);
+      var
+        last: TConstExprInt;
+        tablelabel: TAsmLabel;
+        basereg,indexreg,jumpreg: TRegister;
+        href: TReference;
+        opcgsize: tcgsize;
+        sectype: TAsmSectiontype;
+        jtitemconsttype: taiconst_type;
+
+      procedure genitem(list:TAsmList;t : pcaselabel);
+        var
+          i : aint;
+        begin
+          if assigned(t^.less) then
+            genitem(list,t^.less);
+          { fill possible hole }
+          i:=last.svalue+1;
+          while i<=t^._low.svalue-1 do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,elselabel));
+              inc(i);
+            end;
+          i:=t^._low.svalue;
+          while i<=t^._high.svalue do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,blocklabel(t^.blockid)));
+              inc(i);
+            end;
+          last:=t^._high;
+          if assigned(t^.greater) then
+            genitem(list,t^.greater);
+        end;
+
+      begin
+        if not(target_info.system in systems_darwin) then
+          jtitemconsttype:=aitconst_32bit
+        else
+          { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
+          jtitemconsttype:=aitconst_darwin_dwarf_delta32;
+
+        last:=min_;
+        opcgsize:=def_cgsize(opsize);
+        { a <= x <= b <-> unsigned(x-a) <= (b-a) }
+        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opcgsize,aint(min_),hregister);
+        if not(jumptable_no_range) then
+          begin
+             { case expr greater than max_ => goto elselabel }
+             cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,OC_A,aint(max_)-aint(min_),hregister,elselabel);
+             min_:=0;
+          end;
+        { local label in order to avoid using GOT }
+        current_asmdata.getlabel(tablelabel,alt_data);
+        indexreg:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,opcgsize,OS_ADDR,hregister,indexreg);
+        { load table address }
+        reference_reset_symbol(href,tablelabel,0,4);
+        basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
+        { load table slot, 32-bit sign extended }
+        reference_reset_base(href,basereg,0,4);
+        href.index:=indexreg;
+        href.shiftmode:=SM_LSL;
+        href.shiftimm:=2;
+        jumpreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,href,jumpreg);
+        { add table address }
+        cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,basereg,jumpreg);
+        { and finally jump }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,jumpreg));
+        { generate jump table }
+        if not(target_info.system in systems_darwin) then
+          sectype:=sec_rodata
+        else
+          begin
+            { on Mac OS X, dead code stripping ("smart linking") happens based on
+              global symbols: every global/static symbol (symbols that do not
+              start with "L") marks the start of a new "subsection" that is
+              discarded by the linker if there are no references to this symbol.
+              This means that if you put the jump table in the rodata section, it
+              will become part of the block of data associated with the previous
+              non-L-label in the rodata section and stay or be thrown away
+              depending on whether that block of data is referenced. Therefore,
+              jump tables must be added in the code section and since aktlocaldata
+              is inserted right after the routine, it will become part of the
+              same subsection that contains the routine's code }
+            sectype:=sec_code;
+          end;
+        new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
+        if target_info.system in systems_darwin then
+          begin
+            { additionally, these tables are now marked via ".data_region jt32"
+              and ".end_data_region" }
+            current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_data_region,'jt32'));
+          end;
+        current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel));
+        genitem(current_procinfo.aktlocaldata,hp);
+        if target_info.system in systems_darwin then
+          current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_end_data_region,''));
+      end;
+
+
+begin
+   ccasenode:=taarch64casenode;
+end.

+ 5 - 0
compiler/aarch64/ra64con.inc

@@ -64,6 +64,8 @@ NR_W30 = tregister($0104001E);
 NR_X30 = tregister($0105001E);
 NR_X30 = tregister($0105001E);
 NR_WZR = tregister($0104001F);
 NR_WZR = tregister($0104001F);
 NR_XZR = tregister($0105001F);
 NR_XZR = tregister($0105001F);
+NR_WSP = tregister($01040020);
+NR_SP = tregister($01050020);
 NR_B0 = tregister($04010000);
 NR_B0 = tregister($04010000);
 NR_H0 = tregister($04030000);
 NR_H0 = tregister($04030000);
 NR_S0 = tregister($04090000);
 NR_S0 = tregister($04090000);
@@ -225,3 +227,6 @@ NR_S31 = tregister($0409001F);
 NR_D31 = tregister($040a001F);
 NR_D31 = tregister($040a001F);
 NR_Q31 = tregister($0405001F);
 NR_Q31 = tregister($0405001F);
 NR_NZCV = tregister($05000000);
 NR_NZCV = tregister($05000000);
+NR_FPCR = tregister($05000001);
+NR_FPSR = tregister($05000002);
+NR_TPIDR_EL0 = tregister($05000003);

+ 162 - 157
compiler/aarch64/ra64dwa.inc

@@ -64,164 +64,169 @@
 30,
 30,
 31,
 31,
 31,
 31,
+31,
+31,
+64,
+64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+73,
+73,
+73,
+73,
+73,
+74,
+74,
+74,
+74,
+74,
+75,
+75,
+75,
+75,
+75,
+76,
+76,
+76,
+76,
+76,
+77,
+77,
+77,
+77,
+77,
+78,
+78,
+78,
+78,
+78,
+79,
+79,
+79,
+79,
+79,
+80,
+80,
+80,
+80,
+80,
+81,
+81,
+81,
+81,
+81,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+84,
+84,
+84,
+84,
+84,
+85,
+85,
+85,
+85,
+85,
+86,
+86,
+86,
+86,
+86,
+87,
+87,
+87,
+87,
+87,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+95,
+95,
+95,
+95,
+95,
 0,
 0,
 0,
 0,
 0,
 0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-7,
-8,
-8,
-8,
-8,
-8,
-9,
-9,
-9,
-9,
-9,
-10,
-10,
-10,
-10,
-10,
-11,
-11,
-11,
-11,
-11,
-12,
-12,
-12,
-12,
-12,
-13,
-13,
-13,
-13,
-13,
-14,
-14,
-14,
-14,
-14,
-15,
-15,
-15,
-15,
-15,
-16,
-16,
-16,
-16,
-16,
-17,
-17,
-17,
-17,
-17,
-18,
-18,
-18,
-18,
-18,
-19,
-19,
-19,
-19,
-19,
-20,
-20,
-20,
-20,
-20,
-21,
-21,
-21,
-21,
-21,
-22,
-22,
-22,
-22,
-22,
-23,
-23,
-23,
-23,
-23,
-24,
-24,
-24,
-24,
-24,
-25,
-25,
-25,
-25,
-25,
-26,
-26,
-26,
-26,
-26,
-27,
-27,
-27,
-27,
-27,
-28,
-28,
-28,
-28,
-28,
-29,
-29,
-29,
-29,
-29,
-30,
-30,
-30,
-30,
-30,
-31,
-31,
-31,
-31,
-31,
 0
 0

+ 1 - 1
compiler/aarch64/ra64nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from a64reg.dat }
 { don't edit, this file is generated from a64reg.dat }
-226
+231

+ 6 - 1
compiler/aarch64/ra64num.inc

@@ -64,6 +64,8 @@ tregister($0104001E),
 tregister($0105001E),
 tregister($0105001E),
 tregister($0104001F),
 tregister($0104001F),
 tregister($0105001F),
 tregister($0105001F),
+tregister($01040020),
+tregister($01050020),
 tregister($04010000),
 tregister($04010000),
 tregister($04030000),
 tregister($04030000),
 tregister($04090000),
 tregister($04090000),
@@ -224,4 +226,7 @@ tregister($0403001F),
 tregister($0409001F),
 tregister($0409001F),
 tregister($040a001F),
 tregister($040a001F),
 tregister($0405001F),
 tregister($0405001F),
-tregister($05000000)
+tregister($05000000),
+tregister($05000001),
+tregister($05000002),
+tregister($05000003)

+ 101 - 96
compiler/aarch64/ra64rni.inc

@@ -32,6 +32,7 @@
 59,
 59,
 61,
 61,
 63,
 63,
+65,
 2,
 2,
 4,
 4,
 6,
 6,
@@ -64,102 +65,7 @@
 60,
 60,
 62,
 62,
 64,
 64,
-65,
-70,
-75,
-80,
-85,
-90,
-95,
-100,
-105,
-110,
-115,
-120,
-125,
-130,
-135,
-140,
-145,
-150,
-155,
-160,
-165,
-170,
-175,
-180,
-185,
-190,
-195,
-200,
-205,
-210,
-215,
-220,
 66,
 66,
-71,
-76,
-81,
-86,
-91,
-96,
-101,
-106,
-111,
-116,
-121,
-126,
-131,
-136,
-141,
-146,
-151,
-156,
-161,
-166,
-171,
-176,
-181,
-186,
-191,
-196,
-201,
-206,
-211,
-216,
-221,
-69,
-74,
-79,
-84,
-89,
-94,
-99,
-104,
-109,
-114,
-119,
-124,
-129,
-134,
-139,
-144,
-149,
-154,
-159,
-164,
-169,
-174,
-179,
-184,
-189,
-194,
-199,
-204,
-209,
-214,
-219,
-224,
 67,
 67,
 72,
 72,
 77,
 77,
@@ -224,4 +130,103 @@
 213,
 213,
 218,
 218,
 223,
 223,
-225
+71,
+76,
+81,
+86,
+91,
+96,
+101,
+106,
+111,
+116,
+121,
+126,
+131,
+136,
+141,
+146,
+151,
+156,
+161,
+166,
+171,
+176,
+181,
+186,
+191,
+196,
+201,
+206,
+211,
+216,
+221,
+226,
+69,
+74,
+79,
+84,
+89,
+94,
+99,
+104,
+109,
+114,
+119,
+124,
+129,
+134,
+139,
+144,
+149,
+154,
+159,
+164,
+169,
+174,
+179,
+184,
+189,
+194,
+199,
+204,
+209,
+214,
+219,
+224,
+70,
+75,
+80,
+85,
+90,
+95,
+100,
+105,
+110,
+115,
+120,
+125,
+130,
+135,
+140,
+145,
+150,
+155,
+160,
+165,
+170,
+175,
+180,
+185,
+190,
+195,
+200,
+205,
+210,
+215,
+220,
+225,
+227,
+228,
+229,
+230

+ 48 - 43
compiler/aarch64/ra64sri.inc

@@ -1,8 +1,39 @@
 { don't edit, this file is generated from a64reg.dat }
 { don't edit, this file is generated from a64reg.dat }
 0,
 0,
-65,
+67,
+72,
+117,
+122,
+127,
+132,
+137,
+142,
+147,
+152,
+157,
+162,
+77,
+167,
+172,
+177,
+182,
+187,
+192,
+197,
+202,
+207,
+212,
+82,
+217,
+222,
+87,
+92,
+97,
+102,
+107,
+112,
 70,
 70,
-115,
+75,
 120,
 120,
 125,
 125,
 130,
 130,
@@ -12,8 +43,8 @@
 150,
 150,
 155,
 155,
 160,
 160,
-75,
 165,
 165,
+80,
 170,
 170,
 175,
 175,
 180,
 180,
@@ -23,15 +54,18 @@
 200,
 200,
 205,
 205,
 210,
 210,
-80,
 215,
 215,
-220,
 85,
 85,
+220,
+225,
 90,
 90,
 95,
 95,
 100,
 100,
 105,
 105,
 110,
 110,
+115,
+228,
+229,
 68,
 68,
 73,
 73,
 118,
 118,
@@ -64,9 +98,9 @@
 103,
 103,
 108,
 108,
 113,
 113,
-66,
+227,
 71,
 71,
-116,
+76,
 121,
 121,
 126,
 126,
 131,
 131,
@@ -76,8 +110,8 @@
 151,
 151,
 156,
 156,
 161,
 161,
-76,
 166,
 166,
+81,
 171,
 171,
 176,
 176,
 181,
 181,
@@ -87,16 +121,16 @@
 201,
 201,
 206,
 206,
 211,
 211,
-81,
 216,
 216,
-221,
 86,
 86,
+221,
+226,
 91,
 91,
 96,
 96,
 101,
 101,
 106,
 106,
 111,
 111,
-225,
+116,
 69,
 69,
 74,
 74,
 119,
 119,
@@ -129,38 +163,8 @@
 104,
 104,
 109,
 109,
 114,
 114,
-67,
-72,
-117,
-122,
-127,
-132,
-137,
-142,
-147,
-152,
-157,
-162,
-77,
-167,
-172,
-177,
-182,
-187,
-192,
-197,
-202,
-207,
-212,
-82,
-217,
-222,
-87,
-92,
-97,
-102,
-107,
-112,
+66,
+230,
 1,
 1,
 3,
 3,
 21,
 21,
@@ -192,6 +196,7 @@
 15,
 15,
 17,
 17,
 19,
 19,
+65,
 63,
 63,
 2,
 2,
 4,
 4,

+ 162 - 157
compiler/aarch64/ra64sta.inc

@@ -64,164 +64,169 @@
 30,
 30,
 31,
 31,
 31,
 31,
+31,
+31,
+64,
+64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+73,
+73,
+73,
+73,
+73,
+74,
+74,
+74,
+74,
+74,
+75,
+75,
+75,
+75,
+75,
+76,
+76,
+76,
+76,
+76,
+77,
+77,
+77,
+77,
+77,
+78,
+78,
+78,
+78,
+78,
+79,
+79,
+79,
+79,
+79,
+80,
+80,
+80,
+80,
+80,
+81,
+81,
+81,
+81,
+81,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+84,
+84,
+84,
+84,
+84,
+85,
+85,
+85,
+85,
+85,
+86,
+86,
+86,
+86,
+86,
+87,
+87,
+87,
+87,
+87,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+95,
+95,
+95,
+95,
+95,
 0,
 0,
 0,
 0,
 0,
 0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-7,
-8,
-8,
-8,
-8,
-8,
-9,
-9,
-9,
-9,
-9,
-10,
-10,
-10,
-10,
-10,
-11,
-11,
-11,
-11,
-11,
-12,
-12,
-12,
-12,
-12,
-13,
-13,
-13,
-13,
-13,
-14,
-14,
-14,
-14,
-14,
-15,
-15,
-15,
-15,
-15,
-16,
-16,
-16,
-16,
-16,
-17,
-17,
-17,
-17,
-17,
-18,
-18,
-18,
-18,
-18,
-19,
-19,
-19,
-19,
-19,
-20,
-20,
-20,
-20,
-20,
-21,
-21,
-21,
-21,
-21,
-22,
-22,
-22,
-22,
-22,
-23,
-23,
-23,
-23,
-23,
-24,
-24,
-24,
-24,
-24,
-25,
-25,
-25,
-25,
-25,
-26,
-26,
-26,
-26,
-26,
-27,
-27,
-27,
-27,
-27,
-28,
-28,
-28,
-28,
-28,
-29,
-29,
-29,
-29,
-29,
-30,
-30,
-30,
-30,
-30,
-31,
-31,
-31,
-31,
-31,
 0
 0

+ 6 - 1
compiler/aarch64/ra64std.inc

@@ -64,6 +64,8 @@
 'x30',
 'x30',
 'wzr',
 'wzr',
 'xzr',
 'xzr',
+'wsp',
+'sp',
 'b0',
 'b0',
 'h0',
 'h0',
 's0',
 's0',
@@ -224,4 +226,7 @@
 's31',
 's31',
 'd31',
 'd31',
 'q31',
 'q31',
-'nzcv'
+'nzcv',
+'fpcr',
+'fpsr',
+'tpidr_el0'

+ 5 - 0
compiler/aarch64/ra64sup.inc

@@ -64,6 +64,8 @@ RS_W30 = $1E;
 RS_X30 = $1E;
 RS_X30 = $1E;
 RS_WZR = $1F;
 RS_WZR = $1F;
 RS_XZR = $1F;
 RS_XZR = $1F;
+RS_WSP = $20;
+RS_SP = $20;
 RS_B0 = $00;
 RS_B0 = $00;
 RS_H0 = $00;
 RS_H0 = $00;
 RS_S0 = $00;
 RS_S0 = $00;
@@ -225,3 +227,6 @@ RS_S31 = $1F;
 RS_D31 = $1F;
 RS_D31 = $1F;
 RS_Q31 = $1F;
 RS_Q31 = $1F;
 RS_NZCV = $00;
 RS_NZCV = $00;
+RS_FPCR = $01;
+RS_FPSR = $02;
+RS_TPIDR_EL0 = $03;

+ 88 - 0
compiler/aarch64/racpu.pas

@@ -0,0 +1,88 @@
+{
+    Copyright (c) 1998-2003 by Carl Eric Codere and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe
+
+    Handles the common AArch64 assembler reader routines
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit racpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      cgbase,
+      cpubase,
+      aasmtai,aasmdata,
+      rautils;
+
+    type
+      TAArch64Operand=class(TOperand)
+      end;
+
+      TAArch64Instruction=class(TInstruction)
+        oppostfix : toppostfix;
+        function ConcatInstruction(p:TAsmList) : tai;override;
+        function Is64bit: boolean;
+        function cgsize: tcgsize;
+      end;
+
+  implementation
+
+    uses
+      verbose,
+      aasmcpu;
+
+    function TAArch64Instruction.ConcatInstruction(p:TAsmList) : tai;
+      begin
+        result:=inherited ConcatInstruction(p);
+        taicpu(result).oppostfix:=oppostfix;
+      end;
+
+
+    function TAArch64Instruction.Is64bit: boolean;
+      begin
+        result:=
+          (operands[1].opr.typ=OPR_REGISTER) and
+          (getsubreg(operands[1].opr.reg)=R_SUBQ);
+      end;
+
+    function TAArch64Instruction.cgsize: tcgsize;
+      begin
+        if ops<1 then
+          internalerror(2014122001);
+        if operands[1].opr.typ<>OPR_REGISTER then
+          internalerror(2014122002);
+        result:=reg_cgsize(operands[1].opr.reg);
+        { a 32 bit integer register could actually be 16 or 8 bit }
+        if result=OS_32 then
+          case oppostfix of
+            PF_B:
+              result:=OS_8;
+            PF_SB:
+              result:=OS_S8;
+            PF_H:
+              result:=OS_16;
+            PF_SH:
+              result:=OS_S16;
+          end;
+      end;
+
+
+end.

+ 1053 - 0
compiler/aarch64/racpugas.pas

@@ -0,0 +1,1053 @@
+{
+    Copyright (c) 1998-2002 by Carl Eric Codere and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe
+
+    Does the parsing for the AArch64 GNU AS styled inline assembler.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit racpugas;
+
+{$i fpcdefs.inc}
+
+  Interface
+
+    uses
+      raatt,racpu,
+      cpubase;
+
+    type
+      taarch64attreader = class(tattreader)
+        actoppostfix : TOpPostfix;
+        function is_asmopcode(const s: string):boolean;override;
+        function is_register(const s:string):boolean;override;
+        procedure handleopcode;override;
+        procedure BuildReference(oper: taarch64operand; is64bit: boolean);
+        procedure BuildOperand(oper: taarch64operand; is64bit: boolean);
+        function TryBuildShifterOp(instr: taarch64instruction; opnr: longint) : boolean;
+        procedure BuildOpCode(instr: taarch64instruction);
+        procedure ReadSym(oper: taarch64operand; is64bit: boolean);
+        procedure ConvertCalljmp(instr: taarch64instruction);
+        function ToConditionCode(const hs: string; is_operand: boolean): tasmcond;
+      end;
+
+
+  Implementation
+
+    uses
+      { helpers }
+      cutils,
+      { global }
+      globtype,verbose,
+      systems,aasmbase,aasmtai,aasmdata,aasmcpu,
+      { symtable }
+      symconst,symsym,
+      procinfo,
+      rabase,rautils,
+      cgbase,cgutils;
+
+
+    function taarch64attreader.is_register(const s:string):boolean;
+      type
+        treg2str = record
+          name : string[3];
+          reg : tregister;
+        end;
+
+      const
+        extraregs : array[0..3] of treg2str = (
+          (name: 'FP' ; reg: NR_FP),
+          (name: 'LR' ; reg: NR_LR),
+          (name: 'IP0'; reg: NR_IP0),
+          (name: 'IP1'; reg: NR_IP1));
+
+      var
+        i : longint;
+
+      begin
+        result:=inherited is_register(s);
+        { reg found?
+          possible aliases are always 2 or 3 chars
+        }
+        if result or not(length(s) in [2,3]) then
+          exit;
+        for i:=low(extraregs) to high(extraregs) do
+          begin
+            if s=extraregs[i].name then
+              begin
+                actasmregister:=extraregs[i].reg;
+                result:=true;
+                actasmtoken:=AS_REGISTER;
+                exit;
+              end;
+          end;
+      end;
+
+
+    procedure taarch64attreader.ReadSym(oper: taarch64operand; is64bit: boolean);
+      var
+         tempstr, mangledname : string;
+         typesize,l,k: aint;
+      begin
+        tempstr:=actasmpattern;
+        Consume(AS_ID);
+        { typecasting? }
+        if (actasmtoken=AS_LPAREN) and
+           SearchType(tempstr,typesize) then
+          begin
+            oper.hastype:=true;
+            Consume(AS_LPAREN);
+            BuildOperand(oper,is64bit);
+            Consume(AS_RPAREN);
+            if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+              oper.SetSize(typesize,true);
+          end
+        else
+          if not oper.SetupVar(tempstr,false) then
+            Message1(sym_e_unknown_id,tempstr);
+        { record.field ? }
+        if actasmtoken=AS_DOT then
+          begin
+            BuildRecordOffsetSize(tempstr,l,k,mangledname,false);
+            if (mangledname<>'') then
+              Message(asmr_e_invalid_reference_syntax);
+            inc(oper.opr.ref.offset,l);
+          end;
+      end;
+
+
+    Procedure taarch64attreader.BuildReference(oper: taarch64operand; is64bit: boolean);
+
+      procedure do_error;
+        begin
+          Message(asmr_e_invalid_reference_syntax);
+          RecoverConsume(false);
+        end;
+
+
+      procedure test_end(require_rbracket : boolean);
+        begin
+          if require_rbracket then begin
+            if not(actasmtoken=AS_RBRACKET) then
+              begin
+                do_error;
+                exit;
+              end
+            else
+              Consume(AS_RBRACKET);
+            if (actasmtoken=AS_NOT) then
+              begin
+                oper.opr.ref.addressmode:=AM_PREINDEXED;
+                Consume(AS_NOT);
+              end;
+          end;
+          if not(actasmtoken in [AS_SEPARATOR,AS_end]) then
+            do_error
+          else
+            begin
+{$IFDEF debugasmreader}
+              writeln('TEST_end_FINAL_OK. Created the following ref:');
+              writeln('oper.opr.ref.shiftimm=',oper.opr.ref.shiftimm);
+              writeln('oper.opr.ref.shiftmode=',ord(oper.opr.ref.shiftmode));
+              writeln('oper.opr.ref.index=',ord(oper.opr.ref.index));
+              writeln('oper.opr.ref.base=',ord(oper.opr.ref.base));
+              writeln('oper.opr.ref.signindex=',ord(oper.opr.ref.signindex));
+              writeln('oper.opr.ref.addressmode=',ord(oper.opr.ref.addressmode));
+              writeln;
+{$endIF debugasmreader}
+            end;
+        end;
+
+
+      function is_shifter_ref_operation(var a : tshiftmode) : boolean;
+        begin
+          a:=SM_NONE;
+          if (actasmpattern='LSL') then
+            a:=SM_LSL
+          else if (actasmpattern='UXTW') then
+            a:=SM_UXTW
+          else if (actasmpattern='SXTW') then
+            a:=SM_SXTW
+          else if (actasmpattern='SXTX') then
+            a:=SM_SXTX;
+          is_shifter_ref_operation:=not(a=SM_NONE);
+        end;
+
+
+      procedure read_index_shift(require_rbracket : boolean);
+        var
+          shift: aint;
+        begin
+          case actasmtoken of
+            AS_COMMA :
+              begin
+                Consume(AS_COMMA);
+                if not(actasmtoken=AS_ID) then
+                  do_error;
+                if is_shifter_ref_operation(oper.opr.ref.shiftmode) then
+                  begin
+                    Consume(actasmtoken);
+                    if actasmtoken=AS_HASH then
+                      begin
+                        Consume(AS_HASH);
+                        shift:=BuildConstExpression(false,true);
+                        if not(shift in [0,2+ord(is64bit)]) then
+                          do_error;
+                        oper.opr.ref.shiftimm:=shift;
+                        test_end(require_rbracket);
+                      end;
+                   end
+                 else
+                   begin
+                     do_error;
+                     exit;
+                   end;
+              end;
+            AS_RBRACKET :
+              if require_rbracket then
+                test_end(require_rbracket)
+              else
+                begin
+                  do_error;
+                  exit;
+                end;
+            AS_SEPARATOR,AS_END :
+              if not require_rbracket then
+                test_end(false)
+               else
+                 do_error;
+            else
+              begin
+                do_error;
+                exit;
+              end;
+          end;
+        end;
+
+
+      procedure read_index(require_rbracket : boolean);
+        var
+          recname : string;
+          o_int,s_int : aint;
+        begin
+          case actasmtoken of
+            AS_REGISTER :
+              begin
+                if getsupreg(actasmregister)=RS_XZR then
+                  Message1(asmr_e_invalid_ref_register,actasmpattern);
+                oper.opr.ref.index:=actasmregister;
+                Consume(AS_REGISTER);
+                read_index_shift(require_rbracket);
+                exit;
+              end;
+            AS_HASH : // constant
+              begin
+                Consume(AS_HASH);
+(*
+                if actasmtoken=AS_COLON then
+                  begin
+                    consume(AS_COLON);
+                    { GNU-style lower 12 bits of address of non-GOT-based
+                      access }
+                    if (actasmpattern='LO12') then
+                      begin
+                        consume(actasmtoken);
+                        consume(AS_COLON);
+                        if not oper.SetupVar(actasmpattern,false) then
+                          begin
+                            do_error;
+                            exit
+                          end;
+                        consume(AS_ID);
+                        oper.opr.ref.refaddr:=addr_??? (not gotpageoffset);
+                      end
+                    else
+                      begin
+                        do_error;
+                        exit
+                      end;
+                  end
+                else
+*)
+                  begin
+                    o_int:=BuildConstExpression(false,true);
+                    inc(oper.opr.ref.offset,o_int);
+                  end;
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_ID :
+              begin
+                recname:=actasmpattern;
+                Consume(AS_ID);
+                { Apple-style got page offset }
+                if actasmtoken=AS_AT then
+                  begin
+                    if not oper.SetupVar(recname,false) then
+                      begin
+                        do_error;
+                        exit
+                      end;
+                    consume(AS_AT);
+                    if actasmpattern='GOTPAGEOFF' then
+                      begin
+                        consume(actasmtoken);
+                        oper.opr.ref.refaddr:=addr_gotpageoffset;
+                      end
+                    else if actasmpattern='PAGEOFF' then
+                      begin
+                        consume(actasmtoken);
+                        oper.opr.ref.refaddr:=addr_pageoffset;
+                      end
+                    else
+                      begin
+                        do_error;
+                        exit
+                      end;
+                  end
+                else
+                  begin
+                    BuildRecordOffsetSize(recname,o_int,s_int,recname,false);
+                    inc(oper.opr.ref.offset,o_int);
+                  end;
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_AT:
+              begin
+                do_error;
+                exit;
+              end;
+            AS_RBRACKET :
+              begin
+                if require_rbracket then
+                  begin
+                    test_end(require_rbracket);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error; // unexpected rbracket
+                    exit;
+                  end;
+              end;
+            AS_SEPARATOR,AS_end :
+              begin
+                if not require_rbracket then
+                  begin
+                    test_end(false);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error;
+                    exit;
+                  end;
+              end;
+            else
+              begin
+                // unexpected token
+                do_error;
+                exit;
+              end;
+          end; // case
+        end;
+
+
+      procedure try_prepostindexed;
+        begin
+          Consume(AS_RBRACKET);
+          case actasmtoken of
+            AS_COMMA :
+              begin // post-indexed
+                Consume(AS_COMMA);
+                oper.opr.ref.addressmode:=AM_POSTINDEXED;
+                read_index(false);
+                exit;
+              end;
+            AS_NOT :
+              begin   // pre-indexed
+                Consume(AS_NOT);
+                oper.opr.ref.addressmode:=AM_PREINDEXED;
+                test_end(false);
+                exit;
+              end;
+            else
+              begin
+                test_end(false);
+                exit;
+              end;
+          end; // case
+        end;
+
+      begin
+        Consume(AS_LBRACKET);
+        oper.opr.ref.addressmode:=AM_OFFSET; // assume "neither PRE nor POST inc"
+        if actasmtoken=AS_REGISTER then
+          begin
+            if getsupreg(actasmregister)=RS_XZR then
+              Message1(asmr_e_invalid_ref_register,actasmpattern);
+            oper.opr.ref.base:=actasmregister;
+            Consume(AS_REGISTER);
+            case actasmtoken of
+              AS_RBRACKET :
+                begin
+                  try_prepostindexed;
+                  exit;
+                end;
+              AS_COMMA :
+                begin
+                  Consume(AS_COMMA);
+                  read_index(true);
+                  exit;
+                end;
+              else
+                begin
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                end;
+            end;
+          end
+        else
+          Begin
+            case actasmtoken of
+              AS_ID :
+                begin
+                  { TODO: local variables and parameters }
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                  exit;
+                end;
+              else
+                begin // elsecase
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                  exit;
+                end;
+            end;
+          end;
+      end;
+
+
+    function taarch64attreader.TryBuildShifterOp(instr: taarch64instruction; opnr: longint): boolean;
+
+      procedure handlepara(sm : tshiftmode);
+        begin
+          consume(AS_ID);
+          fillchar(instr.operands[opnr].opr,sizeof(instr.operands[opnr].opr),0);
+          instr.operands[opnr].opr.typ:=OPR_SHIFTEROP;
+          instr.operands[opnr].opr.shifterop.shiftmode:=sm;
+          if (sm=SM_LSL) or
+             (actasmtoken=AS_HASH) then
+            begin
+              consume(AS_HASH);
+              instr.operands[opnr].opr.shifterop.shiftimm:=BuildConstExpression(false,false);
+            end;
+        end;
+
+      const
+        shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
+          ('LSL','LSR','ASR',
+           'UXTB','UXTH','UXTW','UXTX',
+           'SXTB','SXTH','SXTW','SXTX');
+      var
+        sm: tshiftmode;
+        i: longint;
+        usessp,
+        useszr: boolean;
+      begin
+        result:=false;
+        if (actasmtoken=AS_ID) then
+          begin
+            for sm:=low(shiftmode2str) to high(shiftmode2str) do
+              if actasmpattern=shiftmode2str[sm] then
+                begin
+                  handlepara(sm);
+                  if instr.operands[1].opr.typ=OPR_REGISTER then
+                    begin
+                      { the possible shifter ops depend on whether this
+                        instruction uses sp and/or zr }
+                      usessp:=false;
+                      useszr:=false;
+                      for i:=low(instr.operands) to pred(opnr) do
+                        begin
+                          if (instr.operands[1].opr.typ=OPR_REGISTER) then
+                            case getsupreg(instr.operands[1].opr.reg) of
+                              RS_XZR:
+                                useszr:=true;
+                              RS_SP:
+                                usessp:=true;
+                            end;
+                        end;
+                      result:=valid_shifter_operand(instr.opcode,useszr,usessp,instr.Is64bit,sm,instr.operands[opnr].opr.shifterop.shiftimm);
+                    end
+                end;
+          end;
+      end;
+
+
+    function taarch64attreader.ToConditionCode(const hs: string; is_operand: boolean): tasmcond;
+      begin
+        case actopcode of
+          A_CSEL,A_CSINC,A_CSINV,A_CSNEG,A_CSET,A_CSETM,
+          A_CINC,A_CINV,A_CNEG,A_CCMN,A_CCMP,
+          A_B:
+            begin
+              { search for condition, conditions are always 2 chars }
+              if (is_operand<>(actopcode=A_B)) and
+                 (length(hs)>1) then
+                begin
+                  { workaround for DFA bug }
+                  result:=low(tasmcond);
+                  for result:=low(tasmcond) to high(tasmcond) do
+                    begin
+                      if hs=uppercond2str[result] then
+                        exit;
+                    end;
+                end;
+            end;
+        end;
+        result:=C_None;;
+      end;
+
+
+    Procedure taarch64attreader.BuildOperand(oper: taarch64operand; is64bit: boolean);
+      var
+        expr: string;
+        typesize, l: aint;
+
+        procedure MaybeAddGotAddrMode;
+          begin
+            if actasmtoken=AS_AT then
+              begin
+                consume(AS_AT);
+                if actasmpattern='GOTPAGE' then
+                  oper.opr.ref.refaddr:=addr_gotpage
+                else if actasmpattern='GOTPAGEOFF' then
+                  oper.opr.ref.refaddr:=addr_gotpageoffset
+                else if actasmpattern='PAGE' then
+                  oper.opr.ref.refaddr:=addr_page
+                else if actasmpattern='PAGEOFF' then
+                  oper.opr.ref.refaddr:=addr_pageoffset
+                else
+                  Message(asmr_e_expr_illegal);
+                consume(actasmtoken);
+              end
+            else
+              oper.opr.ref.refaddr:=addr_pic;
+          end;
+
+        procedure AddLabelOperand(hl:tasmlabel);
+          begin
+            if not(actasmtoken in [AS_PLUS,AS_MINUS,AS_LPAREN]) and
+               is_calljmp(actopcode) then
+             begin
+               oper.opr.typ:=OPR_SYMBOL;
+               oper.opr.symbol:=hl;
+             end
+            else if (actopcode=A_ADR) or
+               (actopcode=A_ADRP) then
+              begin
+                oper.InitRef;
+                MaybeAddGotAddrMode;
+                oper.opr.ref.symbol:=hl;
+                if (actasmtoken in [AS_PLUS, AS_MINUS]) then
+                  begin
+                    l:=BuildConstExpression(true,false);
+                    oper.opr.ref.offset:=l;
+                  end;
+              end;
+          end;
+
+
+        procedure MaybeRecordOffset;
+          var
+            mangledname: string;
+            hasdot  : boolean;
+            l,
+            toffset,
+            tsize   : aint;
+          begin
+            if not(actasmtoken in [AS_DOT,AS_PLUS,AS_MINUS]) then
+              exit;
+            l:=0;
+            hasdot:=(actasmtoken=AS_DOT);
+            if hasdot then
+              begin
+                if expr<>'' then
+                  begin
+                    BuildRecordOffsetSize(expr,toffset,tsize,mangledname,false);
+                    if (oper.opr.typ<>OPR_CONSTANT) and
+                       (mangledname<>'') then
+                      Message(asmr_e_wrong_sym_type);
+                    inc(l,toffset);
+                    oper.SetSize(tsize,true);
+                  end;
+              end;
+            if actasmtoken in [AS_PLUS,AS_MINUS] then
+              inc(l,BuildConstExpression(true,false));
+            case oper.opr.typ of
+              OPR_LOCAL :
+                begin
+                  { don't allow direct access to fields of parameters, because that
+                    will generate buggy code. Allow it only for explicit typecasting }
+                  if hasdot and
+                     (not oper.hastype) and
+                     (tabstractnormalvarsym(oper.opr.localsym).owner.symtabletype=parasymtable) and
+                     (current_procinfo.procdef.proccalloption<>pocall_register) then
+                    Message(asmr_e_cannot_access_field_directly_for_parameters);
+                  inc(oper.opr.localsymofs,l)
+                end;
+              OPR_CONSTANT :
+                inc(oper.opr.val,l);
+              OPR_REFERENCE :
+                if (mangledname<>'') then
+                  begin
+                    if (oper.opr.val<>0) then
+                      Message(asmr_e_wrong_sym_type);
+                    oper.opr.typ:=OPR_SYMBOL;
+                    oper.opr.symbol:=current_asmdata.RefAsmSymbol(mangledname);
+                  end
+                else
+                  inc(oper.opr.val,l);
+              OPR_SYMBOL:
+                Message(asmr_e_invalid_symbol_ref);
+              else
+                internalerror(200309221);
+            end;
+          end;
+
+
+        function MaybeBuildReference(is64bit: boolean):boolean;
+          { Try to create a reference, if not a reference is found then false
+            is returned }
+          begin
+            MaybeBuildReference:=true;
+            case actasmtoken of
+              AS_INTNUM,
+              AS_MINUS,
+              AS_PLUS:
+                Begin
+                  oper.opr.ref.offset:=BuildConstExpression(True,False);
+                  if actasmtoken<>AS_LPAREN then
+                    Message(asmr_e_invalid_reference_syntax)
+                  else
+                    BuildReference(oper,is64bit);
+                end;
+              AS_LPAREN:
+                BuildReference(oper,is64bit);
+              AS_ID: { only a variable is allowed ... }
+                Begin
+                  ReadSym(oper,is64bit);
+                  case actasmtoken of
+                    AS_end,
+                    AS_SEPARATOR,
+                    AS_COMMA: ;
+                    AS_LPAREN:
+                      BuildReference(oper,is64bit);
+                  else
+                    Begin
+                      Message(asmr_e_invalid_reference_syntax);
+                      Consume(actasmtoken);
+                    end;
+                  end; {end case }
+                end;
+              else
+               MaybeBuildReference:=false;
+            end; { end case }
+          end;
+
+
+      var
+        tempreg: tregister;
+        hl: tasmlabel;
+        icond: tasmcond;
+      Begin
+        expr:='';
+        case actasmtoken of
+          AS_LBRACKET: { Memory reference or constant expression }
+            Begin
+              oper.InitRef;
+              BuildReference(oper,is64bit);
+            end;
+
+          AS_HASH: { Constant expression  }
+            Begin
+              Consume(AS_HASH);
+              BuildConstantOperand(oper);
+            end;
+
+          (*
+          AS_INTNUM,
+          AS_MINUS,
+          AS_PLUS:
+            Begin
+              { Constant memory offset }
+              { This must absolutely be followed by (  }
+              oper.InitRef;
+              oper.opr.ref.offset:=BuildConstExpression(True,False);
+              if actasmtoken<>AS_LPAREN then
+                begin
+                  ofs:=oper.opr.ref.offset;
+                  BuildConstantOperand(oper);
+                  inc(oper.opr.val,ofs);
+                end
+              else
+                BuildReference(oper,is64bit);
+            end;
+          *)
+          AS_ID: { A constant expression, or a Variable ref.  }
+            Begin
+              { Condition code? }
+              icond:=ToConditionCode(actasmpattern,true);
+              if icond<>C_None then
+                begin
+                  oper.opr.typ:=OPR_COND;
+                  oper.opr.cc:=icond;
+                  consume(AS_ID);
+                end
+              else
+              { Local Label ? }
+              if is_locallabel(actasmpattern) then
+               begin
+                 CreateLocalLabel(actasmpattern,hl,false);
+                 Consume(AS_ID);
+                 AddLabelOperand(hl);
+               end
+              else
+               { Check for label }
+               if SearchLabel(actasmpattern,hl,false) then
+                 begin
+                   Consume(AS_ID);
+                   AddLabelOperand(hl);
+                 end
+              else
+               { probably a variable or normal expression }
+               { or a procedure (such as in CALL ID)      }
+               begin
+                 { is it a constant ? }
+                 if SearchIConstant(actasmpattern,l) then
+                  begin
+                    if not (oper.opr.typ in [OPR_NONE,OPR_CONSTANT]) then
+                      Message(asmr_e_invalid_operand_type);
+                    BuildConstantOperand(oper);
+                  end
+                 else
+                  begin
+                    expr:=actasmpattern;
+                    Consume(AS_ID);
+                    { typecasting? }
+                    if (actasmtoken=AS_LPAREN) and
+                       SearchType(expr,typesize) then
+                     begin
+                       oper.hastype:=true;
+                       Consume(AS_LPAREN);
+                       BuildOperand(oper,is64bit);
+                       Consume(AS_RPAREN);
+                       if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+                         oper.SetSize(typesize,true);
+                     end
+                    else
+                     begin
+                       if not(oper.SetupVar(expr,false)) then
+                        Begin
+                          { look for special symbols ... }
+                          if expr= '__HIGH' then
+                            begin
+                              consume(AS_LPAREN);
+                              if not oper.setupvar('high'+actasmpattern,false) then
+                                Message1(sym_e_unknown_id,'high'+actasmpattern);
+                              consume(AS_ID);
+                              consume(AS_RPAREN);
+                            end
+                          else
+                           if expr = '__RESULT' then
+                            oper.SetUpResult
+                          else
+                           if expr = '__SELF' then
+                            oper.SetupSelf
+                          else
+                           if expr = '__OLDEBP' then
+                            oper.SetupOldEBP
+                          else
+                            Message1(sym_e_unknown_id,expr);
+                        end
+                       else
+                         MaybeAddGotAddrMode;
+                     end;
+                  end;
+                  if actasmtoken=AS_DOT then
+                    MaybeRecordOffset;
+                  { add a constant expression? }
+                  if (actasmtoken=AS_PLUS) then
+                   begin
+                     l:=BuildConstExpression(true,false);
+                     case oper.opr.typ of
+                       OPR_CONSTANT :
+                         inc(oper.opr.val,l);
+                       OPR_LOCAL :
+                         inc(oper.opr.localsymofs,l);
+                       OPR_REFERENCE :
+                         inc(oper.opr.ref.offset,l);
+                       else
+                         internalerror(200309202);
+                     end;
+                   end
+               end;
+              { Do we have a indexing reference, then parse it also }
+              if actasmtoken=AS_LPAREN then
+                BuildReference(oper,is64bit);
+            end;
+
+          { Register, a variable reference or a constant reference  }
+          AS_REGISTER:
+            Begin
+              { save the type of register used. }
+              tempreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (actasmtoken in [AS_end,AS_SEPARATOR,AS_COMMA]) then
+                Begin
+                  if not (oper.opr.typ in [OPR_NONE,OPR_REGISTER]) then
+                    Message(asmr_e_invalid_operand_type);
+                  oper.opr.typ:=OPR_REGISTER;
+                  oper.opr.reg:=tempreg;
+                end
+              else
+                Message(asmr_e_syn_operand);
+            end;
+
+          AS_end,
+          AS_SEPARATOR,
+          AS_COMMA: ;
+        else
+          Begin
+            Message(asmr_e_syn_operand);
+            Consume(actasmtoken);
+          end;
+        end; { end case }
+      end;
+
+{*****************************************************************************
+                                taarch64attreader
+*****************************************************************************}
+
+    procedure taarch64attreader.BuildOpCode(instr: taarch64instruction);
+      var
+        operandnum : longint;
+      Begin
+        { opcode }
+        if (actasmtoken<>AS_OPCODE) then
+         Begin
+           Message(asmr_e_invalid_or_missing_opcode);
+           RecoverConsume(true);
+           exit;
+         end;
+        { Fill the instr object with the current state }
+        with instr do
+          begin
+            Opcode:=ActOpcode;
+            condition:=ActCondition;
+            oppostfix:=actoppostfix;
+          end;
+        Consume(AS_OPCODE);
+
+        { We are reading operands, so opcode will be an AS_ID }
+        operandnum:=1;
+        { Zero operand opcode ?  }
+        if actasmtoken in [AS_SEPARATOR,AS_end] then
+         begin
+           instr.Ops:=0;
+           exit;
+         end;
+        { Read the operands }
+        repeat
+          case actasmtoken of
+            AS_COMMA: { Operand delimiter }
+              Begin
+                { operandnum and not operandnum+1, because tinstruction is
+                  one-based and taicpu is zero-based)
+                }
+                if can_be_shifter_operand(instr.opcode,operandnum) then
+                  begin
+                    Consume(AS_COMMA);
+                    if not TryBuildShifterOp(instr,operandnum+1) then
+                      Message(asmr_e_illegal_shifterop_syntax);
+                    Inc(operandnum);
+                  end
+                else
+                  begin
+                    if operandnum>Max_Operands then
+                      Message(asmr_e_too_many_operands)
+                    else
+                      Inc(operandnum);
+                    Consume(AS_COMMA);
+                  end;
+              end;
+            AS_SEPARATOR,
+            AS_end : { End of asm operands for this opcode  }
+              begin
+                break;
+              end;
+          else
+            begin
+              BuildOperand(taarch64operand(instr.operands[operandnum]),instr.Is64bit);
+              instr.Ops:=operandnum;
+              if instr.operands[operandnum].opr.typ=OPR_REFERENCE then
+                if simple_ref_type(instr.opcode,instr.cgsize,instr.oppostfix,instr.operands[operandnum].opr.ref)<>sr_simple then
+                  Message(asmr_e_invalid_reference_syntax);
+                ;
+            end;
+          end; { end case }
+        until false;
+      end;
+
+
+    function taarch64attreader.is_asmopcode(const s: string):boolean;
+
+      const
+        { sorted by length so longer postfixes will match first }
+        postfix2strsorted : array[1..7] of string[3] = (
+          'SB','SH','SW',
+          'B','H','W',
+          'S');
+
+        postfixsorted : array[1..7] of TOpPostfix = (
+          PF_SB,PF_SH,PF_SW,
+          PF_B,PF_H,PF_W,
+          PF_S);
+
+      var
+        j  : longint;
+        hs : string;
+        maxlen : longint;
+        icond : tasmcond;
+      Begin
+        { making s a value parameter would break other assembler readers }
+        hs:=s;
+        is_asmopcode:=false;
+
+        { clear opcode }
+        actopcode:=A_None;
+        actcondition:=C_None;
+
+        { b.cond ? }
+        if (length(hs)=4) and
+           (hs[1]='B') and
+           (hs[2]='.') then
+          begin
+            actopcode:=A_B;
+            actasmtoken:=AS_OPCODE;
+            actcondition:=ToConditionCode(copy(hs,3,length(actasmpattern)-2),false);
+            if actcondition<>C_None then
+              is_asmopcode:=true;
+            exit;
+          end;
+
+        maxlen:=max(length(hs),7);
+        actopcode:=A_NONE;
+        for j:=maxlen downto 1 do
+          begin
+            actopcode:=tasmop(PtrUInt(iasmops.Find(copy(hs,1,j))));
+            if actopcode<>A_NONE then
+              begin
+                actasmtoken:=AS_OPCODE;
+                { strip op code }
+                delete(hs,1,j);
+                break;
+              end;
+          end;
+        if actopcode=A_NONE then
+          exit;
+
+        { check for postfix }
+        if length(hs)>0 then
+          begin
+            for j:=low(postfixsorted) to high(postfixsorted) do
+              begin
+                if copy(hs,1,length(postfix2strsorted[j]))=postfix2strsorted[j] then
+                  begin
+                    actoppostfix:=postfixsorted[j];
+                    { strip postfix }
+                    delete(hs,1,length(postfix2strsorted[j]));
+                    break;
+                  end;
+              end;
+          end;
+        { if we stripped all postfixes, it's a valid opcode }
+        is_asmopcode:=length(hs)=0;
+      end;
+
+
+    procedure taarch64attreader.ConvertCalljmp(instr: taarch64instruction);
+      var
+        newopr : toprrec;
+      begin
+        if instr.Operands[1].opr.typ=OPR_REFERENCE then
+          begin
+            newopr.typ:=OPR_SYMBOL;
+            newopr.symbol:=instr.Operands[1].opr.ref.symbol;
+            newopr.symofs:=instr.Operands[1].opr.ref.offset;
+            if (instr.Operands[1].opr.ref.base<>NR_NO) or
+              (instr.Operands[1].opr.ref.index<>NR_NO) or
+              (instr.Operands[1].opr.ref.refaddr<>addr_pic) then
+              Message(asmr_e_syn_operand);
+            instr.Operands[1].opr:=newopr;
+          end;
+      end;
+
+    procedure taarch64attreader.handleopcode;
+      var
+        instr: taarch64instruction;
+      begin
+        instr:=taarch64instruction.Create(taarch64operand);
+        BuildOpcode(instr);
+        if is_calljmp(instr.opcode) then
+          ConvertCalljmp(instr);
+        {
+        instr.AddReferenceSizes;
+        instr.SetInstructionOpsize;
+        instr.CheckOperandSizes;
+        }
+        instr.ConcatInstruction(curlist);
+        instr.Free;
+        actoppostfix:=PF_None;
+      end;
+
+
+{*****************************************************************************
+                                     Initialize
+*****************************************************************************}
+
+const
+  asmmode_arm_att_info : tasmmodeinfo =
+          (
+            id    : asmmode_arm_gas;
+            idtxt : 'GAS';
+            casmreader : taarch64attreader;
+          );
+
+  asmmode_arm_standard_info : tasmmodeinfo =
+          (
+            id    : asmmode_standard;
+            idtxt : 'STANDARD';
+            casmreader : taarch64attreader;
+          );
+
+initialization
+  RegisterAsmMode(asmmode_arm_att_info);
+  RegisterAsmMode(asmmode_arm_standard_info);
+end.

+ 171 - 0
compiler/aarch64/rgcpu.pas

@@ -0,0 +1,171 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    This unit implements the SPARC specific class for the register
+    allocator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit rgcpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      cgbase,cgutils,
+      cpubase,
+      globtype,
+      rgobj;
+
+    type
+      trgcpu=class(trgobj)
+        procedure do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
+        procedure do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
+       protected
+        procedure do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister);
+      end;
+
+      trgintcpu=class(trgcpu)
+        procedure add_cpu_interferences(p: tai); override;
+      end;
+
+
+implementation
+
+    uses
+      verbose,cutils,
+      cgobj;
+
+    procedure trgcpu.do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
+      begin
+        do_spill_op(list,A_LDR,pos,spilltemp,tempreg);
+      end;
+
+
+    procedure trgcpu.do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
+      begin
+        do_spill_op(list,A_STR,pos,spilltemp,tempreg);
+      end;
+
+
+    procedure trgcpu.do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister);
+      var
+        helpins  : tai;
+        tmpref   : treference;
+        helplist : TAsmList;
+        hreg     : tregister;
+        isload   : boolean;
+      begin
+        isload:=op=A_LDR;
+        { offset out of range for regular load/store? }
+        if simple_ref_type(op,reg_cgsize(tempreg),PF_None,spilltemp)<>sr_simple then
+          begin
+            helplist:=TAsmList.create;
+
+            if getregtype(tempreg)=R_INTREGISTER then
+              hreg:=tempreg
+            else
+              hreg:=cg.getaddressregister(helplist);
+
+            cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
+            reference_reset_base(tmpref,spilltemp.base,0,sizeof(pint));
+            tmpref.index:=hreg;
+            if isload then
+              helpins:=spilling_create_load(tmpref,tempreg)
+            else
+              helpins:=spilling_create_store(tempreg,tmpref);
+            helplist.concat(helpins);
+            add_cpu_interferences(helpins);
+            list.insertlistafter(pos,helplist);
+            helplist.free;
+          end
+        else if isload then
+          inherited do_spill_read(list,pos,spilltemp,tempreg)
+        else
+          inherited do_spill_written(list,pos,spilltemp,tempreg)
+      end;
+
+
+    procedure trgintcpu.add_cpu_interferences(p: tai);
+     var
+       i, j: longint;
+     begin
+       if p.typ=ait_instruction then
+         begin
+           { add interferences for instructions that can have SP as a register
+             operand }
+           case taicpu(p).opcode of
+             A_MOV:
+               { all operands can be SP }
+               exit;
+             A_ADD,
+             A_SUB,
+             A_CMP,
+             A_CMN:
+               { ok as destination or first source in immediate or extended
+                 register form }
+               if (taicpu(p).oper[taicpu(p).ops-1]^.typ<>top_shifterop) or
+                  valid_shifter_operand(taicpu(p).opcode,false,true,
+                    reg_cgsize(taicpu(p).oper[0]^.reg) in [OS_64,OS_S64],
+                    taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftmode,
+                    taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftimm) then
+                 begin
+                   if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_shifterop then
+                     i:=taicpu(p).ops-2
+                   else
+                     i:=taicpu(p).ops-1;
+                   if (taicpu(p).oper[i]^.typ=top_reg) then
+                     add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
+                   exit;
+                 end;
+             A_AND,
+             A_EOR,
+             A_ORR,
+             A_TST:
+               { ok in immediate form }
+               if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_const then
+                 exit;
+           end;
+           { add interferences for other registers }
+           for i:=0 to taicpu(p).ops-1 do
+             begin
+               case taicpu(p).oper[i]^.typ of
+                 top_reg:
+                   if getregtype(taicpu(p).oper[i]^.reg)=R_INTREGISTER then
+                     add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
+                 top_ref:
+                   begin
+                     { sp can always be base, never be index }
+                     if taicpu(p).oper[i]^.ref^.index<>NR_NO then
+                       add_edge(getsupreg(taicpu(p).oper[i]^.ref^.index),RS_SP);
+                     { in case of write back, the base register must be
+                       different from the loaded/stored register }
+                     if (taicpu(p).oper[i]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+                        (taicpu(p).oper[i]^.ref^.base<>NR_NO) then
+                       begin
+                         for j:=pred(i) downto 0 do
+                           if taicpu(p).oper[j]^.typ=TOP_REG then
+                             add_edge(getsupreg(taicpu(p).oper[j]^.reg),getsupreg(taicpu(p).oper[i]^.ref^.base));
+                       end;
+                   end;
+               end;
+             end;
+         end;
+     end;
+
+end.

+ 21 - 4
compiler/aasmtai.pas

@@ -219,11 +219,11 @@ interface
 {$ifdef arm}
 {$ifdef arm}
        { ARM only }
        { ARM only }
        ,top_regset
        ,top_regset
-       ,top_conditioncode
        ,top_modeflags
        ,top_modeflags
        ,top_specialreg
        ,top_specialreg
 {$endif arm}
 {$endif arm}
 {$if defined(arm) or defined(aarch64)}
 {$if defined(arm) or defined(aarch64)}
+       ,top_conditioncode
        ,top_shifterop
        ,top_shifterop
 {$endif defined(arm) or defined(aarch64)}
 {$endif defined(arm) or defined(aarch64)}
 {$ifdef m68k}
 {$ifdef m68k}
@@ -266,12 +266,12 @@ interface
           top_local  : (localoper:plocaloper);
           top_local  : (localoper:plocaloper);
       {$ifdef arm}
       {$ifdef arm}
           top_regset : (regset:^tcpuregisterset; regtyp: tregistertype; subreg: tsubregister; usermode: boolean);
           top_regset : (regset:^tcpuregisterset; regtyp: tregistertype; subreg: tsubregister; usermode: boolean);
-          top_conditioncode : (cc : TAsmCond);
           top_modeflags : (modeflags : tcpumodeflags);
           top_modeflags : (modeflags : tcpumodeflags);
           top_specialreg : (specialreg:tregister; specialflags:tspecialregflags);
           top_specialreg : (specialreg:tregister; specialflags:tspecialregflags);
       {$endif arm}
       {$endif arm}
       {$if defined(arm) or defined(aarch64)}
       {$if defined(arm) or defined(aarch64)}
           top_shifterop : (shifterop : pshifterop);
           top_shifterop : (shifterop : pshifterop);
+          top_conditioncode : (cc : TAsmCond);
       {$endif defined(arm) or defined(aarch64)}
       {$endif defined(arm) or defined(aarch64)}
       {$ifdef m68k}
       {$ifdef m68k}
           top_regset : (dataregset,addrregset,fpuregset:^tcpuregisterset);
           top_regset : (dataregset,addrregset,fpuregset:^tcpuregisterset);
@@ -331,7 +331,7 @@ interface
         mark_Position
         mark_Position
       );
       );
 
 
-      TRegAllocType = (ra_alloc,ra_dealloc,ra_sync,ra_resize);
+      TRegAllocType = (ra_alloc,ra_dealloc,ra_sync,ra_resize,ra_markused);
 
 
       TStabType = (stab_stabs,stab_stabn,stab_stabd,
       TStabType = (stab_stabs,stab_stabn,stab_stabd,
                    { AIX/XCOFF stab types }
                    { AIX/XCOFF stab types }
@@ -354,6 +354,8 @@ interface
         asd_jclass,asd_jinterface,asd_jsuper,asd_jfield,asd_jlimit,asd_jline,
         asd_jclass,asd_jinterface,asd_jsuper,asd_jfield,asd_jlimit,asd_jline,
         { .ent/.end for MIPS and Alpha }
         { .ent/.end for MIPS and Alpha }
         asd_ent,asd_ent_end,
         asd_ent,asd_ent_end,
+        { supported by recent clang-based assemblers for data-in-code  }
+        asd_data_region, asd_end_data_region,
         { .thumb_func for ARM }
         { .thumb_func for ARM }
         asd_thumb_func
         asd_thumb_func
       );
       );
@@ -368,7 +370,7 @@ interface
 
 
 
 
     const
     const
-      regallocstr : array[tregalloctype] of string[10]=('allocated','released','sync','resized');
+      regallocstr : array[tregalloctype] of string[10]=('allocated','released','sync','resized','used');
       tempallocstr : array[boolean] of string[10]=('released','allocated');
       tempallocstr : array[boolean] of string[10]=('released','allocated');
       stabtypestr : array[TStabType] of string[8]=(
       stabtypestr : array[TStabType] of string[8]=(
         'stabs','stabn','stabd',
         'stabs','stabn','stabd',
@@ -385,6 +387,8 @@ interface
         'class','interface','super','field','limit','line',
         'class','interface','super','field','limit','line',
         { .ent/.end for MIPS and Alpha }
         { .ent/.end for MIPS and Alpha }
         'ent','end',
         'ent','end',
+        { supported by recent clang-based assemblers for data-in-code }
+        'data_region','end_data_region',
         { .thumb_func for ARM }
         { .thumb_func for ARM }
         'thumb_func'
         'thumb_func'
       );
       );
@@ -711,6 +715,7 @@ interface
           constructor dealloc(r : tregister;ainstr:tai);
           constructor dealloc(r : tregister;ainstr:tai);
           constructor sync(r : tregister);
           constructor sync(r : tregister);
           constructor resize(r : tregister);
           constructor resize(r : tregister);
+          constructor markused(r : tregister);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
        end;
        end;
@@ -2421,6 +2426,15 @@ implementation
       end;
       end;
 
 
 
 
+    constructor tai_regalloc.markused(r : tregister);
+      begin
+        inherited create;
+        typ:=ait_regalloc;
+        ratype:=ra_markused;
+        reg:=r;
+      end;
+
+
     constructor tai_regalloc.ppuload(t:taitype;ppufile:tcompilerppufile);
     constructor tai_regalloc.ppuload(t:taitype;ppufile:tcompilerppufile);
       begin
       begin
         inherited ppuload(t,ppufile);
         inherited ppuload(t,ppufile);
@@ -2553,6 +2567,9 @@ implementation
 {$ifdef ARM}
 {$ifdef ARM}
               and not(r.base=NR_R15)
               and not(r.base=NR_R15)
 {$endif ARM}
 {$endif ARM}
+{$ifdef aarch64}
+              and not(r.refaddr in [addr_full,addr_gotpageoffset,addr_gotpage])
+{$endif aarch64}
               then
               then
               internalerror(200502052);
               internalerror(200502052);
             typ:=top_ref;
             typ:=top_ref;

+ 4 - 1
compiler/aggas.pas

@@ -532,6 +532,8 @@ implementation
          system_powerpc64_darwin,
          system_powerpc64_darwin,
          system_x86_64_darwin,
          system_x86_64_darwin,
          system_arm_darwin,
          system_arm_darwin,
+         system_aarch64_darwin,
+         system_x86_64_iphonesim,
          system_powerpc_aix,
          system_powerpc_aix,
          system_powerpc64_aix:
          system_powerpc64_aix:
            begin
            begin
@@ -567,7 +569,8 @@ implementation
                     AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
                     AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
                   else
                   else
                     AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
                     AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
-                { darwin/x86-64 uses RIP-based GOT addressing, no symbol stubs }
+                { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
+                  explicit symbol stubs }
                 else
                 else
                   internalerror(2006031101);
                   internalerror(2006031101);
               end;
               end;

+ 19 - 5
compiler/aoptobj.pas

@@ -361,7 +361,7 @@ Unit AoptObj;
 
 
     function JumpTargetOp(ai: taicpu): poper; inline;
     function JumpTargetOp(ai: taicpu): poper; inline;
       begin
       begin
-{$ifdef MIPS}
+{$if defined(MIPS)}
         { MIPS branches can have 1,2 or 3 operands, target label is the last one. }
         { MIPS branches can have 1,2 or 3 operands, target label is the last one. }
         result:=ai.oper[ai.ops-1];
         result:=ai.oper[ai.ops-1];
 {$else MIPS}
 {$else MIPS}
@@ -1179,9 +1179,9 @@ Unit AoptObj;
     function IsJumpToLabel(hp: taicpu): boolean;
     function IsJumpToLabel(hp: taicpu): boolean;
       begin
       begin
         result:=(hp.opcode=aopt_uncondjmp) and
         result:=(hp.opcode=aopt_uncondjmp) and
-{$ifdef arm}
+{$if defined(arm) or defined(aarch64)}
           (hp.condition=c_None) and
           (hp.condition=c_None) and
-{$endif arm}
+{$endif arm or aarch64}
           (JumpTargetOp(hp)^.typ = top_ref) and
           (JumpTargetOp(hp)^.typ = top_ref) and
           (JumpTargetOp(hp)^.ref^.symbol is TAsmLabel);
           (JumpTargetOp(hp)^.ref^.symbol is TAsmLabel);
       end;
       end;
@@ -1260,6 +1260,14 @@ Unit AoptObj;
                     exit;
                     exit;
                   if not GetFinalDestination(taicpu(p1),succ(level)) then
                   if not GetFinalDestination(taicpu(p1),succ(level)) then
                     exit;
                     exit;
+{$if defined(aarch64)}
+                  { can't have conditional branches to
+                    global labels on AArch64, because the
+                    offset may become too big }
+                  if not(taicpu(hp).condition in [C_None,C_AL,C_NV]) and
+                     (tasmlabel(JumpTargetOp(taicpu(p1))^.ref^.symbol).bind<>AB_LOCAL) then
+                    exit;
+{$endif aarch64}
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).decrefs;
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).decrefs;
                   JumpTargetOp(hp)^.ref^.symbol:=JumpTargetOp(taicpu(p1))^.ref^.symbol;
                   JumpTargetOp(hp)^.ref^.symbol:=JumpTargetOp(taicpu(p1))^.ref^.symbol;
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).increfs;
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).increfs;
@@ -1398,9 +1406,15 @@ Unit AoptObj;
                                     FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp2) then
                                     FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp2) then
                                   begin
                                   begin
                                     if (taicpu(p).opcode=aopt_condjmp)
                                     if (taicpu(p).opcode=aopt_condjmp)
-  {$ifdef arm}
+  {$if defined(arm) or defined(aarch64)}
                                       and (taicpu(p).condition<>C_None)
                                       and (taicpu(p).condition<>C_None)
-  {$endif arm}
+  {$endif arm or aarch64}
+  {$if defined(aarch64)}
+                                      { can't have conditional branches to
+                                        global labels on AArch64, because the
+                                        offset may become too big }
+                                      and (tasmlabel(JumpTargetOp(taicpu(hp1))^.ref^.symbol).bind=AB_LOCAL)
+  {$endif aarch64}
                                     then
                                     then
                                       begin
                                       begin
                                         taicpu(p).condition:=inverse_cond(taicpu(p).condition);
                                         taicpu(p).condition:=inverse_cond(taicpu(p).condition);

+ 45 - 12
compiler/arm/aasmcpu.pas

@@ -908,6 +908,20 @@ implementation
               limit:=254;
               limit:=254;
         end;
         end;
 
 
+      function is_case_dispatch(hp: taicpu): boolean;
+        begin
+          result:=
+            ((taicpu(hp).opcode in [A_ADD,A_LDR]) and
+             not(GenerateThumbCode or GenerateThumb2Code) and
+             (taicpu(hp).oper[0]^.typ=top_reg) and
+             (taicpu(hp).oper[0]^.reg=NR_PC)) or
+             ((taicpu(hp).opcode=A_MOV) and (GenerateThumbCode) and
+              (taicpu(hp).oper[0]^.typ=top_reg) and
+              (taicpu(hp).oper[0]^.reg=NR_PC)) or
+             (taicpu(hp).opcode=A_TBH) or
+             (taicpu(hp).opcode=A_TBB);
+        end;
+
       var
       var
         curinspos,
         curinspos,
         penalty,
         penalty,
@@ -916,7 +930,8 @@ implementation
         currentsize,
         currentsize,
         extradataoffset,
         extradataoffset,
         curop : longint;
         curop : longint;
-        curtai : tai;
+        curtai,
+        inserttai : tai;
         ai_label : tai_label;
         ai_label : tai_label;
         curdatatai,hp,hp2 : tai;
         curdatatai,hp,hp2 : tai;
         curdata : TAsmList;
         curdata : TAsmList;
@@ -1076,15 +1091,11 @@ implementation
                 case taicpu(hp).opcode of
                 case taicpu(hp).opcode of
                   A_MOV,
                   A_MOV,
                   A_LDR,
                   A_LDR,
-                  A_ADD:
+                  A_ADD,
+                  A_TBH,
+                  A_TBB:
                     { approximation if we hit a case jump table }
                     { approximation if we hit a case jump table }
-                    if ((taicpu(hp).opcode in [A_ADD,A_LDR]) and not(GenerateThumbCode or GenerateThumb2Code) and
-                       (taicpu(hp).oper[0]^.typ=top_reg) and
-                      (taicpu(hp).oper[0]^.reg=NR_PC)) or
-                      ((taicpu(hp).opcode=A_MOV) and (GenerateThumbCode) and
-                       (taicpu(hp).oper[0]^.typ=top_reg) and
-                       (taicpu(hp).oper[0]^.reg=NR_PC))
-                       then
+                    if is_case_dispatch(taicpu(hp)) then
                       begin
                       begin
                         penalty:=multiplier;
                         penalty:=multiplier;
                         hp:=tai(hp.next);
                         hp:=tai(hp.next);
@@ -1178,12 +1189,34 @@ implementation
                 else
                 else
                   limit:=1016;
                   limit:=1016;
 
 
+                { if this is an add/tbh/tbb-based jumptable, go back to the
+                  previous instruction, because inserting data between the
+                  dispatch instruction and the table would mess up the
+                  addresses }
+                inserttai:=curtai;
+                if is_case_dispatch(taicpu(inserttai)) and
+                   ((taicpu(inserttai).opcode=A_ADD) or
+                    (taicpu(inserttai).opcode=A_TBH) or
+                    (taicpu(inserttai).opcode=A_TBB)) then
+                  begin
+                    repeat
+                      inserttai:=tai(inserttai.previous);
+                    until inserttai.typ=ait_instruction;
+                    { if it's an add-based jump table, then also skip the
+                      pc-relative load }
+                    if taicpu(curtai).opcode=A_ADD then
+                      repeat
+                        inserttai:=tai(inserttai.previous);
+                      until inserttai.typ=ait_instruction;
+                  end
+                else
+
                 { on arm thumb, insert the data always after all labels etc. following an instruction so it
                 { on arm thumb, insert the data always after all labels etc. following an instruction so it
                   is prevent that a bxx yyy; bl xxx; yyyy: sequence gets separated ( we never insert on arm thumb after
                   is prevent that a bxx yyy; bl xxx; yyyy: sequence gets separated ( we never insert on arm thumb after
                   bxx) and the distance of bxx gets too long }
                   bxx) and the distance of bxx gets too long }
                 if GenerateThumbCode then
                 if GenerateThumbCode then
-                  while assigned(tai(curtai.Next)) and (tai(curtai.Next).typ in SkipInstr+[ait_label]) do
-                    curtai:=tai(curtai.next);
+                  while assigned(tai(inserttai.Next)) and (tai(inserttai.Next).typ in SkipInstr+[ait_label]) do
+                    inserttai:=tai(inserttai.next);
 
 
                 doinsert:=false;
                 doinsert:=false;
                 current_asmdata.getjumplabel(l);
                 current_asmdata.getjumplabel(l);
@@ -1210,7 +1243,7 @@ implementation
                   is then equal curdata.last.previous) we could over see one
                   is then equal curdata.last.previous) we could over see one
                   instruction }
                   instruction }
                 hp:=tai(curdata.Last);
                 hp:=tai(curdata.Last);
-                list.insertlistafter(curtai,curdata);
+                list.insertlistafter(inserttai,curdata);
                 curtai:=hp;
                 curtai:=hp;
               end
               end
             else
             else

+ 1 - 2
compiler/arm/agarmgas.pas

@@ -114,8 +114,7 @@ unit agarmgas;
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
         else if GenerateThumbCode then
         else if GenerateThumbCode then
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
-        // EDSP instructions in RTL require armv5te at least to not generate error
-        else if current_settings.cputype >= cpu_armv5te then
+        else
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' '+result;
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' '+result;
 
 
         if target_info.abi = abi_eabihf then
         if target_info.abi = abi_eabihf then

+ 5 - 3
compiler/arm/cgcpu.pas

@@ -104,7 +104,7 @@ unit cgcpu;
 
 
         procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
         procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
         { Transform unsupported methods into Internal errors }
         { Transform unsupported methods into Internal errors }
-        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
 
 
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
@@ -640,7 +640,9 @@ unit cgcpu;
         sym : TAsmSymbol;
         sym : TAsmSymbol;
       begin
       begin
         { check not really correct: should only be used for non-Thumb cpus }
         { check not really correct: should only be used for non-Thumb cpus }
-        if CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype] then
+        if (CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype]) and
+          { WinCE GNU AS (not sure if this applies in general) does not support BLX imm }
+          (target_info.system<>system_arm_wince) then
           branchopcode:=A_BLX
           branchopcode:=A_BLX
         else
         else
           branchopcode:=A_BL;
           branchopcode:=A_BL;
@@ -1704,7 +1706,7 @@ unit cgcpu;
       end;
       end;
 
 
 
 
-    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
+    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
       begin
       begin
         if reverse then
         if reverse then
           begin
           begin

+ 6 - 0
compiler/arm/cpuinfo.pas

@@ -337,6 +337,9 @@ Type
       ct_xmc4502x768,
       ct_xmc4502x768,
       ct_xmc4504x512,
       ct_xmc4504x512,
 
 
+      { Allwinner }
+      ct_allwinner_a20,
+
       // generic Thumb2 target
       // generic Thumb2 target
       ct_thumb2bare
       ct_thumb2bare
      );
      );
@@ -702,6 +705,9 @@ Const
       (controllertypestr:'XMC4502X768';   controllerunitstr:'XMC4502'; flashbase:$08000000;	flashsize:$000C0000;	srambase:$20000000;	sramsize:$00010000),
       (controllertypestr:'XMC4502X768';   controllerunitstr:'XMC4502'; flashbase:$08000000;	flashsize:$000C0000;	srambase:$20000000;	sramsize:$00010000),
       (controllertypestr:'XMC4504X512';   controllerunitstr:'XMC4504'; flashbase:$08000000;	flashsize:$00080000;	srambase:$20000000;	sramsize:$00010000),
       (controllertypestr:'XMC4504X512';   controllerunitstr:'XMC4504'; flashbase:$08000000;	flashsize:$00080000;	srambase:$20000000;	sramsize:$00010000),
 
 
+      { Allwinner }
+      (controllertypestr:'ALLWINNER_A20'; controllerunitstr:'ALLWINNER_A20';     flashbase:$00000000; flashsize:$00000000;  srambase:$40000000; sramsize:$80000000),
+
       { Bare bones }
       { Bare bones }
       (controllertypestr:'THUMB2_BARE';	controllerunitstr:'THUMB2_BARE';	flashbase:$00000000;	flashsize:$00002000;	srambase:$20000000;	sramsize:$00000400)
       (controllertypestr:'THUMB2_BARE';	controllerunitstr:'THUMB2_BARE';	flashbase:$00000000;	flashsize:$00002000;	srambase:$20000000;	sramsize:$00000400)
     );
     );

+ 4 - 3
compiler/arm/narmmem.pas

@@ -27,6 +27,7 @@ interface
 
 
     uses
     uses
       globtype,
       globtype,
+      symtype,
       cgbase,cpubase,nmem,ncgmem;
       cgbase,cpubase,nmem,ncgmem;
 
 
     type
     type
@@ -36,7 +37,7 @@ interface
 
 
 
 
       tarmvecnode = class(tcgvecnode)
       tarmvecnode = class(tcgvecnode)
-        procedure update_reference_reg_mul(maybe_const_reg: tregister; l: aint);override;
+        procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);override;
       end;
       end;
 
 
 implementation
 implementation
@@ -70,7 +71,7 @@ implementation
                              TARMVECNODE
                              TARMVECNODE
 *****************************************************************************}
 *****************************************************************************}
 
 
-     procedure tarmvecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
+     procedure tarmvecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
        var
        var
          hreg: tregister;
          hreg: tregister;
          hl : longint;
          hl : longint;
@@ -79,7 +80,7 @@ implementation
             (GenerateThumbCode) or
             (GenerateThumbCode) or
             { simple constant? }
             { simple constant? }
             (l=1) or ispowerof2(l,hl) or ispowerof2(l+1,hl) or ispowerof2(l-1,hl) then
             (l=1) or ispowerof2(l,hl) or ispowerof2(l+1,hl) or ispowerof2(l-1,hl) then
-           inherited update_reference_reg_mul(maybe_const_reg,l)
+           inherited update_reference_reg_mul(maybe_const_reg,regsize,l)
          else if (location.reference.base<>NR_NO) then
          else if (location.reference.base<>NR_NO) then
            begin
            begin
              hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
              hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);

+ 1 - 1
compiler/arm/raarmgas.pas

@@ -1286,7 +1286,7 @@ Unit raarmgas;
                   end;
                   end;
               end;
               end;
           end;
           end;
-        maxlen:=max(length(hs),5);
+        maxlen:=min(length(hs),5);
         actopcode:=A_NONE;
         actopcode:=A_NONE;
         j2:=maxlen;
         j2:=maxlen;
         hs2:=hs;
         hs2:=hs;

+ 2 - 1
compiler/assemble.pas

@@ -83,6 +83,7 @@ interface
         lastsectype : TAsmSectionType;
         lastsectype : TAsmSectionType;
         procedure WriteSourceLine(hp: tailineinfo);
         procedure WriteSourceLine(hp: tailineinfo);
         procedure WriteTempalloc(hp: tai_tempalloc);
         procedure WriteTempalloc(hp: tai_tempalloc);
+        Function DoPipe:boolean;
       public
       public
         {# Returns the complete path and executable name of the assembler
         {# Returns the complete path and executable name of the assembler
            program.
            program.
@@ -271,7 +272,7 @@ Implementation
                                  TExternalAssembler
                                  TExternalAssembler
 *****************************************************************************}
 *****************************************************************************}
 
 
-    Function DoPipe:boolean;
+    Function TExternalAssembler.DoPipe:boolean;
       begin
       begin
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and

+ 13 - 0
compiler/cgbase.pas

@@ -101,6 +101,12 @@ interface
          ,addr_dgroup      // the data segment group
          ,addr_dgroup      // the data segment group
          ,addr_seg         // used for getting the segment of an object, e.g. 'mov ax, SEG symbol'
          ,addr_seg         // used for getting the segment of an object, e.g. 'mov ax, SEG symbol'
          {$ENDIF}
          {$ENDIF}
+         {$IFDEF AARCH64}
+         ,addr_page
+         ,addr_pageoffset
+         ,addr_gotpage
+         ,addr_gotpageoffset
+         {$ENDIF AARCH64}
          );
          );
 
 
 
 
@@ -329,6 +335,13 @@ interface
           OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
           OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
           OS_M64,OS_M128,OS_M256);
           OS_M64,OS_M128,OS_M256);
 
 
+       tcgsize2signed : array[tcgsize] of tcgsize = (OS_NO,
+          OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,
+          OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,
+          OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
+          OS_M64,OS_M128,OS_M256);
+
+
        tcgloc2str : array[TCGLoc] of string[12] = (
        tcgloc2str : array[TCGLoc] of string[12] = (
             'LOC_INVALID',
             'LOC_INVALID',
             'LOC_VOID',
             'LOC_VOID',

+ 2 - 2
compiler/cghlcpu.pas

@@ -65,14 +65,14 @@ uses
       procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;
       procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;
       procedure a_call_reg(list: TAsmList; reg: tregister); override;
       procedure a_call_reg(list: TAsmList; reg: tregister); override;
       procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
       procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
-      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); override;
+      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
     end;
     end;
 
 
 implementation
 implementation
 
 
    { thlbasecgcpu }
    { thlbasecgcpu }
 
 
-    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
       begin
         internalerror(2012042801);
         internalerror(2012042801);
       end;
       end;

+ 19 - 8
compiler/cgobj.pas

@@ -247,7 +247,7 @@ unit cgobj;
           procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);virtual; abstract;
           procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);virtual; abstract;
 
 
           { bit scan instructions }
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); virtual;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); virtual;
 
 
           { Multiplication with doubling result size.
           { Multiplication with doubling result size.
             dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. }
             dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. }
@@ -865,7 +865,14 @@ implementation
          ref : treference;
          ref : treference;
          tmpreg : tregister;
          tmpreg : tregister;
       begin
       begin
-         cgpara.check_simple_location;
+         if assigned(cgpara.location^.next) then
+           begin
+             tg.gethltemp(list,cgpara.def,cgpara.def.size,tt_persistent,ref);
+             a_load_reg_ref(list,size,size,r,ref);
+             a_load_ref_cgpara(list,size,ref,cgpara);
+             tg.ungettemp(list,ref);
+             exit;
+           end;
          paramanager.alloccgpara(list,cgpara);
          paramanager.alloccgpara(list,cgpara);
          if cgpara.location^.shiftval<0 then
          if cgpara.location^.shiftval<0 then
            begin
            begin
@@ -1333,8 +1340,10 @@ implementation
                       dec(tmpref.offset)
                       dec(tmpref.offset)
                     else
                     else
                       inc(tmpref.offset);
                       inc(tmpref.offset);
-                    a_load_ref_reg(list,OS_8,OS_16,tmpref,register);
-                    a_op_reg_reg(list,OP_OR,OS_16,tmpreg,register);
+                    tmpreg2:=makeregsize(list,register,OS_16);
+                    a_load_ref_reg(list,OS_8,OS_16,tmpref,tmpreg2);
+                    a_op_reg_reg(list,OP_OR,OS_16,tmpreg,tmpreg2);
+                    a_load_reg_reg(list,OS_16,tosize,tmpreg2,register);
                   end;
                   end;
               OS_32,OS_S32:
               OS_32,OS_S32:
                 if ref.alignment=2 then
                 if ref.alignment=2 then
@@ -1348,8 +1357,10 @@ implementation
                       dec(tmpref.offset,2)
                       dec(tmpref.offset,2)
                     else
                     else
                       inc(tmpref.offset,2);
                       inc(tmpref.offset,2);
-                    a_load_ref_reg(list,OS_16,OS_32,tmpref,register);
-                    a_op_reg_reg(list,OP_OR,OS_32,tmpreg,register);
+                    tmpreg2:=makeregsize(list,register,OS_32);
+                    a_load_ref_reg(list,OS_16,OS_32,tmpref,tmpreg2);
+                    a_op_reg_reg(list,OP_OR,OS_32,tmpreg,tmpreg2);
+                    a_load_reg_reg(list,OS_32,tosize,tmpreg2,register);
                   end
                   end
                 else
                 else
                   begin
                   begin
@@ -1368,7 +1379,7 @@ implementation
                         a_load_ref_reg(list,OS_8,OS_32,tmpref,tmpreg2);
                         a_load_ref_reg(list,OS_8,OS_32,tmpref,tmpreg2);
                         a_op_reg_reg(list,OP_OR,OS_32,tmpreg2,tmpreg);
                         a_op_reg_reg(list,OP_OR,OS_32,tmpreg2,tmpreg);
                       end;
                       end;
-                    a_load_reg_reg(list,OS_32,OS_32,tmpreg,register);
+                    a_load_reg_reg(list,OS_32,tosize,tmpreg,register);
                   end
                   end
               else
               else
                 a_load_ref_reg(list,fromsize,tosize,tmpref,register);
                 a_load_ref_reg(list,fromsize,tosize,tmpref,register);
@@ -2517,7 +2528,7 @@ implementation
       end;
       end;
 
 
 
 
-    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
       begin
         internalerror(2014070601);
         internalerror(2014070601);
       end;
       end;

+ 6 - 0
compiler/cgutils.pas

@@ -63,6 +63,12 @@ unit cgutils;
          addressmode : taddressmode;
          addressmode : taddressmode;
          shiftmode   : tshiftmode;
          shiftmode   : tshiftmode;
 {$endif arm}
 {$endif arm}
+{$ifdef aarch64}
+         symboldata  : tlinkedlistitem;
+         shiftimm    : byte;
+         addressmode : taddressmode;
+         shiftmode   : tshiftmode;
+{$endif aarch64}
 {$ifdef avr}
 {$ifdef avr}
          addressmode : taddressmode;
          addressmode : taddressmode;
 {$endif avr}
 {$endif avr}

+ 1 - 1
compiler/finput.pas

@@ -454,7 +454,7 @@ uses
         fileopen:=false;
         fileopen:=false;
         try
         try
           f:=CFileStreamClass.Create(filename,fmOpenRead);
           f:=CFileStreamClass.Create(filename,fmOpenRead);
-          fileopen:=true;
+          fileopen:=CStreamError=0;
         except
         except
         end;
         end;
       end;
       end;

+ 1 - 0
compiler/fpcdefs.inc

@@ -242,6 +242,7 @@
   {$define cpurox}
   {$define cpurox}
   {$define cputargethasfixedstack}
   {$define cputargethasfixedstack}
   {$define cpurefshaveindexreg}
   {$define cpurefshaveindexreg}
+  {$define SUPPORT_GET_FRAME}
 {$endif aarch64}
 {$endif aarch64}
 
 
 {$IFDEF MACOS}
 {$IFDEF MACOS}

+ 3 - 1
compiler/fppu.pas

@@ -23,6 +23,8 @@ unit fppu;
 
 
 {$i fpcdefs.inc}
 {$i fpcdefs.inc}
 
 
+{ $define DEBUG_UNIT_CRC_CHANGES}
+
 { close ppufiles on system that are
 { close ppufiles on system that are
   short on file handles like DOS system PM }
   short on file handles like DOS system PM }
 {$ifdef GO32V2}
 {$ifdef GO32V2}
@@ -1477,7 +1479,7 @@ var
         { we can now derefence all pointers to the implementation parts }
         { we can now derefence all pointers to the implementation parts }
         tstoredsymtable(globalsymtable).derefimpl;
         tstoredsymtable(globalsymtable).derefimpl;
         if assigned(localsymtable) then
         if assigned(localsymtable) then
-          tstoredsymtable(localsymtable).derefimpl;
+            tstoredsymtable(localsymtable).derefimpl;
 
 
          { read whole program optimisation-related information }
          { read whole program optimisation-related information }
          wpoinfo:=tunitwpoinfo.ppuload(ppufile);
          wpoinfo:=tunitwpoinfo.ppuload(ppufile);

+ 2 - 2
compiler/globals.pas

@@ -445,8 +445,8 @@ interface
         fputype : fpu_hard;
         fputype : fpu_hard;
   {$endif sparc}
   {$endif sparc}
   {$ifdef arm}
   {$ifdef arm}
-        cputype : cpu_armv3;
-        optimizecputype : cpu_armv3;
+        cputype : cpu_armv4;
+        optimizecputype : cpu_armv4;
         fputype : fpu_fpa;
         fputype : fpu_fpa;
   {$endif arm}
   {$endif arm}
   {$ifdef x86_64}
   {$ifdef x86_64}

+ 3 - 3
compiler/hlcg2ll.pas

@@ -171,7 +171,7 @@ unit hlcg2ll;
           procedure a_loadaddr_ref_reg(list : TAsmList;fromsize, tosize : tdef;const ref : treference;r : tregister);override;
           procedure a_loadaddr_ref_reg(list : TAsmList;fromsize, tosize : tdef;const ref : treference;r : tregister);override;
 
 
           { bit scan instructions }
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); override;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override;
 
 
           { fpu move instructions }
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); override;
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); override;
@@ -586,9 +586,9 @@ implementation
       cg.a_loadaddr_ref_reg(list,ref,r);
       cg.a_loadaddr_ref_reg(list,ref,r);
     end;
     end;
 
 
-  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister);
+  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister);
     begin
     begin
-      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(size),src,dst);
+      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(srcsize),def_cgsize(dstsize),src,dst);
     end;
     end;
 
 
   procedure thlcg2ll.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister);
   procedure thlcg2ll.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister);

+ 19 - 7
compiler/hlcgobj.pas

@@ -301,7 +301,7 @@ unit hlcgobj;
          public
          public
 
 
           { bit scan instructions (still need transformation to thlcgobj) }
           { bit scan instructions (still need transformation to thlcgobj) }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); virtual; abstract;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); virtual; abstract;
 
 
           { fpu move instructions }
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); virtual; abstract;
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); virtual; abstract;
@@ -536,9 +536,9 @@ unit hlcgobj;
          public
          public
 
 
           procedure gen_load_para_value(list:TAsmList);virtual;
           procedure gen_load_para_value(list:TAsmList);virtual;
-         protected
           { helpers called by gen_load_para_value }
           { helpers called by gen_load_para_value }
           procedure g_copyvalueparas(p:TObject;arg:pointer);virtual;
           procedure g_copyvalueparas(p:TObject;arg:pointer);virtual;
+         protected
           procedure gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation;const cgpara: tcgpara;locintsize: longint);virtual;
           procedure gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation;const cgpara: tcgpara;locintsize: longint);virtual;
           procedure init_paras(p:TObject;arg:pointer);
           procedure init_paras(p:TObject;arg:pointer);
          protected
          protected
@@ -4523,7 +4523,11 @@ implementation
          if (tparavarsym(p).varspez=vs_value) then
          if (tparavarsym(p).varspez=vs_value) then
           begin
           begin
             include(current_procinfo.flags,pi_needs_implicit_finally);
             include(current_procinfo.flags,pi_needs_implicit_finally);
-            location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).localloc,href,is_open_array(tparavarsym(p).vardef),sizeof(pint));
+            location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).localloc,href,
+              is_open_array(tparavarsym(p).vardef) or
+              ((target_info.system in systems_caller_copy_addr_value_para) and
+               paramanager.push_addr_param(vs_value,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)),
+              sizeof(pint));
             if is_open_array(tparavarsym(p).vardef) then
             if is_open_array(tparavarsym(p).vardef) then
               begin
               begin
                 if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
                 if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
@@ -4543,7 +4547,8 @@ implementation
           end;
           end;
        end;
        end;
       { open arrays can contain elements requiring init/final code, so the else has been removed here }
       { open arrays can contain elements requiring init/final code, so the else has been removed here }
-      if (tparavarsym(p).varspez=vs_value) and
+      if not(target_info.system in systems_caller_copy_addr_value_para) and
+         (tparavarsym(p).varspez=vs_value) and
          (is_open_array(tparavarsym(p).vardef) or
          (is_open_array(tparavarsym(p).vardef) or
           is_array_of_const(tparavarsym(p).vardef)) then
           is_array_of_const(tparavarsym(p).vardef)) then
         begin
         begin
@@ -4581,7 +4586,11 @@ implementation
                  if not((tparavarsym(p).vardef.typ=variantdef) and
                  if not((tparavarsym(p).vardef.typ=variantdef) and
                    paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
                    paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
                    begin
                    begin
-                     location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,is_open_array(tparavarsym(p).vardef),sizeof(pint));
+                     location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,
+                       is_open_array(tparavarsym(p).vardef) or
+                       ((target_info.system in systems_caller_copy_addr_value_para) and
+                        paramanager.push_addr_param(vs_value,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)),
+                       sizeof(pint));
                      if is_open_array(tparavarsym(p).vardef) then
                      if is_open_array(tparavarsym(p).vardef) then
                        begin
                        begin
                          if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
                          if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
@@ -4682,8 +4691,11 @@ implementation
     begin
     begin
       list:=TAsmList(arg);
       list:=TAsmList(arg);
       if (tsym(p).typ=paravarsym) and
       if (tsym(p).typ=paravarsym) and
-         (tparavarsym(p).varspez=vs_value) and
-        (paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
+         ((vo_has_local_copy in tparavarsym(p).varoptions) or
+          (not(target_info.system in systems_caller_copy_addr_value_para) and
+           (is_open_array(tparavarsym(p).vardef) or
+            is_array_of_const(tparavarsym(p).vardef)) and
+           (tparavarsym(p).varspez=vs_value))) then
         begin
         begin
           { we have no idea about the alignment at the caller side }
           { we have no idea about the alignment at the caller side }
           location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,true,1);
           location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,true,1);

+ 9 - 8
compiler/i386/cpuinfo.pas

@@ -151,7 +151,8 @@ Const
 
 
 type
 type
    tcpuflags =
    tcpuflags =
-      (CPUX86_HAS_SSEUNIT,
+      (CPUX86_HAS_CMOV,
+       CPUX86_HAS_SSEUNIT,
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_POPCNT,
        CPUX86_HAS_POPCNT,
@@ -167,13 +168,13 @@ type
      { cpu_none      } [],
      { cpu_none      } [],
      { cpu_386       } [],
      { cpu_386       } [],
      { cpu_Pentium   } [],
      { cpu_Pentium   } [],
-     { cpu_Pentium2  } [],
-     { cpu_Pentium3  } [CPUX86_HAS_SSEUNIT],
-     { cpu_Pentium4  } [CPUX86_HAS_SSEUNIT],
-     { cpu_PentiumM  } [CPUX86_HAS_SSEUNIT],
-     { cpu_core_i    } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
-     { cpu_core_avx  } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
-     { cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
+     { cpu_Pentium2  } [CPUX86_HAS_CMOV],
+     { cpu_Pentium3  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_Pentium4  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_PentiumM  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_core_i    } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
+     { cpu_core_avx  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
+     { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
    );
    );
 
 
 
 

+ 8 - 0
compiler/i386/i386att.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsravd',
 'vpsrlvd',
 'vpsrlvd',
 'vpsrlvq',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd213pd',
 'vfmadd231pd',
 'vfmadd231pd',

+ 8 - 0
compiler/i386/i386atts.inc

@@ -1016,5 +1016,13 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 8 - 0
compiler/i386/i386int.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsravd',
 'vpsrlvd',
 'vpsrlvd',
 'vpsrlvq',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd213pd',
 'vfmadd231pd',
 'vfmadd231pd',

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
-1926;
+1942;

+ 8 - 0
compiler/i386/i386op.inc

@@ -956,6 +956,14 @@ A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRAVD,
 A_VPSRLVD,
 A_VPSRLVD,
 A_VPSRLVQ,
 A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
 A_VFMADD132PD,
 A_VFMADD132PD,
 A_VFMADD213PD,
 A_VFMADD213PD,
 A_VFMADD231PD,
 A_VFMADD231PD,

+ 9 - 1
compiler/i386/i386prop.inc

@@ -910,7 +910,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
@@ -956,6 +956,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),

+ 112 - 0
compiler/i386/i386tab.inc

@@ -12796,6 +12796,118 @@
     code    : #241#242#243#249#1#69#61#80;
     code    : #241#242#243#249#1#69#61#80;
     flags   : if_avx2
     flags   : if_avx2
   ),
   ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
   (
   (
     opcode  : A_VFMADD132PD;
     opcode  : A_VFMADD132PD;
     ops     : 3;
     ops     : 3;

+ 1 - 1
compiler/i386/popt386.pas

@@ -2076,7 +2076,7 @@ begin
                             end;
                             end;
                         end;
                         end;
                     end;
                     end;
-                  if (current_settings.cputype>=cpu_Pentium2) then
+                  if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
                     begin
                     begin
                        { check for
                        { check for
                               jCC   xxx
                               jCC   xxx

+ 8 - 0
compiler/i8086/i8086att.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsravd',
 'vpsrlvd',
 'vpsrlvd',
 'vpsrlvq',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'add4s',
 'add4s',
 'brkem',
 'brkem',
 'clr1',
 'clr1',

+ 8 - 0
compiler/i8086/i8086atts.inc

@@ -1030,5 +1030,13 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 attsufNONE
 );
 );

+ 8 - 0
compiler/i8086/i8086int.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsravd',
 'vpsrlvd',
 'vpsrlvd',
 'vpsrlvq',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'add4s',
 'add4s',
 'brkem',
 'brkem',
 'clr1',
 'clr1',

+ 1 - 1
compiler/i8086/i8086nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
 { don't edit, this file is generated from x86ins.dat }
-1954;
+1970;

+ 8 - 0
compiler/i8086/i8086op.inc

@@ -956,6 +956,14 @@ A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRAVD,
 A_VPSRLVD,
 A_VPSRLVD,
 A_VPSRLVQ,
 A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
 A_ADD4S,
 A_ADD4S,
 A_BRKEM,
 A_BRKEM,
 A_CLR1,
 A_CLR1,

+ 9 - 1
compiler/i8086/i8086prop.inc

@@ -910,7 +910,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
@@ -958,6 +958,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),

+ 112 - 0
compiler/i8086/i8086tab.inc

@@ -12796,6 +12796,118 @@
     code    : #241#242#243#249#1#69#61#80;
     code    : #241#242#243#249#1#69#61#80;
     flags   : if_avx2
     flags   : if_avx2
   ),
   ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
   (
   (
     opcode  : A_ADD4S;
     opcode  : A_ADD4S;
     ops     : 0;
     ops     : 0;

+ 5 - 4
compiler/i8086/n8086mem.pas

@@ -27,6 +27,7 @@ interface
 
 
     uses
     uses
       globtype,
       globtype,
+      symtype,
       cgbase,cpuinfo,cpubase,
       cgbase,cpuinfo,cpubase,
       node,nmem,ncgmem,nx86mem,ni86mem;
       node,nmem,ncgmem,nx86mem,ni86mem;
 
 
@@ -45,7 +46,7 @@ interface
        ti8086vecnode = class(tcgvecnode)
        ti8086vecnode = class(tcgvecnode)
         protected
         protected
          function first_arraydef: tnode;override;
          function first_arraydef: tnode;override;
-         procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override;
+         procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);override;
        end;
        end;
 
 
 implementation
 implementation
@@ -53,7 +54,7 @@ implementation
     uses
     uses
       systems,globals,constexp,
       systems,globals,constexp,
       cutils,verbose,
       cutils,verbose,
-      symbase,symconst,symdef,symtable,symtype,symsym,symx86,symcpu,
+      symbase,symconst,symdef,symtable,symsym,symx86,symcpu,
       parabase,paramgr,
       parabase,paramgr,
       aasmtai,aasmdata,
       aasmtai,aasmdata,
       nld,ncon,nadd,ncal,ncnv,
       nld,ncon,nadd,ncal,ncnv,
@@ -212,13 +213,13 @@ implementation
       end;
       end;
 
 
 
 
-    procedure ti8086vecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
+    procedure ti8086vecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
       var
       var
         saveseg: TRegister;
         saveseg: TRegister;
       begin
       begin
         saveseg:=location.reference.segment;
         saveseg:=location.reference.segment;
         location.reference.segment:=NR_NO;
         location.reference.segment:=NR_NO;
-        inherited update_reference_reg_mul(maybe_const_reg,l);
+        inherited;
         location.reference.segment:=saveseg;
         location.reference.segment:=saveseg;
       end;
       end;
 
 

+ 2 - 2
compiler/jvm/hlcgcpu.pas

@@ -114,7 +114,7 @@ uses
       procedure gen_exit_code(list: TAsmList); override;
       procedure gen_exit_code(list: TAsmList); override;
 
 
       { unimplemented/unnecessary routines }
       { unimplemented/unnecessary routines }
-      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); override;
+      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override;
       procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); override;
       procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); override;
       procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle); override;
       procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle); override;
       procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
       procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
@@ -1884,7 +1884,7 @@ implementation
       { nothing }
       { nothing }
     end;
     end;
 
 
-  procedure thlcgjvm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister);
+  procedure thlcgjvm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister);
     begin
     begin
       internalerror(2012090201);
       internalerror(2012090201);
     end;
     end;

+ 2 - 2
compiler/link.pas

@@ -620,8 +620,8 @@ Implementation
         FillChar(Info,sizeof(Info),0);
         FillChar(Info,sizeof(Info),0);
         if cs_link_on_target in current_settings.globalswitches then
         if cs_link_on_target in current_settings.globalswitches then
           begin
           begin
-            Info.ResName:=outputexedir+ChangeFileExt(inputfilename,'_link.res');
-            Info.ScriptName:=outputexedir+ChangeFileExt(inputfilename,'_script.res');
+            Info.ResName:=ChangeFileExt(inputfilename,'_link.res');
+            Info.ScriptName:=ChangeFileExt(inputfilename,'_script.res');
           end
           end
         else
         else
           begin
           begin

+ 10 - 1
compiler/m68k/aasmcpu.pas

@@ -496,13 +496,22 @@ type
           // FPU opcodes
           // FPU opcodes
           A_FSXX, A_FSEQ, A_FSNE, A_FSLT, A_FSLE, A_FSGT, A_FSGE:
           A_FSXX, A_FSEQ, A_FSNE, A_FSLT, A_FSLE, A_FSGT, A_FSGE:
              result:=operand_write;
              result:=operand_write;
+          A_FABS,A_FSQRT,A_FNEG:
+             if ops = 1 then
+               begin
+                 if opnr = 0 then
+                   result:=operand_readwrite;
+               end
+             else
+               if opnr = 1 then
+                 result:=operand_write;
           A_FMOVE:
           A_FMOVE:
              if opnr=1 then
              if opnr=1 then
                result:=operand_write;
                result:=operand_write;
           A_FADD, A_FSUB, A_FMUL, A_FDIV:
           A_FADD, A_FSUB, A_FMUL, A_FDIV:
              if opnr=1 then
              if opnr=1 then
                result:=operand_readwrite;
                result:=operand_readwrite;
-          A_FCMP:
+          A_FCMP, A_FTST:
              begin end; { operand_read }
              begin end; { operand_read }
 
 
           else begin
           else begin

+ 4 - 4
compiler/m68k/cgcpu.pas

@@ -1840,7 +1840,7 @@ unit cgcpu;
             if saved_fpu_registers[r] in rg[R_FPUREGISTER].used_in_proc then
             if saved_fpu_registers[r] in rg[R_FPUREGISTER].used_in_proc then
               begin
               begin
                 hfreg:=newreg(R_FPUREGISTER,saved_fpu_registers[r],R_SUBWHOLE);
                 hfreg:=newreg(R_FPUREGISTER,saved_fpu_registers[r],R_SUBWHOLE);
-                inc(fsize,10{sizeof(extended)});
+                inc(fsize,12{sizeof(extended)});
                 fpuregs:=fpuregs + [saved_fpu_registers[r]];
                 fpuregs:=fpuregs + [saved_fpu_registers[r]];
               end;
               end;
 
 
@@ -1873,7 +1873,7 @@ unit cgcpu;
               begin
               begin
                 { size is always longword aligned, while fsize is not }
                 { size is always longword aligned, while fsize is not }
                 inc(href.offset,size);
                 inc(href.offset,size);
-                if fsize = 10{sizeof(extended)} then
+                if fsize = 12{sizeof(extended)} then
                   list.concat(taicpu.op_reg_ref(A_FMOVE,S_FX,hfreg,href))
                   list.concat(taicpu.op_reg_ref(A_FMOVE,S_FX,hfreg,href))
                 else
                 else
                   list.concat(taicpu.op_regset_ref(A_FMOVEM,S_FX,[],[],fpuregs,href));
                   list.concat(taicpu.op_regset_ref(A_FMOVEM,S_FX,[],[],fpuregs,href));
@@ -1931,7 +1931,7 @@ unit cgcpu;
           for r:=low(saved_address_registers) to high(saved_address_registers) do
           for r:=low(saved_address_registers) to high(saved_address_registers) do
             if saved_fpu_registers[r] in rg[R_FPUREGISTER].used_in_proc then
             if saved_fpu_registers[r] in rg[R_FPUREGISTER].used_in_proc then
               begin
               begin
-                inc(fsize,10{sizeof(extended)});
+                inc(fsize,12{sizeof(extended)});
                 hfreg:=newreg(R_FPUREGISTER,saved_address_registers[r],R_SUBWHOLE);
                 hfreg:=newreg(R_FPUREGISTER,saved_address_registers[r],R_SUBWHOLE);
                 { Allocate register so the optimizer does not remove the load }
                 { Allocate register so the optimizer does not remove the load }
                 a_reg_alloc(list,hfreg);
                 a_reg_alloc(list,hfreg);
@@ -1961,7 +1961,7 @@ unit cgcpu;
           begin
           begin
             { size is always longword aligned, while fsize is not }
             { size is always longword aligned, while fsize is not }
             inc(href.offset,size);
             inc(href.offset,size);
-            if fsize = 10{sizeof(extended)} then
+            if fsize = 12{sizeof(extended)} then
               list.concat(taicpu.op_ref_reg(A_FMOVE,S_FX,href,hfreg))
               list.concat(taicpu.op_ref_reg(A_FMOVE,S_FX,href,hfreg))
             else
             else
               list.concat(taicpu.op_ref_regset(A_FMOVEM,S_FX,href,[],[],fpuregs));
               list.concat(taicpu.op_ref_regset(A_FMOVEM,S_FX,href,[],[],fpuregs));

+ 1 - 2
compiler/m68k/cpunode.pas

@@ -40,11 +40,10 @@ unit cpunode;
 //       nppcflw,
 //       nppcflw,
          n68kmem,
          n68kmem,
 //       nppcset,
 //       nppcset,
-//       nppcinl,
+         n68kinl,
 //       nppcopt,
 //       nppcopt,
        { this not really a node }
        { this not really a node }
 //       nppcobj,
 //       nppcobj,
-//       nppcmat,
          n68kmat,
          n68kmat,
          n68kcnv,
          n68kcnv,
          { symtable }
          { symtable }

+ 36 - 18
compiler/m68k/n68kadd.pas

@@ -117,6 +117,7 @@ implementation
     procedure t68kaddnode.second_addfloat;
     procedure t68kaddnode.second_addfloat;
       var
       var
         op    : TAsmOp;
         op    : TAsmOp;
+        href  : TReference;
       begin
       begin
         pass_left_right;
         pass_left_right;
 
 
@@ -141,17 +142,27 @@ implementation
         case current_settings.fputype of
         case current_settings.fputype of
           fpu_68881:
           fpu_68881:
             begin
             begin
-              // put both operands in a register
-              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+              { have left in the register, right can be a memory location }
               hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
 
 
-              // initialize the result
+              { initialize the result }
               location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
               location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
               location.register := cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
               location.register := cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
 
 
-              // emit the actual operation
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FMOVE,S_FX,left.location.register,location.register));
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_FX,right.location.register,location.register));
+              { emit the actual operation }
+              cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,OS_NO,OS_NO,left.location.register,location.register);
+              case right.location.loc of
+                LOC_FPUREGISTER,LOC_CFPUREGISTER:
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_FX,right.location.register,location.register));
+                LOC_REFERENCE,LOC_CREFERENCE:
+                    begin
+                      href:=right.location.reference;
+                      tcg68k(cg).fixref(current_asmdata.CurrAsmList,href);
+                      current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,tcgsize2opsize[right.location.size],href,location.register));
+                    end
+                else
+                  internalerror(2015021501);
+              end;
             end;
             end;
           else
           else
             // softfpu should be handled in pass1, others are not yet supported...
             // softfpu should be handled in pass1, others are not yet supported...
@@ -164,6 +175,7 @@ implementation
       var
       var
         tmpreg : tregister;
         tmpreg : tregister;
         ai: taicpu;
         ai: taicpu;
+        href  : TReference;
       begin
       begin
         pass_left_right;
         pass_left_right;
         if (nf_swapped in flags) then
         if (nf_swapped in flags) then
@@ -172,28 +184,34 @@ implementation
         case current_settings.fputype of
         case current_settings.fputype of
           fpu_68881:
           fpu_68881:
             begin
             begin
-              location_reset(location,LOC_FLAGS,OS_NO);
-
-              { force fpureg as location, left right doesn't matter
-                as both will be in a fpureg }
+              { force left fpureg as register, right can be reference }
               hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
-              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
 
-              // emit compare
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMP,S_FX,right.location.register,left.location.register));
-
-              location.resflags:=getresflags(false);
+              { emit compare }
+              case right.location.loc of
+                LOC_FPUREGISTER,LOC_CFPUREGISTER:
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMP,S_FX,right.location.register,left.location.register));
+                LOC_REFERENCE,LOC_CREFERENCE:
+                    begin
+                      href:=right.location.reference;
+                      tcg68k(cg).fixref(current_asmdata.CurrAsmList,href);
+                      current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_FCMP,tcgsize2opsize[right.location.size],href,left.location.register));
+                    end
+                else
+                  internalerror(2015021502);
+              end;
 
 
               // temporary(?) hack, move condition result back to the CPU from the FPU.
               // temporary(?) hack, move condition result back to the CPU from the FPU.
               // 6888x has its own FBcc branch instructions and FScc flags->reg instruction,
               // 6888x has its own FBcc branch instructions and FScc flags->reg instruction,
               // which we don't support yet in the rest of the cg. (KB)
               // which we don't support yet in the rest of the cg. (KB)
               tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_8);
               tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_8);
               ai:=taicpu.op_reg(A_FSxx,S_B,tmpreg);
               ai:=taicpu.op_reg(A_FSxx,S_B,tmpreg);
-              ai.SetCondition(flags_to_cond(location.resflags));
+              ai.SetCondition(flags_to_cond(getresflags(false)));
               current_asmdata.CurrAsmList.concat(ai);
               current_asmdata.CurrAsmList.concat(ai);
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_TST,S_B,tmpreg));
-              location.resflags:=F_E;
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_NEG,S_B,tmpreg));
 
 
+              location_reset(location,LOC_REGISTER,OS_8);
+              location.register:=tmpreg;
             end;
             end;
           else
           else
             // softfpu should be handled in pass1, others are not yet supported...
             // softfpu should be handled in pass1, others are not yet supported...

+ 212 - 0
compiler/m68k/n68kinl.pas

@@ -0,0 +1,212 @@
+{
+    Copyright (c) 2015 by the Free Pascal Development team
+
+    Generates Motorola 68k inline nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit n68kinl;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ninl,ncginl,cpubase;
+
+    type
+      t68kinlinenode = class(tcgInlineNode)
+        function first_abs_real: tnode; override;
+        function first_sqr_real: tnode; override;
+        function first_sqrt_real: tnode; override;
+        {function first_arctan_real: tnode; override;
+        function first_ln_real: tnode; override;
+        function first_cos_real: tnode; override;
+        function first_sin_real: tnode; override;}
+
+        procedure second_abs_real; override;
+        procedure second_sqr_real; override;
+        procedure second_sqrt_real; override;
+        {procedure second_arctan_real; override;
+        procedure second_ln_real; override;
+        procedure second_cos_real; override;
+        procedure second_sin_real; override;
+        procedure second_prefetch; override;
+        procedure second_abs_long; override;}
+      private
+        procedure second_do_operation(op: TAsmOp);
+      end;
+
+
+implementation
+
+    uses
+      globtype,verbose,globals,cutils,
+      cpuinfo,defutil,symdef,aasmdata,aasmcpu,aasmtai,
+      cgbase,cgutils,pass_1,pass_2,
+      ncgutil,cgobj,cgcpu,hlcgobj;
+
+{*****************************************************************************
+                              t68kinlinenode
+*****************************************************************************}
+
+    function t68kinlinenode.first_abs_real : tnode;
+      begin
+        if (cs_fp_emulation in current_settings.moduleswitches) then
+          result:=inherited first_abs_real
+        else
+          begin
+            case current_settings.fputype of
+              fpu_68881:
+                expectloc:=LOC_FPUREGISTER;
+              else
+                internalerror(2015022206);
+            end;
+            first_abs_real:=nil;
+          end;
+      end;
+
+    function t68kinlinenode.first_sqr_real : tnode;
+      begin
+        if (cs_fp_emulation in current_settings.moduleswitches) then
+          result:=inherited first_sqr_real
+        else
+          begin
+            case current_settings.fputype of
+              fpu_68881:
+                expectloc:=LOC_FPUREGISTER;
+              else
+                internalerror(2015022201);
+            end;
+            first_sqr_real:=nil;
+          end;
+      end;
+
+    function t68kinlinenode.first_sqrt_real : tnode;
+      begin
+        if (cs_fp_emulation in current_settings.moduleswitches) then
+          result:=inherited first_sqrt_real
+        else
+          begin
+            case current_settings.fputype of
+              fpu_68881:
+                expectloc:=LOC_FPUREGISTER;
+              else
+                internalerror(2015022203);
+            end;
+            first_sqrt_real:=nil;
+          end;
+      end;
+
+    procedure t68kinlinenode.second_abs_real;
+      begin
+        //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_abs_real called!')));
+        second_do_operation(A_FABS);
+      end;
+
+    procedure t68kinlinenode.second_sqr_real;
+      begin
+        secondpass(left);
+        case current_settings.fputype of
+          fpu_68881:
+            begin
+              //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_sqr_real called!')));
+              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              location_copy(location,left.location);
+              if left.location.loc=LOC_CFPUREGISTER then
+                begin
+                  //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_srq_real called!: left was cfpuregister!')));
+                  location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+                  location.loc := LOC_FPUREGISTER;
+                  cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,OS_NO,OS_NO,left.location.register,location.register);
+                end;
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FMUL,S_FX,left.location.register,location.register));
+            end;
+        else
+          internalerror(2015022202);
+        end;
+      end;
+
+    procedure t68kinlinenode.second_sqrt_real;
+      begin
+        //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_sqrt_real called!')));
+        second_do_operation(A_FSQRT);
+      end;
+
+    procedure t68kinlinenode.second_do_operation(op: TAsmOp);
+      var
+        href: TReference;
+      begin
+        secondpass(left);
+        case current_settings.fputype of
+          fpu_68881:
+            begin
+              location_reset(location,LOC_FPUREGISTER,left.location.size);
+
+              case left.location.loc of
+                LOC_FPUREGISTER:
+                  begin
+                    location.register:=left.location.register;
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg(op,S_FX,location.register))
+                  end;
+                LOC_CFPUREGISTER:
+                  begin
+                    location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_FX,left.location.register,location.register));
+                  end;
+                LOC_REFERENCE,LOC_CREFERENCE:
+                  begin
+                    location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+                    href:=left.location.reference;
+                    tcg68k(cg).fixref(current_asmdata.CurrAsmList,href);
+                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,tcgsize2opsize[left.location.size],href,location.register));
+                  end;
+                else
+                  internalerror(2015022205);
+              end;
+            end;
+        else
+          internalerror(2015022204);
+        end;
+      end;
+
+      { ideas for second_abs_long (KB) }
+
+      { This is probably faster on 68000 than the generic implementation,
+        because shifting is slow on the original 68000, maybe also on the 68020?
+        Also needs to be tested on 040/060. This can also work on a CF.
+        input - d0, output - d2
+        move.l d0,d2
+        btst   #31,d2
+        sne    d1
+        extb.l d1 (or ext.w + ext.l on 68000)
+        eor.l  d1,d2
+        sub.l  d1,d2
+      }
+
+      { Solution using bitfield extraction, we don't support the necessary asm
+        construct for this yet, probably this is the fastest on 020, slower on
+        040/060 than the one above, doesn't work on '000 or CF.
+        input - d0, output - d2
+        move.l  d0,d2
+        bfexts  d0[0:1],d1
+        eor.l   d1,d2
+        sub.l   d1,d2
+      }
+begin
+  cinlinenode:=t68kinlinenode;
+end.

+ 43 - 1
compiler/m68k/n68kmat.pas

@@ -42,6 +42,10 @@ interface
         procedure emit_mod_reg_reg(signed: boolean;denum,num : tregister);override;
         procedure emit_mod_reg_reg(signed: boolean;denum,num : tregister);override;
       end;
       end;
 
 
+      tm68kunaryminusnode = class(tcgunaryminusnode)
+        procedure second_float;override;
+      end;
+
       tm68kshlshrnode = class(tshlshrnode)
       tm68kshlshrnode = class(tshlshrnode)
          procedure pass_generate_code;override;
          procedure pass_generate_code;override;
          { everything will be handled in pass_2 }
          { everything will be handled in pass_2 }
@@ -172,6 +176,43 @@ implementation
     end;
     end;
 
 
 
 
+{*****************************************************************************
+                          TM68KUNARYMINUSNODE
+*****************************************************************************}
+
+    procedure tm68kunaryminusnode.second_float;
+      var
+        href: treference;
+      begin
+        secondpass(left);
+        location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
+        //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('unaryminus second_float called!')));
+
+        case left.location.loc of
+          LOC_REFERENCE,
+          LOC_CREFERENCE :
+            begin
+              location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+              href:=left.location.reference;
+              tcg68k(cg).fixref(current_asmdata.CurrAsmList,href);
+              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_FNEG,tcgsize2opsize[left.location.size],href,location.register));
+            end;
+          LOC_FPUREGISTER:
+            begin
+              location.register:=left.location.register;
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FNEG,S_FX,location.register));
+            end;
+          LOC_CFPUREGISTER:
+            begin
+               location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,S_FX,left.location.register,location.register));
+            end;
+          else
+            internalerror(200306021);
+        end;
+      end;
+
+
 {*****************************************************************************
 {*****************************************************************************
                              TM68KSHLRSHRNODE
                              TM68KSHLRSHRNODE
 *****************************************************************************}
 *****************************************************************************}
@@ -207,7 +248,7 @@ implementation
             hreg64lo:=left.location.register64.reglo;
             hreg64lo:=left.location.register64.reglo;
 
 
             shiftval := tordconstnode(right).value.svalue;
             shiftval := tordconstnode(right).value.svalue;
-	    shiftval := shiftval and 63;
+            shiftval := shiftval and 63;
             if shiftval > 31 then
             if shiftval > 31 then
               begin
               begin
                 if nodetype = shln then
                 if nodetype = shln then
@@ -284,5 +325,6 @@ implementation
 begin
 begin
    cnotnode:=tm68knotnode;
    cnotnode:=tm68knotnode;
    cmoddivnode:=tm68kmoddivnode;
    cmoddivnode:=tm68kmoddivnode;
+   cunaryminusnode:=tm68kunaryminusnode;
    cshlshrnode:=tm68kshlshrnode;
    cshlshrnode:=tm68kshlshrnode;
 end.
 end.

+ 3 - 2
compiler/m68k/n68kmem.pas

@@ -27,12 +27,13 @@ interface
 
 
     uses
     uses
       globtype,
       globtype,
+      symtype,
       cgbase,cpuinfo,cpubase,
       cgbase,cpuinfo,cpubase,
       node,nmem,ncgmem;
       node,nmem,ncgmem;
 
 
     type
     type
        t68kvecnode = class(tcgvecnode)
        t68kvecnode = class(tcgvecnode)
-          procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override;
+          procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint); override;
           //procedure pass_generate_code;override;
           //procedure pass_generate_code;override;
        end;
        end;
 
 
@@ -59,7 +60,7 @@ implementation
     { the live range of the LOC_CREGISTER will most likely overlap the   }
     { the live range of the LOC_CREGISTER will most likely overlap the   }
     { the live range of the target LOC_(C)REGISTER)                      }
     { the live range of the target LOC_(C)REGISTER)                      }
     { The passed register may be a LOC_CREGISTER as well.                }
     { The passed register may be a LOC_CREGISTER as well.                }
-    procedure t68kvecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
+    procedure t68kvecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
       var
       var
         hreg: tregister;
         hreg: tregister;
         scaled: boolean;
         scaled: boolean;

+ 3 - 1
compiler/mips/aoptcpu.pas

@@ -310,7 +310,9 @@ unit aoptcpu;
 
 
   function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
   function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
     begin
     begin
-      result:=(next.oper[1]^.typ=top_ref) and
+      result:=(next.ops>1) and
+        (next.oper[1]^.typ=top_ref) and
+        (next.oper[1]^.ref^.refaddr<>addr_full) and
         (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
         (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
         Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));
         Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));

+ 1 - 1
compiler/msg/errord.msg

@@ -3528,7 +3528,7 @@ F*0*_Es werden nur Optionen aufgelistet, die f
 3*2Anasmelf_ELF32 (Linux) Datei mit Hilfe von Nasm
 3*2Anasmelf_ELF32 (Linux) Datei mit Hilfe von Nasm
 3*2Anasmwin32_Win32 Objektdatei mit Hilfe von Nasm
 3*2Anasmwin32_Win32 Objektdatei mit Hilfe von Nasm
 3*2Anasmwdosx_Win32/WDOSX Objektdatei mit Hilfe von Nasm
 3*2Anasmwdosx_Win32/WDOSX Objektdatei mit Hilfe von Nasm
-3*2Anasmdarwin Macho32 Objektdatei mit Hilfe von Nasm (experimentell)
+3*2Anasmdarwin_Macho32 Objektdatei mit Hilfe von Nasm (experimentell)
 3*2Awasm_Obj Datei mit Hilfe von Wasm (Watcom)
 3*2Awasm_Obj Datei mit Hilfe von Wasm (Watcom)
 3*2Anasmobj_Obj Datei mit Hilfe von Nasm
 3*2Anasmobj_Obj Datei mit Hilfe von Nasm
 3*2Amasm_Obj Datei mit Hilfe von Masm (Microsoft)
 3*2Amasm_Obj Datei mit Hilfe von Masm (Microsoft)

+ 1 - 1
compiler/msg/errordu.msg

@@ -3528,7 +3528,7 @@ F*0*_Es werden nur Optionen aufgelistet, die für die voreingestellte oder ausge
 3*2Anasmelf_ELF32 (Linux) Datei mit Hilfe von Nasm
 3*2Anasmelf_ELF32 (Linux) Datei mit Hilfe von Nasm
 3*2Anasmwin32_Win32 Objektdatei mit Hilfe von Nasm
 3*2Anasmwin32_Win32 Objektdatei mit Hilfe von Nasm
 3*2Anasmwdosx_Win32/WDOSX Objektdatei mit Hilfe von Nasm
 3*2Anasmwdosx_Win32/WDOSX Objektdatei mit Hilfe von Nasm
-3*2Anasmdarwin Macho32 Objektdatei mit Hilfe von Nasm (experimentell)
+3*2Anasmdarwin_Macho32 Objektdatei mit Hilfe von Nasm (experimentell)
 3*2Awasm_Obj Datei mit Hilfe von Wasm (Watcom)
 3*2Awasm_Obj Datei mit Hilfe von Wasm (Watcom)
 3*2Anasmobj_Obj Datei mit Hilfe von Nasm
 3*2Anasmobj_Obj Datei mit Hilfe von Nasm
 3*2Amasm_Obj Datei mit Hilfe von Masm (Microsoft)
 3*2Amasm_Obj Datei mit Hilfe von Masm (Microsoft)

+ 11 - 0
compiler/msg/errore.msg

@@ -1531,6 +1531,9 @@ parser_w_ptr_type_ignored=03338_W_Pointer type "$1" ignored
 parser_e_global_generic_references_static=03339_E_Global Generic template references static symtable
 parser_e_global_generic_references_static=03339_E_Global Generic template references static symtable
 % A generic declared in the interface section of a unit must not reference symbols that belong
 % A generic declared in the interface section of a unit must not reference symbols that belong
 % solely to the implementation section of that unit.
 % solely to the implementation section of that unit.
+parser_u_already_compiled=03340_UL_Unit $1 has been already compiled meanwhile.
+% This tells you that the recursive reading of the uses clauses triggered already
+% a compilation of the current unit, so the current compilation can be aborded.
 %
 %
 %
 %
 %
 %
@@ -3458,6 +3461,7 @@ new features, etc.):
 #    4 = x86_64
 #    4 = x86_64
 #    6 = 680x0 targets
 #    6 = 680x0 targets
 #    8 = 8086 (16-bit) targets
 #    8 = 8086 (16-bit) targets
+#    a = AArch64
 #    A = ARM
 #    A = ARM
 #    e = in extended debug mode only
 #    e = in extended debug mode only
 #    F = help for the 'fpc' binary (independent of the target compiler)
 #    F = help for the 'fpc' binary (independent of the target compiler)
@@ -3726,6 +3730,7 @@ F*2P<x>_Set target CPU (arm,avr,i386,jvm,m68k,mips,mipsel,powerpc,powerpc64,spar
 3*2Twince_Windows CE
 3*2Twince_Windows CE
 4*2Tdarwin_Darwin/Mac OS X
 4*2Tdarwin_Darwin/Mac OS X
 4*2Tfreebsd_FreeBSD
 4*2Tfreebsd_FreeBSD
+4*2Tiphonesim_ iPhoneSimulator
 4*2Tlinux_Linux
 4*2Tlinux_Linux
 4*2Tnetbsd_NetBSD
 4*2Tnetbsd_NetBSD
 4*2Topenbsd_OpenBSD
 4*2Topenbsd_OpenBSD
@@ -3736,6 +3741,7 @@ F*2P<x>_Set target CPU (arm,avr,i386,jvm,m68k,mips,mipsel,powerpc,powerpc64,spar
 6*2Tlinux_Linux
 6*2Tlinux_Linux
 6*2Tpalmos_PalmOS
 6*2Tpalmos_PalmOS
 8*2Tmsdos_MS-DOS (and compatible)
 8*2Tmsdos_MS-DOS (and compatible)
+a*2Tdarwin_Darwin/iOS
 A*2Tandroid_Android
 A*2Tandroid_Android
 A*2Tdarwin_Darwin/iPhoneOS/iOS
 A*2Tdarwin_Darwin/iPhoneOS/iOS
 A*2Tembedded_Embedded
 A*2Tembedded_Embedded
@@ -3790,6 +3796,7 @@ A*2WA_Specify native type application (Windows)
 3*2Wb_Create a bundle instead of a library (Darwin)
 3*2Wb_Create a bundle instead of a library (Darwin)
 P*2Wb_Create a bundle instead of a library (Darwin)
 P*2Wb_Create a bundle instead of a library (Darwin)
 p*2Wb_Create a bundle instead of a library (Darwin)
 p*2Wb_Create a bundle instead of a library (Darwin)
+a*2Wb_Create a bundle instead of a library (Darwin)
 A*2Wb_Create a bundle instead of a library (Darwin)
 A*2Wb_Create a bundle instead of a library (Darwin)
 4*2Wb_Create a bundle instead of a library (Darwin)
 4*2Wb_Create a bundle instead of a library (Darwin)
 3*2WB_Create a relocatable image (Windows, Symbian)
 3*2WB_Create a relocatable image (Windows, Symbian)
@@ -3807,6 +3814,7 @@ P*2WC_Specify console type application (Classic Mac OS)
 A*2WD_Use DEFFILE to export functions of DLL or EXE (Windows)
 A*2WD_Use DEFFILE to export functions of DLL or EXE (Windows)
 3*2We_Use external resources (Darwin)
 3*2We_Use external resources (Darwin)
 4*2We_Use external resources (Darwin)
 4*2We_Use external resources (Darwin)
+a*2We_Use external resources (Darwin)
 A*2We_Use external resources (Darwin)
 A*2We_Use external resources (Darwin)
 P*2We_Use external resources (Darwin)
 P*2We_Use external resources (Darwin)
 p*2We_Use external resources (Darwin)
 p*2We_Use external resources (Darwin)
@@ -3817,6 +3825,7 @@ A*2WG_Specify graphic type application (Windows)
 P*2WG_Specify graphic type application (Classic Mac OS)
 P*2WG_Specify graphic type application (Classic Mac OS)
 3*2Wi_Use internal resources (Darwin)
 3*2Wi_Use internal resources (Darwin)
 4*2Wi_Use internal resources (Darwin)
 4*2Wi_Use internal resources (Darwin)
+a*2Wi_Use internal resources (Darwin)
 A*2Wi_Use internal resources (Darwin)
 A*2Wi_Use internal resources (Darwin)
 P*2Wi_Use internal resources (Darwin)
 P*2Wi_Use internal resources (Darwin)
 p*2Wi_Use internal resources (Darwin)
 p*2Wi_Use internal resources (Darwin)
@@ -3840,6 +3849,8 @@ A*2Wpxxxx_Specify the controller type; see fpc -i or fpc -iu for possible values
 m*2Wpxxxx_Specify the controller type; see fpc -i or fpc -iu for possible values
 m*2Wpxxxx_Specify the controller type; see fpc -i or fpc -iu for possible values
 V*2Wpxxxx_Specify the controller type; see fpc -i or fpc -iu for possible values
 V*2Wpxxxx_Specify the controller type; see fpc -i or fpc -iu for possible values
 3*2WP<x>_Minimum iOS deployment version: 3.0, 5.0.1, ... (iphonesim)
 3*2WP<x>_Minimum iOS deployment version: 3.0, 5.0.1, ... (iphonesim)
+4*2WP<x>_Minimum iOS deployment version: 8.0, 8.0.2, ... (iphonesim)
+a*2WP<x>_Minimum iOS deployment version: 7.0, 7.1.2, ... (Darwin)
 A*2WP<x>_Minimum iOS deployment version: 3.0, 5.0.1, ... (Darwin)
 A*2WP<x>_Minimum iOS deployment version: 3.0, 5.0.1, ... (Darwin)
 3*2WR_Generate relocation code (Windows)
 3*2WR_Generate relocation code (Windows)
 4*2WR_Generate relocation code (Windows)
 4*2WR_Generate relocation code (Windows)

+ 3 - 2
compiler/msgidx.inc

@@ -441,6 +441,7 @@ const
   parser_e_default_value_val_const=03337;
   parser_e_default_value_val_const=03337;
   parser_w_ptr_type_ignored=03338;
   parser_w_ptr_type_ignored=03338;
   parser_e_global_generic_references_static=03339;
   parser_e_global_generic_references_static=03339;
+  parser_u_already_compiled=03340;
   type_e_mismatch=04000;
   type_e_mismatch=04000;
   type_e_incompatible_types=04001;
   type_e_incompatible_types=04001;
   type_e_not_equal_types=04002;
   type_e_not_equal_types=04002;
@@ -1006,9 +1007,9 @@ const
   option_info=11024;
   option_info=11024;
   option_help_pages=11025;
   option_help_pages=11025;
 
 
-  MsgTxtSize = 74953;
+  MsgTxtSize = 75323;
 
 
   MsgIdxMax : array[1..20] of longint=(
   MsgIdxMax : array[1..20] of longint=(
-    26,99,340,1000,96,57,126,29,202,64,
+    26,99,341,1000,96,57,126,29,202,64,
     58,20,1,1,1,1,1,1,1,1
     58,20,1,1,1,1,1,1,1,1
   );
   );

ファイルの差分が大きいため隠しています
+ 288 - 287
compiler/msgtxt.inc


+ 8 - 4
compiler/nadd.pas

@@ -2819,10 +2819,6 @@ implementation
 
 
         if try_make_mul32to64 then
         if try_make_mul32to64 then
           begin
           begin
-            { if the code generator can handle 32 to 64-bit muls, we're done here }
-            if not use_generic_mul32to64 then
-              exit;
-
             { this uses the same criteria for signedness as the 32 to 64-bit mul
             { this uses the same criteria for signedness as the 32 to 64-bit mul
               handling in the i386 code generator }
               handling in the i386 code generator }
             if is_signed(left.resultdef) and is_signed(right.resultdef) then
             if is_signed(left.resultdef) and is_signed(right.resultdef) then
@@ -3124,6 +3120,14 @@ implementation
                   internalerror(200103291);
                   internalerror(200103291);
                  expectloc:=LOC_FLAGS;
                  expectloc:=LOC_FLAGS;
                end
                end
+             else if (nodetype=muln) and
+                is_64bitint(resultdef) and
+                not use_generic_mul32to64 and
+                try_make_mul32to64 then
+               begin
+                 { if the code generator can handle 32 to 64-bit muls,
+                   we're done here }
+               end
 {$ifndef cpu64bitalu}
 {$ifndef cpu64bitalu}
               { is there a 64 bit type ? }
               { is there a 64 bit type ? }
              else if (torddef(ld).ordtype in [s64bit,u64bit,scurrency]) then
              else if (torddef(ld).ordtype in [s64bit,u64bit,scurrency]) then

+ 2 - 4
compiler/nbas.pas

@@ -52,8 +52,7 @@ interface
           p_asm : TAsmList;
           p_asm : TAsmList;
           currenttai : tai;
           currenttai : tai;
           { Used registers in assembler block }
           { Used registers in assembler block }
-          used_regs_int,
-          used_regs_fpu : tcpuregisterset;
+          has_registerlist : boolean;
           constructor create(p : TAsmList);virtual;
           constructor create(p : TAsmList);virtual;
           constructor create_get_position;
           constructor create_get_position;
           destructor destroy;override;
           destructor destroy;override;
@@ -642,8 +641,6 @@ implementation
         inherited create(asmn);
         inherited create(asmn);
         p_asm:=p;
         p_asm:=p;
         currenttai:=nil;
         currenttai:=nil;
-        used_regs_int:=[];
-        used_regs_fpu:=[];
       end;
       end;
 
 
 
 
@@ -751,6 +748,7 @@ implementation
           end
           end
         else n.p_asm := nil;
         else n.p_asm := nil;
         n.currenttai:=currenttai;
         n.currenttai:=currenttai;
+        n.has_registerlist:=has_registerlist;
         result:=n;
         result:=n;
       end;
       end;
 
 

ファイルの差分が大きいため隠しています
+ 652 - 420
compiler/ncal.pas


+ 5 - 2
compiler/ncgbas.pas

@@ -255,7 +255,9 @@ interface
            end;
            end;
 
 
          { Allocate registers used in the assembler block }
          { Allocate registers used in the assembler block }
-         cg.alloccpuregisters(current_asmdata.CurrAsmList,R_INTREGISTER,used_regs_int);
+         { has_registerlist=true means that registers are specified and already allocated }
+         if (not has_registerlist) then
+           cg.allocallcpuregisters(current_asmdata.CurrAsmList);
 
 
          if (po_inline in current_procinfo.procdef.procoptions) then
          if (po_inline in current_procinfo.procdef.procoptions) then
            begin
            begin
@@ -344,7 +346,8 @@ interface
            end;
            end;
 
 
          { Release register used in the assembler block }
          { Release register used in the assembler block }
-         cg.dealloccpuregisters(current_asmdata.CurrAsmList,R_INTREGISTER,used_regs_int);
+         if (not has_registerlist) then
+           cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
        end;
        end;
 
 
 
 

+ 2 - 2
compiler/ncgcal.pas

@@ -947,7 +947,7 @@ implementation
                  callref:=can_call_ref(href);
                  callref:=can_call_ref(href);
                  if not callref then
                  if not callref then
                    begin
                    begin
-                     pvreg:=cg.getintregister(current_asmdata.CurrAsmList,proc_addr_size);
+                     pvreg:=hlcg.getaddressregister(current_asmdata.CurrAsmList,proc_addr_voidptrdef);
                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,proc_addr_size,proc_addr_size,href,pvreg);
                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,proc_addr_size,proc_addr_size,href,pvreg);
                    end;
                    end;
 
 
@@ -1026,7 +1026,7 @@ implementation
               secondpass(right);
               secondpass(right);
               callref:=false;
               callref:=false;
 
 
-              pvreg:=cg.getintregister(current_asmdata.CurrAsmList,proc_addr_size);
+              pvreg:=hlcg.getaddressregister(current_asmdata.CurrAsmList,proc_addr_voidptrdef);
               { Only load OS_ADDR from the reference (when converting to hlcg:
               { Only load OS_ADDR from the reference (when converting to hlcg:
                 watch out with procedure of object) }
                 watch out with procedure of object) }
               if po_is_block in procdefinition.procoptions then
               if po_is_block in procdefinition.procoptions then

+ 1 - 1
compiler/ncgcnv.pas

@@ -364,7 +364,7 @@ interface
             {$ifdef cpu_uses_separate_address_registers}
             {$ifdef cpu_uses_separate_address_registers}
               if getregtype(left.location.register)<>R_ADDRESSREGISTER then
               if getregtype(left.location.register)<>R_ADDRESSREGISTER then
                 begin
                 begin
-                  location.reference.base:=rg.getaddressregister(current_asmdata.CurrAsmList);
+                  location.reference.base:=cg.getaddressregister(current_asmdata.CurrAsmList);
                   cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,
                   cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,
                           left.location.register,location.reference.base);
                           left.location.register,location.reference.base);
                 end
                 end

+ 4 - 8
compiler/ncginl.pas

@@ -749,16 +749,12 @@ implementation
       secondpass(left);
       secondpass(left);
 
 
       opsize:=tcgsize2unsigned[left.location.size];
       opsize:=tcgsize2unsigned[left.location.size];
-      if opsize < OS_32 then
-        opsize:=OS_32;
-
-      if (left.location.loc <> LOC_REGISTER) or
-         (left.location.size <> opsize) then
+      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
 
 
-      location_reset(location,LOC_REGISTER,opsize);
-      location.register := cg.getintregister(current_asmdata.CurrAsmList,opsize);
-      cg.a_bit_scan_reg_reg(current_asmdata.CurrAsmList,reverse,opsize,left.location.register,location.register);
+      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+      cg.a_bit_scan_reg_reg(current_asmdata.CurrAsmList,reverse,opsize,location.size,left.location.register,location.register);
     end;
     end;
 
 
 
 

+ 11 - 3
compiler/ncgld.pas

@@ -768,9 +768,17 @@ implementation
                            (right.resultdef.typ=floatdef) and
                            (right.resultdef.typ=floatdef) and
                            (left.location.size<>right.location.size) then
                            (left.location.size<>right.location.size) then
                           begin
                           begin
-                            hlcg.a_loadfpu_ref_ref(current_asmdata.CurrAsmList,
-                              right.resultdef,left.resultdef,
-                              right.location.reference,left.location.reference)
+                            { assume that all float types can be handed by the
+                              fpu if one can be handled by the fpu }
+                            if not use_vectorfpu(left.resultdef) or
+                               not use_vectorfpu(right.resultdef) then
+                              hlcg.a_loadfpu_ref_ref(current_asmdata.CurrAsmList,
+                                right.resultdef,left.resultdef,
+                                right.location.reference,left.location.reference)
+                            else
+                              hlcg.a_loadmm_ref_ref(current_asmdata.CurrAsmList,
+                                right.resultdef,left.resultdef,
+                                right.location.reference,left.location.reference,mms_movescalar)
                           end
                           end
                         else
                         else
                           begin
                           begin

+ 48 - 26
compiler/ncgmem.pas

@@ -27,7 +27,8 @@ unit ncgmem;
 interface
 interface
 
 
     uses
     uses
-      globtype,cgbase,cpuinfo,cpubase,
+      globtype,cgbase,cgutils,cpuinfo,cpubase,
+      symtype,
       node,nmem;
       node,nmem;
 
 
     type
     type
@@ -67,10 +68,12 @@ interface
            This routine should update location.reference correctly,
            This routine should update location.reference correctly,
            so it points to the correct address.
            so it points to the correct address.
          }
          }
-         procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);virtual;
-         procedure update_reference_reg_packed(maybe_const_reg:tregister;l:aint);virtual;
+         procedure update_reference_reg_mul(maybe_const_reg: tregister;regsize: tdef; l: aint);virtual;
+         procedure update_reference_reg_packed(maybe_const_reg: tregister; regsize: tdef; l: aint);virtual;
+         procedure update_reference_offset(var ref: treference; index, mulsize: aint); virtual;
          procedure second_wideansistring;virtual;
          procedure second_wideansistring;virtual;
          procedure second_dynamicarray;virtual;
          procedure second_dynamicarray;virtual;
+         function valid_index_size(size: tcgsize): boolean;virtual;
        public
        public
          procedure pass_generate_code;override;
          procedure pass_generate_code;override;
        end;
        end;
@@ -81,11 +84,11 @@ implementation
     uses
     uses
       systems,
       systems,
       cutils,cclasses,verbose,globals,constexp,
       cutils,cclasses,verbose,globals,constexp,
-      symconst,symbase,symtype,symdef,symsym,symcpu,symtable,defutil,paramgr,
+      symconst,symbase,symdef,symsym,symcpu,symtable,defutil,paramgr,
       aasmbase,aasmtai,aasmdata,
       aasmbase,aasmtai,aasmdata,
       procinfo,pass_2,parabase,
       procinfo,pass_2,parabase,
       pass_1,nld,ncon,nadd,ncnv,nutils,
       pass_1,nld,ncon,nadd,ncnv,nutils,
-      cgutils,cgobj,hlcgobj,
+      cgobj,hlcgobj,
       tgobj,ncgutil,objcgutl,
       tgobj,ncgutil,objcgutl,
       defcmp
       defcmp
       ;
       ;
@@ -345,7 +348,7 @@ implementation
                       {$ifdef cpu_uses_separate_address_registers}
                       {$ifdef cpu_uses_separate_address_registers}
                         if getregtype(left.location.register)<>R_ADDRESSREGISTER then
                         if getregtype(left.location.register)<>R_ADDRESSREGISTER then
                           begin
                           begin
-                            location.reference.base:=rg.getaddressregister(current_asmdata.CurrAsmList);
+                            location.reference.base:=cg.getaddressregister(current_asmdata.CurrAsmList);
                             hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,left.resultdef,left.resultdef,
                             hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,left.resultdef,left.resultdef,
                               left.location.register,location.reference.base);
                               left.location.register,location.reference.base);
                           end
                           end
@@ -519,8 +522,8 @@ implementation
              }
              }
              asmsym:=current_asmdata.RefAsmSymbol(vs.mangledname);
              asmsym:=current_asmdata.RefAsmSymbol(vs.mangledname);
              reference_reset_symbol(tmpref,asmsym,0,sizeof(pint));
              reference_reset_symbol(tmpref,asmsym,0,sizeof(pint));
-             location.reference.index:=cg.getaddressregister(current_asmdata.CurrAsmList);
-             cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,tmpref,location.reference.index);
+             location.reference.index:=hlcg.getintregister(current_asmdata.CurrAsmList,ptruinttype);
+             hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ptruinttype,ptruinttype,tmpref,location.reference.index);
              { always packrecords C -> natural alignment }
              { always packrecords C -> natural alignment }
              location.reference.alignment:=vs.vardef.alignment;
              location.reference.alignment:=vs.vardef.alignment;
            end
            end
@@ -613,7 +616,7 @@ implementation
      { the live range of the LOC_CREGISTER will most likely overlap the   }
      { the live range of the LOC_CREGISTER will most likely overlap the   }
      { the live range of the target LOC_(C)REGISTER)                      }
      { the live range of the target LOC_(C)REGISTER)                      }
      { The passed register may be a LOC_CREGISTER as well.                }
      { The passed register may be a LOC_CREGISTER as well.                }
-     procedure tcgvecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
+     procedure tcgvecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
        var
        var
          hreg: tregister;
          hreg: tregister;
        begin
        begin
@@ -643,7 +646,7 @@ implementation
 
 
 
 
      { see remarks for tcgvecnode.update_reference_reg_mul above }
      { see remarks for tcgvecnode.update_reference_reg_mul above }
-     procedure tcgvecnode.update_reference_reg_packed(maybe_const_reg:tregister;l:aint);
+     procedure tcgvecnode.update_reference_reg_packed(maybe_const_reg: tregister; regsize: tdef; l:aint);
        var
        var
          sref: tsubsetreference;
          sref: tsubsetreference;
          offsetreg, hreg: tregister;
          offsetreg, hreg: tregister;
@@ -661,7 +664,7 @@ implementation
 {$endif not cpu64bitalu}
 {$endif not cpu64bitalu}
              ) then
              ) then
            begin
            begin
-             update_reference_reg_mul(maybe_const_reg,l div 8);
+             update_reference_reg_mul(maybe_const_reg,regsize,l div 8);
              exit;
              exit;
            end;
            end;
          if (l > 8*sizeof(aint)) then
          if (l > 8*sizeof(aint)) then
@@ -672,7 +675,7 @@ implementation
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_IMUL,OS_INT,l,hreg);
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_IMUL,OS_INT,l,hreg);
          { keep alignment for index }
          { keep alignment for index }
          sref.ref.alignment := left.resultdef.alignment;
          sref.ref.alignment := left.resultdef.alignment;
-         if not ispowerof2(sref.ref.alignment,temp) then
+         if not ispowerof2(packedbitsloadsize(l),temp) then
            internalerror(2006081201);
            internalerror(2006081201);
          alignpower:=temp;
          alignpower:=temp;
          offsetreg := cg.getaddressregister(current_asmdata.CurrAsmList);
          offsetreg := cg.getaddressregister(current_asmdata.CurrAsmList);
@@ -699,6 +702,12 @@ implementation
        end;
        end;
 
 
 
 
+     procedure tcgvecnode.update_reference_offset(var ref: treference; index, mulsize: aint);
+       begin
+         inc(ref.offset,index*mulsize);
+       end;
+
+
      procedure tcgvecnode.second_wideansistring;
      procedure tcgvecnode.second_wideansistring;
        begin
        begin
        end;
        end;
@@ -708,6 +717,13 @@ implementation
        end;
        end;
 
 
 
 
+     function tcgvecnode.valid_index_size(size: tcgsize): boolean;
+       begin
+         result:=
+           tcgsize2signed[size]=tcgsize2signed[OS_ADDR];
+       end;
+
+
      procedure tcgvecnode.rangecheck_array;
      procedure tcgvecnode.rangecheck_array;
        var
        var
          hightree : tnode;
          hightree : tnode;
@@ -853,6 +869,7 @@ implementation
          paraloc2 : tcgpara;
          paraloc2 : tcgpara;
          subsetref : tsubsetreference;
          subsetref : tsubsetreference;
          temp : longint;
          temp : longint;
+         indexdef : tdef;
       begin
       begin
          paraloc1.init;
          paraloc1.init;
          paraloc2.init;
          paraloc2.init;
@@ -910,7 +927,7 @@ implementation
 
 
               { in ansistrings/widestrings S[1] is p<w>char(S)[0] }
               { in ansistrings/widestrings S[1] is p<w>char(S)[0] }
               if not(cs_zerobasedstrings in current_settings.localswitches) then
               if not(cs_zerobasedstrings in current_settings.localswitches) then
-                dec(location.reference.offset,offsetdec);
+                update_reference_offset(location.reference,-1,offsetdec);
            end
            end
          else if is_dynamic_array(left.resultdef) then
          else if is_dynamic_array(left.resultdef) then
            begin
            begin
@@ -963,7 +980,7 @@ implementation
               or is_64bitint(resultdef)
               or is_64bitint(resultdef)
 {$endif not cpu64bitalu}
 {$endif not cpu64bitalu}
               ) then
               ) then
-           dec(location.reference.offset,bytemulsize*tarraydef(left.resultdef).lowrange);
+           update_reference_offset(location.reference,-tarraydef(left.resultdef).lowrange,bytemulsize);
 
 
          if right.nodetype=ordconstn then
          if right.nodetype=ordconstn then
            begin
            begin
@@ -984,10 +1001,10 @@ implementation
                    { only orddefs are bitpacked }
                    { only orddefs are bitpacked }
                    not is_ordinal(resultdef))) then
                    not is_ordinal(resultdef))) then
                 begin
                 begin
-                  extraoffset:=bytemulsize*tordconstnode(right).value.svalue;
-                  inc(location.reference.offset,extraoffset);
-                  { adjust alignment after to this change }
-                  location.reference.alignment:=newalignment(location.reference.alignment,extraoffset);
+                  extraoffset:=tordconstnode(right).value.svalue;
+                  update_reference_offset(location.reference,extraoffset,bytemulsize);
+                  { adjust alignment after this change }
+                  location.reference.alignment:=newalignment(location.reference.alignment,extraoffset*bytemulsize);
                   { don't do this for floats etc.; needed to properly set the }
                   { don't do this for floats etc.; needed to properly set the }
                   { size for bitpacked arrays (e.g. a bitpacked array of      }
                   { size for bitpacked arrays (e.g. a bitpacked array of      }
                   { enums who are size 2 but fit in one byte -> in the array  }
                   { enums who are size 2 but fit in one byte -> in the array  }
@@ -1000,10 +1017,10 @@ implementation
                 begin
                 begin
                   subsetref.ref := location.reference;
                   subsetref.ref := location.reference;
                   subsetref.ref.alignment := left.resultdef.alignment;
                   subsetref.ref.alignment := left.resultdef.alignment;
-                  if not ispowerof2(subsetref.ref.alignment,temp) then
+                  if not ispowerof2(packedbitsloadsize(resultdef.packedbitsize),temp) then
                     internalerror(2006081212);
                     internalerror(2006081212);
                   alignpow:=temp;
                   alignpow:=temp;
-                  inc(subsetref.ref.offset,((mulsize * (tordconstnode(right).value.svalue-tarraydef(left.resultdef).lowrange)) shr (3+alignpow)) shl alignpow);
+                  update_reference_offset(subsetref.ref,(mulsize * (tordconstnode(right).value.svalue-tarraydef(left.resultdef).lowrange)) shr (3+alignpow),1 shl alignpow);
                   subsetref.bitindexreg := NR_NO;
                   subsetref.bitindexreg := NR_NO;
                   subsetref.startbit := (mulsize * (tordconstnode(right).value.svalue-tarraydef(left.resultdef).lowrange)) and ((1 shl (3+alignpow))-1);
                   subsetref.startbit := (mulsize * (tordconstnode(right).value.svalue-tarraydef(left.resultdef).lowrange)) and ((1 shl (3+alignpow))-1);
                   subsetref.bitlen := resultdef.packedbitsize;
                   subsetref.bitlen := resultdef.packedbitsize;
@@ -1048,8 +1065,7 @@ implementation
                             replacenode(rightp^,taddnode(rightp^).left);
                             replacenode(rightp^,taddnode(rightp^).left);
                           end;
                           end;
                      end;
                      end;
-                   inc(location.reference.offset,
-                       mulsize*extraoffset);
+                   update_reference_offset(location.reference,extraoffset,mulsize);
                 end;
                 end;
               { calculate from left to right }
               { calculate from left to right }
               if not(location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
               if not(location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
@@ -1067,8 +1083,14 @@ implementation
               secondpass(right);
               secondpass(right);
 
 
               { if mulsize = 1, we won't have to modify the index }
               { if mulsize = 1, we won't have to modify the index }
-              if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or (right.location.size<>OS_ADDR) then
-                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,ptruinttype,true);
+              if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or
+                 not valid_index_size(right.location.size) then
+                begin
+                  hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,ptruinttype,true);
+                  indexdef:=ptruinttype
+                end
+              else
+                indexdef:=right.resultdef;
 
 
               if isjump then
               if isjump then
                begin
                begin
@@ -1090,9 +1112,9 @@ implementation
               { insert the register and the multiplication factor in the
               { insert the register and the multiplication factor in the
                 reference }
                 reference }
               if not is_packed_array(left.resultdef) then
               if not is_packed_array(left.resultdef) then
-                update_reference_reg_mul(right.location.register,mulsize)
+                update_reference_reg_mul(right.location.register,indexdef,mulsize)
               else
               else
-                update_reference_reg_packed(right.location.register,mulsize);
+                update_reference_reg_packed(right.location.register,indexdef,mulsize);
            end;
            end;
 
 
         location.size:=newsize;
         location.size:=newsize;

この差分においてかなりの量のファイルが変更されているため、一部のファイルを表示していません