Browse Source

Merged from trunk

git-svn-id: branches/laksen/armiw@30146 -
Jeppe Johansen 10 years ago
parent
commit
914e9e7b49
100 changed files with 9758 additions and 3318 deletions
  1. 66 2
      .gitattributes
  2. 28 3
      Makefile
  3. 4 1
      Makefile.fpc
  4. 65 10
      compiler/Makefile
  5. 14 10
      compiler/Makefile.fpc
  6. 93 36
      compiler/aarch64/a64att.inc
  7. 57 0
      compiler/aarch64/a64atts.inc
  8. 158 44
      compiler/aarch64/a64ins.dat
  9. 93 36
      compiler/aarch64/a64op.inc
  10. 168 161
      compiler/aarch64/a64reg.dat
  11. 462 1125
      compiler/aarch64/aasmcpu.pas
  12. 288 0
      compiler/aarch64/agcpugas.pas
  13. 25 7
      compiler/aarch64/aoptcpub.pas
  14. 2284 0
      compiler/aarch64/cgcpu.pas
  15. 200 38
      compiler/aarch64/cpubase.pas
  16. 2 2
      compiler/aarch64/cpuinfo.pas
  17. 40 0
      compiler/aarch64/cpunode.pas
  18. 456 520
      compiler/aarch64/cpupara.pas
  19. 68 0
      compiler/aarch64/cpupi.pas
  20. 70 0
      compiler/aarch64/cputarg.pas
  21. 156 0
      compiler/aarch64/hlcgcpu.pas
  22. 402 0
      compiler/aarch64/ncpuadd.pas
  23. 201 0
      compiler/aarch64/ncpucnv.pas
  24. 184 0
      compiler/aarch64/ncpuinl.pas
  25. 196 0
      compiler/aarch64/ncpumat.pas
  26. 142 0
      compiler/aarch64/ncpumem.pas
  27. 175 0
      compiler/aarch64/ncpuset.pas
  28. 5 0
      compiler/aarch64/ra64con.inc
  29. 162 157
      compiler/aarch64/ra64dwa.inc
  30. 1 1
      compiler/aarch64/ra64nor.inc
  31. 6 1
      compiler/aarch64/ra64num.inc
  32. 101 96
      compiler/aarch64/ra64rni.inc
  33. 48 43
      compiler/aarch64/ra64sri.inc
  34. 162 157
      compiler/aarch64/ra64sta.inc
  35. 6 1
      compiler/aarch64/ra64std.inc
  36. 5 0
      compiler/aarch64/ra64sup.inc
  37. 88 0
      compiler/aarch64/racpu.pas
  38. 1053 0
      compiler/aarch64/racpugas.pas
  39. 171 0
      compiler/aarch64/rgcpu.pas
  40. 21 4
      compiler/aasmtai.pas
  41. 4 1
      compiler/aggas.pas
  42. 19 5
      compiler/aoptobj.pas
  43. 45 12
      compiler/arm/aasmcpu.pas
  44. 1 2
      compiler/arm/agarmgas.pas
  45. 5 3
      compiler/arm/cgcpu.pas
  46. 6 0
      compiler/arm/cpuinfo.pas
  47. 4 3
      compiler/arm/narmmem.pas
  48. 1 1
      compiler/arm/raarmgas.pas
  49. 2 1
      compiler/assemble.pas
  50. 13 0
      compiler/cgbase.pas
  51. 2 2
      compiler/cghlcpu.pas
  52. 19 8
      compiler/cgobj.pas
  53. 6 0
      compiler/cgutils.pas
  54. 1 1
      compiler/finput.pas
  55. 1 0
      compiler/fpcdefs.inc
  56. 3 1
      compiler/fppu.pas
  57. 2 2
      compiler/globals.pas
  58. 3 3
      compiler/hlcg2ll.pas
  59. 19 7
      compiler/hlcgobj.pas
  60. 9 8
      compiler/i386/cpuinfo.pas
  61. 8 0
      compiler/i386/i386att.inc
  62. 8 0
      compiler/i386/i386atts.inc
  63. 8 0
      compiler/i386/i386int.inc
  64. 1 1
      compiler/i386/i386nop.inc
  65. 8 0
      compiler/i386/i386op.inc
  66. 9 1
      compiler/i386/i386prop.inc
  67. 112 0
      compiler/i386/i386tab.inc
  68. 1 1
      compiler/i386/popt386.pas
  69. 8 0
      compiler/i8086/i8086att.inc
  70. 8 0
      compiler/i8086/i8086atts.inc
  71. 8 0
      compiler/i8086/i8086int.inc
  72. 1 1
      compiler/i8086/i8086nop.inc
  73. 8 0
      compiler/i8086/i8086op.inc
  74. 9 1
      compiler/i8086/i8086prop.inc
  75. 112 0
      compiler/i8086/i8086tab.inc
  76. 5 4
      compiler/i8086/n8086mem.pas
  77. 2 2
      compiler/jvm/hlcgcpu.pas
  78. 2 2
      compiler/link.pas
  79. 10 1
      compiler/m68k/aasmcpu.pas
  80. 4 4
      compiler/m68k/cgcpu.pas
  81. 1 2
      compiler/m68k/cpunode.pas
  82. 36 18
      compiler/m68k/n68kadd.pas
  83. 212 0
      compiler/m68k/n68kinl.pas
  84. 43 1
      compiler/m68k/n68kmat.pas
  85. 3 2
      compiler/m68k/n68kmem.pas
  86. 3 1
      compiler/mips/aoptcpu.pas
  87. 1 1
      compiler/msg/errord.msg
  88. 1 1
      compiler/msg/errordu.msg
  89. 11 0
      compiler/msg/errore.msg
  90. 3 2
      compiler/msgidx.inc
  91. 288 287
      compiler/msgtxt.inc
  92. 8 4
      compiler/nadd.pas
  93. 2 4
      compiler/nbas.pas
  94. 652 420
      compiler/ncal.pas
  95. 5 2
      compiler/ncgbas.pas
  96. 2 2
      compiler/ncgcal.pas
  97. 1 1
      compiler/ncgcnv.pas
  98. 4 8
      compiler/ncginl.pas
  99. 11 3
      compiler/ncgld.pas
  100. 48 26
      compiler/ncgmem.pas

+ 66 - 2
.gitattributes

@@ -14,13 +14,25 @@ compiler/aarch64/a64op.inc svneol=native#text/plain
 compiler/aarch64/a64reg.dat svneol=native#text/plain
 compiler/aarch64/a64tab.inc svneol=native#text/plain
 compiler/aarch64/aasmcpu.pas svneol=native#text/plain
+compiler/aarch64/agcpugas.pas svneol=native#text/plain
 compiler/aarch64/aoptcpu.pas svneol=native#text/plain
 compiler/aarch64/aoptcpub.pas svneol=native#text/plain
 compiler/aarch64/aoptcpud.pas svneol=native#text/plain
+compiler/aarch64/cgcpu.pas svneol=native#text/plain
 compiler/aarch64/cpubase.pas svneol=native#text/plain
 compiler/aarch64/cpuinfo.pas svneol=native#text/plain
+compiler/aarch64/cpunode.pas svneol=native#text/plain
 compiler/aarch64/cpupara.pas svneol=native#text/plain
+compiler/aarch64/cpupi.pas svneol=native#text/plain
+compiler/aarch64/cputarg.pas svneol=native#text/plain
+compiler/aarch64/hlcgcpu.pas svneol=native#text/plain
 compiler/aarch64/itcpugas.pas svneol=native#text/plain
+compiler/aarch64/ncpuadd.pas svneol=native#text/plain
+compiler/aarch64/ncpucnv.pas svneol=native#text/plain
+compiler/aarch64/ncpuinl.pas svneol=native#text/plain
+compiler/aarch64/ncpumat.pas svneol=native#text/plain
+compiler/aarch64/ncpumem.pas svneol=native#text/plain
+compiler/aarch64/ncpuset.pas svneol=native#text/plain
 compiler/aarch64/ra64con.inc svneol=native#text/plain
 compiler/aarch64/ra64dwa.inc svneol=native#text/plain
 compiler/aarch64/ra64nor.inc svneol=native#text/plain
@@ -30,6 +42,9 @@ compiler/aarch64/ra64sri.inc svneol=native#text/plain
 compiler/aarch64/ra64sta.inc svneol=native#text/plain
 compiler/aarch64/ra64std.inc svneol=native#text/plain
 compiler/aarch64/ra64sup.inc svneol=native#text/plain
+compiler/aarch64/racpu.pas svneol=native#text/plain
+compiler/aarch64/racpugas.pas svneol=native#text/plain
+compiler/aarch64/rgcpu.pas svneol=native#text/plain
 compiler/aarch64/symcpu.pas svneol=native#text/plain
 compiler/aasmbase.pas svneol=native#text/plain
 compiler/aasmdata.pas svneol=native#text/plain
@@ -349,6 +364,7 @@ compiler/m68k/m68kreg.dat svneol=native#text/plain
 compiler/m68k/n68kadd.pas svneol=native#text/plain
 compiler/m68k/n68kcal.pas svneol=native#text/plain
 compiler/m68k/n68kcnv.pas svneol=native#text/plain
+compiler/m68k/n68kinl.pas svneol=native#text/plain
 compiler/m68k/n68kmat.pas svneol=native#text/plain
 compiler/m68k/n68kmem.pas svneol=native#text/plain
 compiler/m68k/r68kcon.inc svneol=native#text/plain
@@ -915,6 +931,10 @@ ide/fputils.pas svneol=native#text/plain
 ide/fpvars.pas svneol=native#text/plain
 ide/fpviews.pas svneol=native#text/plain
 ide/fpw32.rc -text
+ide/gdbmicon.pas svneol=native#text/plain
+ide/gdbmiint.pas svneol=native#text/plain
+ide/gdbmiproc.pas svneol=native#text/plain
+ide/gdbmiwrap.pas svneol=native#text/plain
 ide/globdir.inc svneol=native#text/plain
 ide/gplprog.pt -text
 ide/gplunit.pt -text
@@ -4453,6 +4473,7 @@ packages/iosxlocale/Makefile.fpc svneol=native#text/plain
 packages/iosxlocale/Makefile.fpc.fpcmake svneol=native#text/plain
 packages/iosxlocale/fpmake.pp svneol=native#text/plain
 packages/iosxlocale/src/iosxlocale.pp svneol=native#text/plain
+packages/iosxlocale/src/iosxwstr.pp svneol=native#text/plain
 packages/jni/Makefile svneol=native#text/plain
 packages/jni/Makefile.fpc svneol=native#text/plain
 packages/jni/fpmake.pp svneol=native#text/plain
@@ -4835,6 +4856,9 @@ packages/libndsfpc/examples/audio/maxmod/song_events_example2/Makefile svneol=na
 packages/libndsfpc/examples/audio/maxmod/song_events_example2/Makefile.fpc svneol=native#text/plain
 packages/libndsfpc/examples/audio/maxmod/song_events_example2/audio/example2.it -text
 packages/libndsfpc/examples/audio/maxmod/song_events_example2/song_events_example2.pp svneol=native#text/plain
+packages/libndsfpc/examples/audio/maxmod/streaming/Makefile svneol=native#text/plain
+packages/libndsfpc/examples/audio/maxmod/streaming/Makefile.fpc svneol=native#text/plain
+packages/libndsfpc/examples/audio/maxmod/streaming/streaming.pp svneol=native#text/plain
 packages/libndsfpc/examples/audio/micrecord/Makefile svneol=native#text/plain
 packages/libndsfpc/examples/audio/micrecord/Makefile.fpc svneol=native#text/plain
 packages/libndsfpc/examples/audio/micrecord/micrecord.pp svneol=native#text/plain
@@ -5131,8 +5155,6 @@ packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/devkitlogo.gri
 packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/devkitlogo.png -text
 packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/drunkenlogo.grit svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/gfx/drunkenlogo.png -text
-packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/inc/devkitlogo.inc svneol=native#text/plain
-packages/libndsfpc/examples/graphics/Ext_Palettes/backgrounds/inc/drunkenlogo.inc svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Makefile svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Makefile.fpc svneol=native#text/plain
 packages/libndsfpc/examples/graphics/Printing/Makefile svneol=native#text/plain
@@ -5435,6 +5457,7 @@ packages/libogcfpc/src/ogc/ios.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/ipc.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/irq.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/isfs.inc svneol=native#text/plain
+packages/libogcfpc/src/ogc/libversion.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/lwp.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/lwp_config.inc svneol=native#text/plain
 packages/libogcfpc/src/ogc/lwp_heap.inc svneol=native#text/plain
@@ -7855,6 +7878,16 @@ rtl/COPYING.txt svneol=native#text/plain
 rtl/Makefile svneol=native#text/plain
 rtl/Makefile.fpc svneol=native#text/plain
 rtl/README.txt svneol=native#text/plain
+rtl/aarch64/aarch64.inc svneol=native#text/plain
+rtl/aarch64/int64p.inc svneol=native#text/plain
+rtl/aarch64/makefile.cpu svneol=native#text/plain
+rtl/aarch64/math.inc svneol=native#text/plain
+rtl/aarch64/mathu.inc svneol=native#text/plain
+rtl/aarch64/set.inc svneol=native#text/plain
+rtl/aarch64/setjump.inc svneol=native#text/plain
+rtl/aarch64/setjumph.inc svneol=native#text/plain
+rtl/aarch64/strings.inc svneol=native#text/plain
+rtl/aarch64/stringss.inc svneol=native#text/plain
 rtl/aix/Makefile svneol=native#text/plain
 rtl/aix/Makefile.fpc svneol=native#text/plain
 rtl/aix/dlaix.inc svneol=native#text/plain
@@ -8054,6 +8087,8 @@ rtl/charmaps/cp8859_5.pas svneol=native#text/pascal
 rtl/charmaps/cpall.pas svneol=native#text/pascal
 rtl/darwin/Makefile svneol=native#text/plain
 rtl/darwin/Makefile.fpc svneol=native#text/plain
+rtl/darwin/aarch64/sig_cpu.inc svneol=native#text/plain
+rtl/darwin/aarch64/sighnd.inc svneol=native#text/plain
 rtl/darwin/arm/sig_cpu.inc svneol=native#text/plain
 rtl/darwin/arm/sighnd.inc svneol=native#text/plain
 rtl/darwin/console.pp svneol=native#text/plain
@@ -8113,6 +8148,7 @@ rtl/dragonfly/x86_64/sighnd.inc svneol=native#text/plain
 rtl/dragonfly/x86_64/signal.inc svneol=native#text/plain
 rtl/embedded/Makefile svneol=native#text/plain
 rtl/embedded/Makefile.fpc svneol=native#text/plain
+rtl/embedded/arm/allwinner_a20.pp svneol=native#text/plain
 rtl/embedded/arm/at91sam7x256.pp svneol=native#text/plain
 rtl/embedded/arm/cortexm0.pp svneol=native#text/pascal
 rtl/embedded/arm/cortexm0_start.inc svneol=native#text/plain
@@ -8229,6 +8265,7 @@ rtl/gba/gbabiosh.inc svneol=native#text/plain
 rtl/gba/libc.inc svneol=native#text/plain
 rtl/gba/libch.inc svneol=native#text/plain
 rtl/gba/prt0.as svneol=native#text/plain
+rtl/gba/rtl.cfg svneol=native#text/plain
 rtl/gba/rtldefs.inc svneol=native#text/plain
 rtl/gba/sysdir.inc svneol=native#text/plain
 rtl/gba/sysfile.inc svneol=native#text/plain
@@ -8758,6 +8795,7 @@ rtl/nds/ndsbiosh.inc svneol=native#text/plain
 rtl/nds/ndsh.inc svneol=native#text/plain
 rtl/nds/prt07.as svneol=native#text/plain
 rtl/nds/prt09.as svneol=native#text/plain
+rtl/nds/rtl.cfg svneol=native#text/plain
 rtl/nds/rtldefs.inc svneol=native#text/plain
 rtl/nds/sysdir.inc svneol=native#text/plain
 rtl/nds/sysfile.inc svneol=native#text/plain
@@ -9294,6 +9332,7 @@ rtl/wii/classes.pp svneol=native#text/plain
 rtl/wii/dos.pp svneol=native#text/plain
 rtl/wii/libc.inc svneol=native#text/plain
 rtl/wii/libch.inc svneol=native#text/plain
+rtl/wii/rtl.cfg svneol=native#text/plain
 rtl/wii/rtldefs.inc svneol=native#text/plain
 rtl/wii/sysdir.inc svneol=native#text/plain
 rtl/wii/sysfile.inc svneol=native#text/plain
@@ -10219,6 +10258,7 @@ tests/tbs/tb0466.pp svneol=native#text/plain
 tests/tbs/tb0467.pp svneol=native#text/plain
 tests/tbs/tb0468.pp svneol=native#text/plain
 tests/tbs/tb0468a.pas svneol=native#text/plain
+tests/tbs/tb0468a.pp svneol=native#text/plain
 tests/tbs/tb0469.pp svneol=native#text/plain
 tests/tbs/tb0470.pp svneol=native#text/plain
 tests/tbs/tb0471.pp svneol=native#text/plain
@@ -10326,6 +10366,7 @@ tests/tbs/tb0568.pp svneol=native#text/plain
 tests/tbs/tb0569.pp svneol=native#text/pascal
 tests/tbs/tb0570.pp svneol=native#text/plain
 tests/tbs/tb0571.pas svneol=native#text/plain
+tests/tbs/tb0571.pp svneol=native#text/plain
 tests/tbs/tb0572.pp svneol=native#text/plain
 tests/tbs/tb0573.pp svneol=native#text/plain
 tests/tbs/tb0574.pp svneol=native#text/pascal
@@ -10367,6 +10408,7 @@ tests/tbs/tb0607.pp svneol=native#text/plain
 tests/tbs/tb0608.pp svneol=native#text/pascal
 tests/tbs/tb0609.pp svneol=native#text/plain
 tests/tbs/tb205.pp svneol=native#text/plain
+tests/tbs/tb610.pp svneol=native#text/pascal
 tests/tbs/tbs0594.pp svneol=native#text/pascal
 tests/tbs/ub0060.pp svneol=native#text/plain
 tests/tbs/ub0069.pp svneol=native#text/plain
@@ -10474,6 +10516,13 @@ tests/test/cg/obj/beos/i386/tcext4.o -text
 tests/test/cg/obj/beos/i386/tcext5.o -text
 tests/test/cg/obj/cpptcl1.cpp svneol=native#text/plain
 tests/test/cg/obj/cpptcl2.cpp svneol=native#text/plain
+tests/test/cg/obj/darwin/aarch64/cpptcl1.o -text
+tests/test/cg/obj/darwin/aarch64/cpptcl2.o -text
+tests/test/cg/obj/darwin/aarch64/ctest.o -text
+tests/test/cg/obj/darwin/aarch64/tcext3.o -text
+tests/test/cg/obj/darwin/aarch64/tcext4.o -text
+tests/test/cg/obj/darwin/aarch64/tcext5.o -text
+tests/test/cg/obj/darwin/aarch64/tcext6.o -text
 tests/test/cg/obj/darwin/arm/cpptcl1.o -text
 tests/test/cg/obj/darwin/arm/cpptcl2.o -text
 tests/test/cg/obj/darwin/arm/ctest.o -text
@@ -10769,6 +10818,7 @@ tests/test/cg/tcnvint3.pp svneol=native#text/plain
 tests/test/cg/tcnvint4.pp svneol=native#text/plain
 tests/test/cg/tcnvint5.pp svneol=native#text/plain
 tests/test/cg/tcnvint6.pp svneol=native#text/plain
+tests/test/cg/tcnvint7.pp svneol=native#text/plain
 tests/test/cg/tcnvptr.pp svneol=native#text/plain
 tests/test/cg/tcnvset.pp svneol=native#text/plain
 tests/test/cg/tcnvstr1.pp svneol=native#text/plain
@@ -11648,6 +11698,8 @@ tests/test/tgenconstraint8.pp svneol=native#text/pascal
 tests/test/tgenconstraint9.pp svneol=native#text/pascal
 tests/test/tgeneric1.pp svneol=native#text/plain
 tests/test/tgeneric10.pp svneol=native#text/plain
+tests/test/tgeneric100.pp svneol=native#text/pascal
+tests/test/tgeneric101.pp svneol=native#text/pascal
 tests/test/tgeneric11.pp svneol=native#text/plain
 tests/test/tgeneric12.pp svneol=native#text/plain
 tests/test/tgeneric13.pp svneol=native#text/plain
@@ -11744,6 +11796,7 @@ tests/test/tgeneric95.pp svneol=native#text/pascal
 tests/test/tgeneric96.pp svneol=native#text/pascal
 tests/test/tgeneric97.pp svneol=native#text/pascal
 tests/test/tgeneric98.pp svneol=native#text/pascal
+tests/test/tgeneric99.pp svneol=native#text/pascal
 tests/test/tgoto.pp svneol=native#text/plain
 tests/test/theap.pp svneol=native#text/plain
 tests/test/theapthread.pp svneol=native#text/plain
@@ -11904,6 +11957,7 @@ tests/test/tmsg3.pp svneol=native#text/plain
 tests/test/tmsg4.pp svneol=native#text/plain
 tests/test/tmt1.pp svneol=native#text/plain
 tests/test/tmul1.pp svneol=native#text/pascal
+tests/test/tnest1.pp svneol=native#text/plain
 tests/test/tnoext1.pp svneol=native#text/plain
 tests/test/tnoext2.pp svneol=native#text/plain
 tests/test/tnoext3.pp svneol=native#text/plain
@@ -12093,6 +12147,7 @@ tests/test/tover1.pp svneol=native#text/plain
 tests/test/tover2.pp svneol=native#text/plain
 tests/test/tover3.pp svneol=native#text/plain
 tests/test/tover4.pas svneol=native#text/plain
+tests/test/tover4.pp svneol=native#text/plain
 tests/test/tpackrec.pp svneol=native#text/plain
 tests/test/tparray1.pp svneol=native#text/plain
 tests/test/tparray10.pp svneol=native#text/plain
@@ -12342,6 +12397,7 @@ tests/test/tunit3.pp svneol=native#text/plain
 tests/test/tunroll1.pp svneol=native#text/plain
 tests/test/tutf81.pp svneol=native#text/plain
 tests/test/tutf82.pp svneol=native#text/plain
+tests/test/tutf8cpl.pp svneol=native#text/plain
 tests/test/tvarpropsetter1.pp svneol=native#text/plain
 tests/test/tvarpropsetter2.pp svneol=native#text/plain
 tests/test/tvarset1.pp svneol=native#text/plain
@@ -12391,6 +12447,7 @@ tests/test/ugeneric96a.pp svneol=native#text/pascal
 tests/test/ugeneric96b.pp svneol=native#text/pascal
 tests/test/ugeneric96c.pp svneol=native#text/pascal
 tests/test/ugeneric96d.pp svneol=native#text/pascal
+tests/test/ugeneric99.pp svneol=native#text/pascal
 tests/test/uhintdir.pp svneol=native#text/plain
 tests/test/uhlp3.pp svneol=native#text/pascal
 tests/test/uhlp31.pp svneol=native#text/pascal
@@ -12605,6 +12662,7 @@ tests/test/units/system/tslice2.pp svneol=native#text/plain
 tests/test/units/system/tstr1.pp svneol=native#text/pascal
 tests/test/units/system/tstring.pp svneol=native#text/plain
 tests/test/units/system/ttrig.pas svneol=native#text/plain
+tests/test/units/system/ttrig.pp svneol=native#text/plain
 tests/test/units/system/ttrunc.pp svneol=native#text/plain
 tests/test/units/system/tval.inc svneol=native#text/plain
 tests/test/units/system/tval.pp svneol=native#text/plain
@@ -12888,6 +12946,7 @@ tests/webtbf/tw2128.pp svneol=native#text/plain
 tests/webtbf/tw2129.pp svneol=native#text/plain
 tests/webtbf/tw21363.pp svneol=native#text/pascal
 tests/webtbf/tw21466.pas svneol=native#text/pascal
+tests/webtbf/tw21466.pp svneol=native#text/pascal
 tests/webtbf/tw2154.pp svneol=native#text/plain
 tests/webtbf/tw21566.pp svneol=native#text/pascal
 tests/webtbf/tw2174.pp svneol=native#text/plain
@@ -13092,6 +13151,7 @@ tests/webtbf/tw8588.pp svneol=native#text/plain
 tests/webtbf/tw8591.pp svneol=native#text/plain
 tests/webtbf/tw8717.pp svneol=native#text/plain
 tests/webtbf/tw8738.pas svneol=native#text/plain
+tests/webtbf/tw8738.pp svneol=native#text/plain
 tests/webtbf/tw8777a.pp svneol=native#text/plain
 tests/webtbf/tw8777b.pp svneol=native#text/plain
 tests/webtbf/tw8777c.pp svneol=native#text/plain
@@ -13513,6 +13573,7 @@ tests/webtbs/tw1430.pp svneol=native#text/plain
 tests/webtbs/tw14307.pp svneol=native#text/plain
 tests/webtbs/tw14315.pp svneol=native#text/plain
 tests/webtbs/tw1433.pp svneol=native#text/plain
+tests/webtbs/tw14347.pp svneol=native#text/pascal
 tests/webtbs/tw14363.pp svneol=native#text/plain
 tests/webtbs/tw14388.pp svneol=native#text/pascal
 tests/webtbs/tw14403.pp svneol=native#text/plain
@@ -13712,6 +13773,7 @@ tests/webtbs/tw17838.pp svneol=native#text/pascal
 tests/webtbs/tw17846.pp svneol=native#text/plain
 tests/webtbs/tw17862.pp svneol=native#text/plain
 tests/webtbs/tw17904.pas svneol=native#text/plain
+tests/webtbs/tw17904.pp svneol=native#text/plain
 tests/webtbs/tw17907/main/main.pas svneol=native#text/plain
 tests/webtbs/tw17907/test.bat svneol=native#text/plain
 tests/webtbs/tw17907/unit1/unit0001.pas svneol=native#text/plain
@@ -13817,6 +13879,7 @@ tests/webtbs/tw19651.pp svneol=native#text/plain
 tests/webtbs/tw19697.pp svneol=native#text/pascal
 tests/webtbs/tw19700.pp svneol=native#text/plain
 tests/webtbs/tw19701.pas svneol=native#text/plain
+tests/webtbs/tw19701.pp svneol=native#text/plain
 tests/webtbs/tw19851a.pp svneol=native#text/pascal
 tests/webtbs/tw19851b.pp svneol=native#text/pascal
 tests/webtbs/tw19864.pp svneol=native#text/pascal
@@ -14253,6 +14316,7 @@ tests/webtbs/tw2737.pp svneol=native#text/plain
 tests/webtbs/tw2738.pp svneol=native#text/plain
 tests/webtbs/tw2739.pp svneol=native#text/plain
 tests/webtbs/tw27424.pp svneol=native#text/pascal
+tests/webtbs/tw27515.pp svneol=native#text/pascal
 tests/webtbs/tw2758.pp svneol=native#text/plain
 tests/webtbs/tw2763.pp svneol=native#text/plain
 tests/webtbs/tw2765.pp svneol=native#text/plain

+ 28 - 3
Makefile

@@ -1,8 +1,8 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-01-04 rev 29399]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-02-23 rev 29972]
 #
 default: help
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos aarch64-darwin
 BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
@@ -379,6 +379,9 @@ endif
 ifeq ($(CPU_TARGET),avr)
 PPSUF=avr
 endif
+ifeq ($(CPU_TARGET),aarch64)
+PPSUF=a64
+endif
 ifdef CROSSCOMPILE
 ifneq ($(CPU_TARGET),jvm)
 PPPRE=ppcross
@@ -467,7 +470,7 @@ endif
 endif
 BuildOnlyBaseCPUs=jvm
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba msdos $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1
@@ -633,6 +636,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
@@ -705,6 +711,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_DIRS+=compiler rtl utils packages ide installer
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_DIRS+=compiler rtl utils packages ide installer
+endif
 override INSTALL_FPCPACKAGE=y
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
@@ -2280,6 +2289,14 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 TARGET_DIRS_COMPILER=1
 TARGET_DIRS_RTL=1
@@ -2472,6 +2489,14 @@ TARGET_DIRS_PACKAGES=1
 TARGET_DIRS_IDE=1
 TARGET_DIRS_INSTALLER=1
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+TARGET_DIRS_COMPILER=1
+TARGET_DIRS_RTL=1
+TARGET_DIRS_UTILS=1
+TARGET_DIRS_PACKAGES=1
+TARGET_DIRS_IDE=1
+TARGET_DIRS_INSTALLER=1
+endif
 ifdef TARGET_DIRS_COMPILER
 compiler_all:
 	$(MAKE) -C compiler all

+ 4 - 1
Makefile.fpc

@@ -79,6 +79,9 @@ endif
 ifeq ($(CPU_TARGET),avr)
 PPSUF=avr
 endif
+ifeq ($(CPU_TARGET),aarch64)
+PPSUF=a64
+endif
 
 # cross compilers uses full cpu_target, not just ppc-suffix
 # (except if the target cannot run a native compiler)
@@ -204,7 +207,7 @@ endif
 BuildOnlyBaseCPUs=jvm
 
 ifneq ($(wildcard utils),)
-NOUTILSTARGETS=embedded gba msdos $(BuildOnlyBaseCPUs)
+NOUTILSTARGETS=embedded gba nds msdos $(BuildOnlyBaseCPUs)
 ifeq ($(findstring $(OS_TARGET),$(NOUTILSTARGETS)),)
 ifdef BUILDFULLNATIVE
 UTILS=1

+ 65 - 10
compiler/Makefile

@@ -1,8 +1,8 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2014-12-07 rev 29213]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2015-02-23 rev 29972]
 #
 default: all
-MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos
+MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android i386-aros m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded x86_64-iphonesim x86_64-dragonfly arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux mipsel-embedded mipsel-android jvm-java jvm-android i8086-msdos aarch64-darwin
 BSDs = freebsd netbsd openbsd darwin dragonfly
 UNIXs = linux $(BSDs) solaris qnx haiku aix
 LIMIT83fs = go32v2 os2 emx watcom msdos
@@ -328,7 +328,7 @@ endif
 override PACKAGE_NAME=compiler
 override PACKAGE_VERSION=3.1.1
 unexport FPC_VERSION FPC_COMPILERINFO
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
 ALLTARGETS=$(CYCLETARGETS)
 ifdef ALPHA
 PPC_TARGET=alpha
@@ -372,6 +372,9 @@ endif
 ifdef I8086
 PPC_TARGET=i8086
 endif
+ifdef AARCH64
+PPC_TARGET=aarch64
+endif
 ifndef PPC_TARGET
 PPC_TARGET=$(CPU_TARGET)
 endif
@@ -469,6 +472,9 @@ endif
 ifeq ($(CPC_TARGET),i8086)
 CPUSUF=8086
 endif
+ifeq ($(CPC_TARGET),aarch64)
+CPUSUF=a64
+endif
 NOCPUDEF=1
 MSGFILE=msg/error$(FPCLANG).msg
 SVNVERSION:=$(firstword $(wildcard $(addsuffix /svnversion$(SRCEXEEXT),$(SEARCHPATH))))
@@ -545,6 +551,9 @@ endif
 ifeq ($(OS_TARGET),msdos)
 NoNativeBinaries=1
 endif
+ifeq ($(OS_TARGET),nds)
+NoNativeBinaries=1
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_DIRS+=utils
 endif
@@ -704,6 +713,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_DIRS+=utils
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override TARGET_DIRS+=utils
 endif
@@ -776,6 +788,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_DIRS+=utils
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_DIRS+=utils
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override TARGET_PROGRAMS+=pp
 endif
@@ -935,6 +950,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override TARGET_PROGRAMS+=pp
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override TARGET_PROGRAMS+=pp
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override TARGET_PROGRAMS+=pp
 endif
@@ -1007,6 +1025,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override TARGET_PROGRAMS+=pp
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override TARGET_PROGRAMS+=pp
+endif
 override INSTALL_FPCPACKAGE=y
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
@@ -1167,6 +1188,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
@@ -1239,6 +1263,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_INCLUDEDIR+=$(CPC_TARGET)
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
@@ -1398,6 +1425,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
@@ -1470,6 +1500,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_UNITDIR+=$(COMPILERSOURCEDIR)
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_TARGETDIR+=.
 endif
@@ -1629,6 +1662,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_TARGETDIR+=.
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override COMPILER_TARGETDIR+=.
 endif
@@ -1701,6 +1737,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_TARGETDIR+=.
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_TARGETDIR+=.
+endif
 ifeq ($(FULL_TARGET),i386-linux)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
@@ -1860,6 +1899,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
@@ -1932,6 +1974,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+override COMPILER_UNITTARGETDIR+=$(CPU_UNITDIR)/units/$(FULL_TARGET)
+endif
 ifdef REQUIRE_UNITSDIR
 override UNITSDIR+=$(REQUIRE_UNITSDIR)
 endif
@@ -2741,6 +2786,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 REQUIRE_PACKAGES_RTL=1
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 REQUIRE_PACKAGES_RTL=1
 endif
@@ -2813,6 +2861,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 REQUIRE_PACKAGES_RTL=1
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+REQUIRE_PACKAGES_RTL=1
+endif
 ifdef REQUIRE_PACKAGES_RTL
 PACKAGEDIR_RTL:=$(firstword $(subst /Makefile.fpc,,$(strip $(wildcard $(addsuffix /rtl/Makefile.fpc,$(PACKAGESDIR))))))
 ifneq ($(PACKAGEDIR_RTL),)
@@ -2965,7 +3016,7 @@ ifdef CREATESHARED
 override FPCOPT+=-Cg
 endif
 ifneq ($(findstring $(OS_TARGET),dragonfly freebsd openbsd netbsd linux solaris),)
-ifeq ($(CPU_TARGET),x86_64)
+ifneq ($(findstring $(CPU_TARGET),x86_64 mips mipsel),)
 override FPCOPT+=-Cg
 endif
 endif
@@ -3604,6 +3655,9 @@ endif
 ifeq ($(FULL_TARGET),x86_64-embedded)
 TARGET_DIRS_UTILS=1
 endif
+ifeq ($(FULL_TARGET),x86_64-iphonesim)
+TARGET_DIRS_UTILS=1
+endif
 ifeq ($(FULL_TARGET),x86_64-dragonfly)
 TARGET_DIRS_UTILS=1
 endif
@@ -3676,6 +3730,9 @@ endif
 ifeq ($(FULL_TARGET),i8086-msdos)
 TARGET_DIRS_UTILS=1
 endif
+ifeq ($(FULL_TARGET),aarch64-darwin)
+TARGET_DIRS_UTILS=1
+endif
 ifdef TARGET_DIRS_UTILS
 utils_all:
 	$(MAKE) -C utils all
@@ -3804,7 +3861,7 @@ INSTALLEXEFILE=$(PPCROSSNAME)
 else
 INSTALLEXEFILE=$(EXENAME)
 endif
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086 aarch64
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
 $(PPC_TARGETS):
@@ -3839,11 +3896,11 @@ ppuclean:
 tempclean:
 	-$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 execlean :
-	-$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+	-$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
 $(addsuffix _clean,$(ALLTARGETS)):
 	-$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
 	-$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-	-$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME))
+	-$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
 	-$(DEL) $(EXENAME)
 clean: tempclean execlean cleanall $(addsuffix _clean,$(CPC_TARGET)) $(addsuffix _clean,$(TARGET_DIRS))
@@ -3995,13 +4052,11 @@ cycle:
 	$(MAKE) 'FPC=$(BASEDIR)/$(TEMPNAME)' OS_TARGET=$(OS_SOURCE) EXENAME=$(PPCROSSNAME) CROSSBINDIR= BINUTILSPREFIX= CROSSCYCLEBOOTSTRAP=1 cycleclean compiler CYCLELEVEL=2
 ifndef CROSSINSTALL
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
-ifneq ($(OS_TARGET),embedded)
-ifneq ($(OS_TARGET),gba)
+ifndef NoNativeBinaries
 	$(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
 endif
 endif
 endif
-endif
 else
 cycle: override FPC=
 cycle:

+ 14 - 10
compiler/Makefile.fpc

@@ -32,7 +32,7 @@ fpcdir=..
 unexport FPC_VERSION FPC_COMPILERINFO
 
 # Which platforms are ready for inclusion in the cycle
-CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086
+CYCLETARGETS=i386 powerpc sparc arm x86_64 powerpc64 m68k armeb mipsel mips avr jvm i8086 aarch64
 
 # All supported targets used for clean
 ALLTARGETS=$(CYCLETARGETS)
@@ -80,6 +80,9 @@ endif
 ifdef I8086
 PPC_TARGET=i8086
 endif
+ifdef AARCH64
+PPC_TARGET=aarch64
+endif
 
 # Default is to generate a compiler for the same
 # platform as CPU_TARGET (a native compiler)
@@ -205,6 +208,9 @@ endif
 ifeq ($(CPC_TARGET),i8086)
 CPUSUF=8086
 endif
+ifeq ($(CPC_TARGET),aarch64)
+CPUSUF=a64
+endif
 
 # Do not define the default -d$(CPU_TARGET) because that
 # will conflict with our -d$(CPC_TARGET)
@@ -323,6 +329,9 @@ endif
 ifeq ($(OS_TARGET),msdos)
 NoNativeBinaries=1
 endif
+ifeq ($(OS_TARGET),nds)
+NoNativeBinaries=1
+endif
 
 [rules]
 #####################################################################
@@ -397,7 +406,7 @@ endif
 # CPU targets
 #####################################################################
 
-PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086
+PPC_TARGETS=i386 m68k powerpc sparc arm armeb x86_64 powerpc64 alpha vis ia64 mips mipsel avr jvm i8086 aarch64
 INSTALL_TARGETS=$(addsuffix _exe_install,$(sort $(CYCLETARGETS) $(PPC_TARGETS)))
 
 .PHONY: $(PPC_TARGETS) $(INSTALL_TARGETS)
@@ -452,12 +461,12 @@ tempclean:
         -$(DEL) $(PPCROSSNAME) $(TEMPNAME) $(TEMPNAME1) $(TEMPNAME2) $(TEMPNAME3) $(MSG2INC) pp1.wpo pp2.wpo
 
 execlean :
-        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
+        -$(DEL) ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME) $(TEMPWPONAME1) $(TEMPWPONAME2)
 
 $(addsuffix _clean,$(ALLTARGETS)):
         -$(DELTREE) $(addprefix $(subst _clean,,$@),/units)
         -$(DEL) $(addprefix $(subst _clean,,$@)/,*$(OEXT) *$(PPUEXT) *$(RSTEXT) *$(ASMEXT) *$(STATICLIBEXT) *$(SHAREDLIBEXT) *$(PPLEXT))
-        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) $(EXENAME))
+        -$(DEL) $(addprefix $(subst _clean,,$@)/,ppc386$(EXEEXT) ppc68k$(EXEEXT) ppcx64$(EXEEXT) ppcppc$(EXEEXT) ppcsparc$(EXEEXT) ppcppc64$(EXEEXT) ppcarm$(EXEEXT) ppcmips$(EXEEXT) ppcmipsel$(EXEEXT) ppcjvm$(EXEEXT) ppc8086$(EXEEXT) ppca64$(EXEEXT) $(EXENAME))
 
 cycleclean: cleanall $(addsuffix _clean,$(CPC_TARGET))
         -$(DEL) $(EXENAME)
@@ -682,14 +691,10 @@ cycle:
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' rtlclean rtl CYCLELEVEL=3
-# building a native compiler for embedded targets is not possible
-ifneq ($(OS_TARGET),embedded)
-# building a native compiler for the arm-gba target is not possible
-ifneq ($(OS_TARGET),gba)
+ifndef NoNativeBinaries
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' cycleclean compiler CYCLELEVEL=3
 endif
 endif
-endif
 
 endif
 
@@ -715,7 +720,6 @@ cycle:
 # ppc<ARCH> (target native)
 ifndef CROSSINSTALL
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(RTLOPT) $(CROSSOPT)' rtlclean rtl CYCLELEVEL=3
-# building a native compiler for JVM and embedded targets is not possible
 ifndef NoNativeBinaries
         $(MAKE) 'FPC=$(BASEDIR)/$(PPCROSSNAME)' 'OPT=$(strip $(LOCALOPT) $(CROSSOPT))' cycleclean compiler CYCLELEVEL=3
 endif

+ 93 - 36
compiler/aarch64/a64att.inc

@@ -1,15 +1,37 @@
 { don't edit, this file is generated from armins.dat }
 (
-'nop',
+'none',
 'b',
-'cb',
-'tb',
+'cbz',
+'cbnz',
+'tbz',
+'tbnz',
 'bl',
 'blr',
 'br',
 'ret',
+'brk',
+'hlt',
+'hvc',
+'smc',
+'svc',
+'eret',
+'dcps1',
+'dcps2',
+'dcps3',
+'drps',
+'dc',
+'at',
+'tlbi',
+'hint',
+'clrex',
+'dsb',
+'dmb',
+'isb',
 'ldr',
 'str',
+'ldur',
+'stur',
 'ldp',
 'stp',
 'ldnp',
@@ -17,40 +39,66 @@
 'ldtr',
 'sttr',
 'ldxr',
+'ldxp',
 'stxr',
+'stxp',
 'ldar',
 'stlr',
 'ldaxr',
 'stlxr',
+'stlxp',
+'ld1',
+'ld2',
+'ld3',
+'ld4',
+'st1',
+'st2',
+'st3',
+'st4',
+'ld1r',
+'ld2r',
+'ld3r',
+'ld4r',
 'prfm',
+'prfum',
 'add',
-'adc',
 'sub',
-'sbc',
 'cmp',
 'cmn',
-'mov',
 'and',
-'bic',
 'eor',
-'eon',
 'orr',
 'orn',
 'tst',
-'mvn',
+'movz',
+'movn',
 'movk',
+'mrs',
+'msr',
 'adrp',
 'adr',
 'bfm',
 'sbfm',
 'ubfm',
 'extr',
-'sxt',
-'uxt',
+'adc',
+'sbc',
+'bic',
+'eon',
 'asrv',
-'llslv',
+'lslv',
 'lsrv',
 'rorv',
+'madd',
+'msub',
+'smaddl',
+'smsubl',
+'smulh',
+'umaddl',
+'umsubl',
+'umulh',
+'sdiv',
+'udiv',
 'cls',
 'clz',
 'rbit',
@@ -63,33 +111,39 @@
 'csneg',
 'ccmn',
 'ccmp',
-'madd',
-'msub',
-'smaddl',
-'smsubl',
-'smulh',
-'umaddl',
-'umsubl',
-'umulh',
-'sdiv',
-'udiv',
-'neg',
+'nop',
+'yield',
+'wfe',
+'wfi',
+'sev',
+'sevl',
+'mov',
+'bfi',
+'bfxil',
+'sbfiz',
+'sbfx',
+'ubfiz',
+'ubfx',
 'asr',
 'lsl',
 'lsr',
 'ror',
-'cset',
-'csetm',
-'cinc',
-'cinv',
-'cneg',
+'sxt',
+'uxt',
+'neg',
 'ngc',
+'mvn',
 'mneg',
 'mul',
 'smnegl',
 'smull',
 'umnegl',
 'umull',
+'cset',
+'csetm',
+'cinc',
+'cinv',
+'cneg',
 'fmov',
 'fcvt',
 'fcvtas',
@@ -104,13 +158,13 @@
 'fcvtzu',
 'scvtf',
 'ucvtf',
-'fprinta',
-'fprinti',
-'fprintm',
-'fprintn',
-'fprintp',
-'fprintx',
-'fprintz',
+'frinta',
+'frinti',
+'frintm',
+'frintn',
+'frintp',
+'frintx',
+'frintz',
 'fabs',
 'fneg',
 'fsqrt',
@@ -130,5 +184,8 @@
 'fcmpe',
 'fccmp',
 'fcmmpe',
-'fcsel'
+'fcsel',
+'umov',
+'ins',
+'movi'
 );

+ 57 - 0
compiler/aarch64/a64atts.inc

@@ -130,5 +130,62 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 158 - 44
compiler/aarch64/a64ins.dat

@@ -1,10 +1,15 @@
-[NOP]
+; invalid
+[NONE]
 
 [B]
 
-[CB]
+[CBZ]
+
+[CBNZ]
 
-[TB]
+[TBZ]
+
+[TBNZ]
 
 [BL]
 
@@ -14,10 +19,50 @@
 
 [RET]
 
+[BRK]
+
+[HLT]
+
+[HVC]
+
+[SMC]
+
+[SVC]
+
+[ERET]
+
+[DCPS1]
+
+[DCPS2]
+
+[DCPS3]
+
+[DRPS]
+
+[DC]
+
+[AT]
+
+[TLBI]
+
+[HINT]
+
+[CLREX]
+
+[DSB]
+
+[DMB]
+
+[ISB]
+
 [LDR]
 
 [STR]
 
+[LDUR]
+
+[STUR]
+
 [LDP]
 
 [STP]
@@ -32,8 +77,12 @@
 
 [LDXR]
 
+[LDXP]
+
 [STXR]
 
+[STXP]
+
 [LDAR]
 
 [STLR]
@@ -42,40 +91,64 @@
 
 [STLXR]
 
+[STLXP]
+
+[LD1]
+
+[LD2]
+
+[LD3]
+
+[LD4]
+
+[ST1]
+
+[ST2]
+
+[ST3]
+
+[ST4]
+
+[LD1R]
+
+[LD2R]
+
+[LD3R]
+
+[LD4R]
+
 [PRFM]
 
-[ADD]
+[PRFUM]
 
-[ADC]
+[ADD]
 
 [SUB]
 
-[SBC]
-
 [CMP]
 
 [CMN]
 
-[MOV]
-
 [AND]
 
-[BIC]
-
 [EOR]
 
-[EON]
-
 [ORR]
 
 [ORN]
 
 [TST]
 
-[MVN]
+[MOVZ]
+
+[MOVN]
 
 [MOVK]
 
+[MRS]
+
+[MSR]
+
 [ADRP]
 
 [ADR]
@@ -88,18 +161,42 @@
 
 [EXTR]
 
-[SXT]
+[ADC]
 
-[UXT]
+[SBC]
+
+[BIC]
+
+[EON]
 
 [ASRV]
 
-[LLSLV]
+[LSLV]
 
 [LSRV]
 
 [RORV]
 
+[MADD]
+
+[MSUB]
+
+[SMADDL]
+
+[SMSUBL]
+
+[SMULH]
+
+[UMADDL]
+
+[UMSUBL]
+
+[UMULH]
+
+[SDIV]
+
+[UDIV]
+
 [CLS]
 
 [CLZ]
@@ -124,29 +221,33 @@
 
 [CCMP]
 
-[MADD]
+; Aliases
+; they are not generated by the compiler, they are only used for inline assembler
+[NOP]
 
-[MSUB]
+[YIELD]
 
-[SMADDL]
+[WFE]
 
-[SMSUBL]
+[WFI]
 
-[SMULH]
+[SEV]
 
-[UMADDL]
+[SEVL]
 
-[UMSUBL]
+[MOV]
 
-[UMULH]
+[BFI]
 
-[SDIV]
+[BFXIL]
 
-[UDIV]
+[SBFIZ]
 
-; Aliases
-; they are not generated by the compiler, they are only used for inline assembler
-[NEG]
+[SBFX]
+
+[UBFIZ]
+
+[UBFX]
 
 [ASR]
 
@@ -156,18 +257,16 @@
 
 [ROR]
 
-[CSET]
-
-[CSETM]
-
-[CINC]
+[SXT]
 
-[CINV]
+[UXT]
 
-[CNEG]
+[NEG]
 
 [NGC]
 
+[MVN]
+
 [MNEG]
 
 [MUL]
@@ -180,6 +279,16 @@
 
 [UMULL]
 
+[CSET]
+
+[CSETM]
+
+[CINC]
+
+[CINV]
+
+[CNEG]
+
 [FMOV]
 
 [FCVT]
@@ -208,19 +317,19 @@
 
 [UCVTF]
 
-[FPRINTA]
+[FRINTA]
 
-[FPRINTI]
+[FRINTI]
 
-[FPRINTM]
+[FRINTM]
 
-[FPRINTN]
+[FRINTN]
 
-[FPRINTP]
+[FRINTP]
 
-[FPRINTX]
+[FRINTX]
 
-[FPRINTZ]
+[FRINTZ]
 
 [FABS]
 
@@ -262,3 +371,8 @@
 
 [FCSEL]
 
+[UMOV]
+
+[INS]
+
+[MOVI]

+ 93 - 36
compiler/aarch64/a64op.inc

@@ -1,15 +1,37 @@
 { don't edit, this file is generated from armins.dat }
 (
-A_NOP,
+A_NONE,
 A_B,
-A_CB,
-A_TB,
+A_CBZ,
+A_CBNZ,
+A_TBZ,
+A_TBNZ,
 A_BL,
 A_BLR,
 A_BR,
 A_RET,
+A_BRK,
+A_HLT,
+A_HVC,
+A_SMC,
+A_SVC,
+A_ERET,
+A_DCPS1,
+A_DCPS2,
+A_DCPS3,
+A_DRPS,
+A_DC,
+A_AT,
+A_TLBI,
+A_HINT,
+A_CLREX,
+A_DSB,
+A_DMB,
+A_ISB,
 A_LDR,
 A_STR,
+A_LDUR,
+A_STUR,
 A_LDP,
 A_STP,
 A_LDNP,
@@ -17,40 +39,66 @@ A_STNP,
 A_LDTR,
 A_STTR,
 A_LDXR,
+A_LDXP,
 A_STXR,
+A_STXP,
 A_LDAR,
 A_STLR,
 A_LDAXR,
 A_STLXR,
+A_STLXP,
+A_LD1,
+A_LD2,
+A_LD3,
+A_LD4,
+A_ST1,
+A_ST2,
+A_ST3,
+A_ST4,
+A_LD1R,
+A_LD2R,
+A_LD3R,
+A_LD4R,
 A_PRFM,
+A_PRFUM,
 A_ADD,
-A_ADC,
 A_SUB,
-A_SBC,
 A_CMP,
 A_CMN,
-A_MOV,
 A_AND,
-A_BIC,
 A_EOR,
-A_EON,
 A_ORR,
 A_ORN,
 A_TST,
-A_MVN,
+A_MOVZ,
+A_MOVN,
 A_MOVK,
+A_MRS,
+A_MSR,
 A_ADRP,
 A_ADR,
 A_BFM,
 A_SBFM,
 A_UBFM,
 A_EXTR,
-A_SXT,
-A_UXT,
+A_ADC,
+A_SBC,
+A_BIC,
+A_EON,
 A_ASRV,
-A_LLSLV,
+A_LSLV,
 A_LSRV,
 A_RORV,
+A_MADD,
+A_MSUB,
+A_SMADDL,
+A_SMSUBL,
+A_SMULH,
+A_UMADDL,
+A_UMSUBL,
+A_UMULH,
+A_SDIV,
+A_UDIV,
 A_CLS,
 A_CLZ,
 A_RBIT,
@@ -63,33 +111,39 @@ A_CSINV,
 A_CSNEG,
 A_CCMN,
 A_CCMP,
-A_MADD,
-A_MSUB,
-A_SMADDL,
-A_SMSUBL,
-A_SMULH,
-A_UMADDL,
-A_UMSUBL,
-A_UMULH,
-A_SDIV,
-A_UDIV,
-A_NEG,
+A_NOP,
+A_YIELD,
+A_WFE,
+A_WFI,
+A_SEV,
+A_SEVL,
+A_MOV,
+A_BFI,
+A_BFXIL,
+A_SBFIZ,
+A_SBFX,
+A_UBFIZ,
+A_UBFX,
 A_ASR,
 A_LSL,
 A_LSR,
 A_ROR,
-A_CSET,
-A_CSETM,
-A_CINC,
-A_CINV,
-A_CNEG,
+A_SXT,
+A_UXT,
+A_NEG,
 A_NGC,
+A_MVN,
 A_MNEG,
 A_MUL,
 A_SMNEGL,
 A_SMULL,
 A_UMNEGL,
 A_UMULL,
+A_CSET,
+A_CSETM,
+A_CINC,
+A_CINV,
+A_CNEG,
 A_FMOV,
 A_FCVT,
 A_FCVTAS,
@@ -104,13 +158,13 @@ A_FCVTZS,
 A_FCVTZU,
 A_SCVTF,
 A_UCVTF,
-A_FPRINTA,
-A_FPRINTI,
-A_FPRINTM,
-A_FPRINTN,
-A_FPRINTP,
-A_FPRINTX,
-A_FPRINTZ,
+A_FRINTA,
+A_FRINTI,
+A_FRINTM,
+A_FRINTN,
+A_FRINTP,
+A_FRINTX,
+A_FRINTZ,
 A_FABS,
 A_FNEG,
 A_FSQRT,
@@ -130,5 +184,8 @@ A_FCMP,
 A_FCMPE,
 A_FCCMP,
 A_FCMMPE,
-A_FCSEL
+A_FCSEL,
+A_UMOV,
+A_INS,
+A_MOVI
 );

+ 168 - 161
compiler/aarch64/a64reg.dat

@@ -70,169 +70,176 @@ W30,$01,$04,$1E,w30,30,30
 X30,$01,$05,$1E,x30,30,30
 WZR,$01,$04,$1F,wzr,31,31
 XZR,$01,$05,$1F,xzr,31,31
+; sp and zr share the same register number, but we still have to be able to
+; differentiate them because some instructions can be encoded with both ->
+; use a different superregister after all
+WSP,$01,$04,$20,wsp,31,31
+SP,$01,$05,$20,sp,31,31
 
 
 ; vfp registers
-B0,$04,$01,$00,b0,0,0
-H0,$04,$03,$00,h0,0,0
-S0,$04,$09,$00,s0,0,0
-D0,$04,$0a,$00,d0,0,0
-Q0,$04,$05,$00,q0,0,0
-B1,$04,$01,$01,b1,1,1
-H1,$04,$03,$01,h1,1,1
-S1,$04,$09,$01,s1,1,1
-D1,$04,$0a,$01,d1,1,1
-Q1,$04,$05,$01,q1,1,1
-B2,$04,$01,$02,b2,2,2
-H2,$04,$03,$02,h2,2,2
-S2,$04,$09,$02,s2,2,2
-D2,$04,$0a,$02,d2,2,2
-Q2,$04,$05,$02,q2,2,2
-B3,$04,$01,$03,b3,3,3
-H3,$04,$03,$03,h3,3,3
-S3,$04,$09,$03,s3,3,3
-D3,$04,$0a,$03,d3,3,3
-Q3,$04,$05,$03,q3,3,3
-B4,$04,$01,$04,b4,4,4
-H4,$04,$03,$04,h4,4,4
-S4,$04,$09,$04,s4,4,4
-D4,$04,$0a,$04,d4,4,4
-Q4,$04,$05,$04,q4,4,4
-B5,$04,$01,$05,b5,5,5
-H5,$04,$03,$05,h5,5,5
-S5,$04,$09,$05,s5,5,5
-D5,$04,$0a,$05,d5,5,5
-Q5,$04,$05,$05,q5,5,5
-B6,$04,$01,$06,b6,6,6
-H6,$04,$03,$06,h6,6,6
-S6,$04,$09,$06,s6,6,6
-D6,$04,$0a,$06,d6,6,6
-Q6,$04,$05,$06,q6,6,6
-B7,$04,$01,$07,b7,7,7
-H7,$04,$03,$07,h7,7,7
-S7,$04,$09,$07,s7,7,7
-D7,$04,$0a,$07,d7,7,7
-Q7,$04,$05,$07,q7,7,7
-B8,$04,$01,$08,b8,8,8
-H8,$04,$03,$08,h8,8,8
-S8,$04,$09,$08,s8,8,8
-D8,$04,$0a,$08,d8,8,8
-Q8,$04,$05,$08,q8,8,8
-B9,$04,$01,$09,b9,9,9
-H9,$04,$03,$09,h9,9,9
-S9,$04,$09,$09,s9,9,9
-D9,$04,$0a,$09,d9,9,9
-Q9,$04,$05,$09,q9,9,9
-B10,$04,$01,$0A,b10,10,10
-H10,$04,$03,$0A,h10,10,10
-S10,$04,$09,$0A,s10,10,10
-D10,$04,$0a,$0A,d10,10,10
-Q10,$04,$05,$0A,q10,10,10
-B11,$04,$01,$0B,b11,11,11
-H11,$04,$03,$0B,h11,11,11
-S11,$04,$09,$0B,s11,11,11
-D11,$04,$0a,$0B,d11,11,11
-Q11,$04,$05,$0B,q11,11,11
-B12,$04,$01,$0C,b12,12,12
-H12,$04,$03,$0C,h12,12,12
-S12,$04,$09,$0C,s12,12,12
-D12,$04,$0a,$0C,d12,12,12
-Q12,$04,$05,$0C,q12,12,12
-B13,$04,$01,$0D,b13,13,13
-H13,$04,$03,$0D,h13,13,13
-S13,$04,$09,$0D,s13,13,13
-D13,$04,$0a,$0D,d13,13,13
-Q13,$04,$05,$0D,q13,13,13
-B14,$04,$01,$0E,b14,14,14
-H14,$04,$03,$0E,h14,14,14
-S14,$04,$09,$0E,s14,14,14
-D14,$04,$0a,$0E,d14,14,14
-Q14,$04,$05,$0E,q14,14,14
-B15,$04,$01,$0F,b15,15,15
-H15,$04,$03,$0F,h15,15,15
-S15,$04,$09,$0F,s15,15,15
-D15,$04,$0a,$0F,d15,15,15
-Q15,$04,$05,$0F,q15,15,15
-B16,$04,$01,$10,b16,16,16
-H16,$04,$03,$10,h16,16,16
-S16,$04,$09,$10,s16,16,16
-D16,$04,$0a,$10,d16,16,16
-Q16,$04,$05,$10,q16,16,16
-B17,$04,$01,$11,b17,17,17
-H17,$04,$03,$11,h17,17,17
-S17,$04,$09,$11,s17,17,17
-D17,$04,$0a,$11,d17,17,17
-Q17,$04,$05,$11,q17,17,17
-B18,$04,$01,$12,b18,18,18
-H18,$04,$03,$12,h18,18,18
-S18,$04,$09,$12,s18,18,18
-D18,$04,$0a,$12,d18,18,18
-Q18,$04,$05,$12,q18,18,18
-B19,$04,$01,$13,b19,19,19
-H19,$04,$03,$13,h19,19,19
-S19,$04,$09,$13,s19,19,19
-D19,$04,$0a,$13,d19,19,19
-Q19,$04,$05,$13,q19,19,19
-B20,$04,$01,$14,b20,20,20
-H20,$04,$03,$14,h20,20,20
-S20,$04,$09,$14,s20,20,20
-D20,$04,$0a,$14,d20,20,20
-Q20,$04,$05,$14,q20,20,20
-B21,$04,$01,$15,b21,21,21
-H21,$04,$03,$15,h21,21,21
-S21,$04,$09,$15,s21,21,21
-D21,$04,$0a,$15,d21,21,21
-Q21,$04,$05,$15,q21,21,21
-B22,$04,$01,$16,b22,22,22
-H22,$04,$03,$16,h22,22,22
-S22,$04,$09,$16,s22,22,22
-D22,$04,$0a,$16,d22,22,22
-Q22,$04,$05,$16,q22,22,22
-B23,$04,$01,$17,b23,23,23
-H23,$04,$03,$17,h23,23,23
-S23,$04,$09,$17,s23,23,23
-D23,$04,$0a,$17,d23,23,23
-Q23,$04,$05,$17,q23,23,23
-B24,$04,$01,$18,b24,24,24
-H24,$04,$03,$18,h24,24,24
-S24,$04,$09,$18,s24,24,24
-D24,$04,$0a,$18,d24,24,24
-Q24,$04,$05,$18,q24,24,24
-B25,$04,$01,$19,b25,25,25
-H25,$04,$03,$19,h25,25,25
-S25,$04,$09,$19,s25,25,25
-D25,$04,$0a,$19,d25,25,25
-Q25,$04,$05,$19,q25,25,25
-B26,$04,$01,$1A,b26,26,26
-H26,$04,$03,$1A,h26,26,26
-S26,$04,$09,$1A,s26,26,26
-D26,$04,$0a,$1A,d26,26,26
-Q26,$04,$05,$1A,q26,26,26
-B27,$04,$01,$1B,b27,27,27
-H27,$04,$03,$1B,h27,27,27
-S27,$04,$09,$1B,s27,27,27
-D27,$04,$0a,$1B,d27,27,27
-Q27,$04,$05,$1B,q27,27,27
-B28,$04,$01,$1C,b28,28,28
-H28,$04,$03,$1C,h28,28,28
-S28,$04,$09,$1C,s28,28,28
-D28,$04,$0a,$1C,d28,28,28
-Q28,$04,$05,$1C,q28,28,28
-B29,$04,$01,$1D,b29,29,29
-H29,$04,$03,$1D,h29,29,29
-S29,$04,$09,$1D,s29,29,29
-D29,$04,$0a,$1D,d29,29,29
-Q29,$04,$05,$1D,q29,29,29
-B30,$04,$01,$1E,b30,30,30
-H30,$04,$03,$1E,h30,30,30
-S30,$04,$09,$1E,s30,30,30
-D30,$04,$0a,$1E,d30,30,30
-Q30,$04,$05,$1E,q30,30,30
-B31,$04,$01,$1F,b31,31,31
-H31,$04,$03,$1F,h31,31,31
-S31,$04,$09,$1F,s31,31,31
-D31,$04,$0a,$1F,d31,31,31
-Q31,$04,$05,$1F,q31,31,31
+B0,$04,$01,$00,b0,64,64
+H0,$04,$03,$00,h0,64,64
+S0,$04,$09,$00,s0,64,64
+D0,$04,$0a,$00,d0,64,64
+Q0,$04,$05,$00,q0,64,64
+B1,$04,$01,$01,b1,65,65
+H1,$04,$03,$01,h1,65,65
+S1,$04,$09,$01,s1,65,65
+D1,$04,$0a,$01,d1,65,65
+Q1,$04,$05,$01,q1,65,65
+B2,$04,$01,$02,b2,66,66
+H2,$04,$03,$02,h2,66,66
+S2,$04,$09,$02,s2,66,66
+D2,$04,$0a,$02,d2,66,66
+Q2,$04,$05,$02,q2,66,66
+B3,$04,$01,$03,b3,67,67
+H3,$04,$03,$03,h3,67,67
+S3,$04,$09,$03,s3,67,67
+D3,$04,$0a,$03,d3,67,67
+Q3,$04,$05,$03,q3,67,67
+B4,$04,$01,$04,b4,68,68
+H4,$04,$03,$04,h4,68,68
+S4,$04,$09,$04,s4,68,68
+D4,$04,$0a,$04,d4,68,68
+Q4,$04,$05,$04,q4,68,68
+B5,$04,$01,$05,b5,69,69
+H5,$04,$03,$05,h5,69,69
+S5,$04,$09,$05,s5,69,69
+D5,$04,$0a,$05,d5,69,69
+Q5,$04,$05,$05,q5,69,69
+B6,$04,$01,$06,b6,70,70
+H6,$04,$03,$06,h6,70,70
+S6,$04,$09,$06,s6,70,70
+D6,$04,$0a,$06,d6,70,70
+Q6,$04,$05,$06,q6,70,70
+B7,$04,$01,$07,b7,71,71
+H7,$04,$03,$07,h7,71,71
+S7,$04,$09,$07,s7,71,71
+D7,$04,$0a,$07,d7,71,71
+Q7,$04,$05,$07,q7,71,71
+B8,$04,$01,$08,b8,72,72
+H8,$04,$03,$08,h8,72,72
+S8,$04,$09,$08,s8,72,72
+D8,$04,$0a,$08,d8,72,72
+Q8,$04,$05,$08,q8,72,72
+B9,$04,$01,$09,b9,73,73
+H9,$04,$03,$09,h9,73,73
+S9,$04,$09,$09,s9,73,73
+D9,$04,$0a,$09,d9,73,73
+Q9,$04,$05,$09,q9,73,73
+B10,$04,$01,$0A,b10,74,74
+H10,$04,$03,$0A,h10,74,74
+S10,$04,$09,$0A,s10,74,74
+D10,$04,$0a,$0A,d10,74,74
+Q10,$04,$05,$0A,q10,74,74
+B11,$04,$01,$0B,b11,75,75
+H11,$04,$03,$0B,h11,75,75
+S11,$04,$09,$0B,s11,75,75
+D11,$04,$0a,$0B,d11,75,75
+Q11,$04,$05,$0B,q11,75,75
+B12,$04,$01,$0C,b12,76,76
+H12,$04,$03,$0C,h12,76,76
+S12,$04,$09,$0C,s12,76,76
+D12,$04,$0a,$0C,d12,76,76
+Q12,$04,$05,$0C,q12,76,76
+B13,$04,$01,$0D,b13,77,77
+H13,$04,$03,$0D,h13,77,77
+S13,$04,$09,$0D,s13,77,77
+D13,$04,$0a,$0D,d13,77,77
+Q13,$04,$05,$0D,q13,77,77
+B14,$04,$01,$0E,b14,78,78
+H14,$04,$03,$0E,h14,78,78
+S14,$04,$09,$0E,s14,78,78
+D14,$04,$0a,$0E,d14,78,78
+Q14,$04,$05,$0E,q14,78,78
+B15,$04,$01,$0F,b15,79,79
+H15,$04,$03,$0F,h15,79,79
+S15,$04,$09,$0F,s15,79,79
+D15,$04,$0a,$0F,d15,79,79
+Q15,$04,$05,$0F,q15,79,79
+B16,$04,$01,$10,b16,80,80
+H16,$04,$03,$10,h16,80,80
+S16,$04,$09,$10,s16,80,80
+D16,$04,$0a,$10,d16,80,80
+Q16,$04,$05,$10,q16,80,80
+B17,$04,$01,$11,b17,81,81
+H17,$04,$03,$11,h17,81,81
+S17,$04,$09,$11,s17,81,81
+D17,$04,$0a,$11,d17,81,81
+Q17,$04,$05,$11,q17,81,81
+B18,$04,$01,$12,b18,82,82
+H18,$04,$03,$12,h18,82,82
+S18,$04,$09,$12,s18,82,82
+D18,$04,$0a,$12,d18,82,82
+Q18,$04,$05,$12,q18,82,82
+B19,$04,$01,$13,b19,83,83
+H19,$04,$03,$13,h19,83,83
+S19,$04,$09,$13,s19,83,83
+D19,$04,$0a,$13,d19,83,83
+Q19,$04,$05,$13,q19,83,83
+B20,$04,$01,$14,b20,84,84
+H20,$04,$03,$14,h20,84,84
+S20,$04,$09,$14,s20,84,84
+D20,$04,$0a,$14,d20,84,84
+Q20,$04,$05,$14,q20,84,84
+B21,$04,$01,$15,b21,85,85
+H21,$04,$03,$15,h21,85,85
+S21,$04,$09,$15,s21,85,85
+D21,$04,$0a,$15,d21,85,85
+Q21,$04,$05,$15,q21,85,85
+B22,$04,$01,$16,b22,86,86
+H22,$04,$03,$16,h22,86,86
+S22,$04,$09,$16,s22,86,86
+D22,$04,$0a,$16,d22,86,86
+Q22,$04,$05,$16,q22,86,86
+B23,$04,$01,$17,b23,87,87
+H23,$04,$03,$17,h23,87,87
+S23,$04,$09,$17,s23,87,87
+D23,$04,$0a,$17,d23,87,87
+Q23,$04,$05,$17,q23,87,87
+B24,$04,$01,$18,b24,88,88
+H24,$04,$03,$18,h24,88,88
+S24,$04,$09,$18,s24,88,88
+D24,$04,$0a,$18,d24,88,88
+Q24,$04,$05,$18,q24,88,88
+B25,$04,$01,$19,b25,89,89
+H25,$04,$03,$19,h25,89,89
+S25,$04,$09,$19,s25,89,89
+D25,$04,$0a,$19,d25,89,89
+Q25,$04,$05,$19,q25,89,89
+B26,$04,$01,$1A,b26,90,90
+H26,$04,$03,$1A,h26,90,90
+S26,$04,$09,$1A,s26,90,90
+D26,$04,$0a,$1A,d26,90,90
+Q26,$04,$05,$1A,q26,90,90
+B27,$04,$01,$1B,b27,91,91
+H27,$04,$03,$1B,h27,91,91
+S27,$04,$09,$1B,s27,91,91
+D27,$04,$0a,$1B,d27,91,91
+Q27,$04,$05,$1B,q27,91,91
+B28,$04,$01,$1C,b28,92,92
+H28,$04,$03,$1C,h28,92,92
+S28,$04,$09,$1C,s28,92,92
+D28,$04,$0a,$1C,d28,92,92
+Q28,$04,$05,$1C,q28,92,92
+B29,$04,$01,$1D,b29,93,93
+H29,$04,$03,$1D,h29,93,93
+S29,$04,$09,$1D,s29,93,93
+D29,$04,$0a,$1D,d29,93,93
+Q29,$04,$05,$1D,q29,93,93
+B30,$04,$01,$1E,b30,94,94
+H30,$04,$03,$1E,h30,94,94
+S30,$04,$09,$1E,s30,94,94
+D30,$04,$0a,$1E,d30,94,94
+Q30,$04,$05,$1E,q30,94,94
+B31,$04,$01,$1F,b31,95,95
+H31,$04,$03,$1F,h31,95,95
+S31,$04,$09,$1F,s31,95,95
+D31,$04,$0a,$1F,d31,95,95
+Q31,$04,$05,$1F,q31,95,95
 
 NZCV,$05,$00,$00,nzcv,0,0
-
+FPCR,$05,$00,$01,fpcr,0,0
+FPSR,$05,$00,$02,fpsr,0,0
+TPIDR_EL0,$05,$00,$03,tpidr_el0,0,0

+ 462 - 1125
compiler/aarch64/aasmcpu.pas

@@ -1,7 +1,7 @@
 {
     Copyright (c) 2003-2012 by Florian Klaempfl and others
 
-    Contains the assembler object for ARM64
+    Contains the assembler object for Aarch64
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -149,9 +149,6 @@ uses
 
       pinsentry=^tinsentry;
 
-{    const
-      InsTab : array[0..instabentries-1] of TInsEntry={$i a64tab.inc} }
-
     var
       InsTabCache : PInsTabCache;
 
@@ -159,6 +156,7 @@ uses
       taicpu = class(tai_cpu_abstract_sym)
          oppostfix : TOpPostfix;
          procedure loadshifterop(opidx:longint;const so:tshifterop);
+         procedure loadconditioncode(opidx: longint; const c: tasmcond);
          constructor op_none(op : tasmop);
 
          constructor op_reg(op : tasmop;_op1 : tregister);
@@ -167,15 +165,21 @@ uses
 
          constructor op_reg_reg(op : tasmop;_op1,_op2 : tregister);
          constructor op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
+         constructor op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
          constructor op_reg_const(op:tasmop; _op1: tregister; _op2: aint);
+         constructor op_reg_const_shifterop(op : tasmop;_op1: tregister; _op2: aint;_op3 : tshifterop);
 
          constructor op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
          constructor op_reg_reg_reg_reg(op : tasmop;_op1,_op2,_op3,_op4 : tregister);
          constructor op_reg_reg_const(op : tasmop;_op1,_op2 : tregister; _op3: aint);
+         constructor op_reg_reg_const_const(op : tasmop;_op1,_op2 : tregister; _op3, _op4: aint);
+         constructor op_reg_reg_const_shifterop(op : tasmop;_op1,_op2 : tregister; _op3: aint; const _op4 : tshifterop);
          constructor op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
          constructor op_reg_reg_ref(op : tasmop;_op1,_op2 : tregister; const _op3: treference);
          constructor op_reg_reg_shifterop(op : tasmop;_op1,_op2 : tregister;_op3 : tshifterop);
-         constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+         constructor op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
+         constructor op_reg_reg_reg_cond(op : tasmop;_op1,_op2,_op3 : tregister; const _op4: tasmcond);
+
 
          { this is for Jmp instructions }
          constructor op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
@@ -188,6 +192,7 @@ uses
          function is_same_reg_move(regtype: Tregistertype):boolean; override;
 
          function spilling_get_operation_type(opnr: longint): topertype;override;
+         function spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;override;
 
          { assembler }
       public
@@ -203,28 +208,29 @@ uses
          procedure ppuwriteoper(ppufile:tcompilerppufile;const o:toper);override;
          procedure ppubuildderefimploper(var o:toper);override;
          procedure ppuderefoper(var o:toper);override;
-      private
-         { next fields are filled in pass1, so pass2 is faster }
-         inssize   : shortint;
-         insoffset : longint;
-         LastInsOffset : longint; { need to be public to be reset }
-         insentry  : PInsEntry;
-         function  InsEnd:longint;
-         procedure create_ot(objdata:TObjData);
-         function  Matches(p:PInsEntry):longint;
-         function  calcsize(p:PInsEntry):shortint;
-         procedure gencode(objdata:TObjData);
-         function  NeedAddrPrefix(opidx:byte):boolean;
-         procedure Swapoperands;
-         function  FindInsentry(objdata:TObjData):boolean;
       end;
 
       tai_align = class(tai_align_abstract)
         { nothing to add }
       end;
 
-    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
-    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+    type
+      tsimplereftype =
+         { valid reference }
+        (sr_simple,
+         { invalid reference, should not be generated by the code generator (but
+           can be encountered via inline assembly, where it must be rejected) }
+         sr_internal_illegal,
+         { invalid reference, may be generated by the code generator and then
+           must be simplified (also rejected in inline assembly) }
+         sr_complex);
+
+    function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
+    function can_be_shifter_operand(opc: tasmop; opnr: longint): boolean;
+    function valid_shifter_operand(opc: tasmop; useszr, usessp, is64bit: boolean; sm: tshiftmode; shiftimm: longint): boolean;
+
+    function spilling_create_load(const ref: treference; r: tregister): taicpu;
+    function spilling_create_store(r: tregister; const ref: treference): taicpu;
 
     function setoppostfix(i : taicpu;pf : toppostfix) : taicpu;
     function setcondition(i : taicpu;c : tasmcond) : taicpu;
@@ -261,6 +267,21 @@ implementation
       end;
 
 
+    procedure taicpu.loadconditioncode(opidx: longint; const c: tasmcond);
+      begin
+        allocate_oper(opidx+1);
+        with oper[opidx]^ do
+          begin
+            if typ<>top_conditioncode then
+              begin
+                clearop(opidx);
+              end;
+            cc:=c;
+            typ:=top_conditioncode;
+          end;
+      end;
+
+
 {*****************************************************************************
                                  taicpu Constructors
 *****************************************************************************}
@@ -314,6 +335,16 @@ implementation
       end;
 
 
+    constructor taicpu.op_reg_const_shifterop(op: tasmop; _op1: tregister; _op2: aint; _op3: tshifterop);
+      begin
+        inherited create(op);
+        ops:=3;
+        loadreg(0,_op1);
+        loadconst(1,_op2);
+        loadshifterop(2,_op3);
+      end;
+
+
     constructor taicpu.op_reg_ref(op : tasmop;_op1 : tregister;const _op2 : treference);
       begin
          inherited create(op);
@@ -323,6 +354,15 @@ implementation
       end;
 
 
+    constructor taicpu.op_reg_cond(op: tasmop; _op1: tregister; _op2: tasmcond);
+      begin
+        inherited create(op);
+        ops:=2;
+        loadreg(0,_op1);
+        loadconditioncode(1,_op2);
+      end;
+
+
     constructor taicpu.op_reg_reg_reg(op : tasmop;_op1,_op2,_op3 : tregister);
       begin
          inherited create(op);
@@ -354,6 +394,28 @@ implementation
       end;
 
 
+     constructor taicpu.op_reg_reg_const_const(op: tasmop; _op1, _op2: tregister; _op3, _op4: aint);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,aint(_op3));
+         loadconst(3,aint(_op4));
+       end;
+
+
+     constructor taicpu.op_reg_reg_const_shifterop(op: tasmop; _op1, _op2: tregister; _op3: aint; const _op4: tshifterop);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadconst(2,aint(_op3));
+         loadshifterop(3,_op4);
+       end;
+
+
      constructor taicpu.op_reg_reg_sym_ofs(op : tasmop;_op1,_op2 : tregister; _op3: tasmsymbol;_op3ofs: longint);
        begin
          inherited create(op);
@@ -384,7 +446,7 @@ implementation
       end;
 
 
-     constructor taicpu.op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister;_op4 : tshifterop);
+     constructor taicpu.op_reg_reg_reg_shifterop(op : tasmop;_op1,_op2,_op3 : tregister; const _op4 : tshifterop);
       begin
          inherited create(op);
          ops:=4;
@@ -394,6 +456,16 @@ implementation
          loadshifterop(3,_op4);
       end;
 
+     constructor taicpu.op_reg_reg_reg_cond(op: tasmop; _op1, _op2, _op3: tregister; const _op4: tasmcond);
+       begin
+         inherited create(op);
+         ops:=4;
+         loadreg(0,_op1);
+         loadreg(1,_op2);
+         loadreg(2,_op3);
+         loadconditioncode(3,_op4);
+       end;
+
 
     constructor taicpu.op_cond_sym(op : tasmop;cond:TAsmCond;_op1 : tasmsymbol);
       begin
@@ -454,85 +526,403 @@ implementation
       end;
 
 
-    function spilling_create_load(const ref:treference;r:tregister):Taicpu;
+    function spilling_create_op(op: tasmop; const ref: treference; r: tregister): taicpu;
+      const
+        { invalid sizes for aarch64 are 0 }
+        subreg2bytesize: array[TSubRegister] of byte =
+          (0,0,0,0,4,8,0,0,0,4,8,0,0,0);
       var
-        op: tasmop;
+        scalefactor: byte;
       begin
+        scalefactor:=subreg2bytesize[getsubreg(r)];
+        if scalefactor=0 then
+          internalerror(2014120301);
+        if (ref.offset>4095*scalefactor) or
+           ((ref.offset>255) and
+            ((ref.offset mod scalefactor)<>0)) or
+           (ref.offset<-256) then
+          internalerror(2014120302);
         case getregtype(r) of
-          R_INTREGISTER :
-            result:=taicpu.op_reg_ref(A_LDR,r,ref);
-          R_MMREGISTER :
-            begin
-              case getsubreg(r) of
-                R_SUBFD:
-                  op:=A_LDR;
-                R_SUBFS:
-                  op:=A_LDR;
-                else
-                  internalerror(2009112905);
-              end;
-              result:=taicpu.op_reg_ref(op,r,ref);
-            end;
+          R_INTREGISTER,
+          R_MMREGISTER:
+            result:=taicpu.op_reg_ref(op,r,ref);
           else
             internalerror(200401041);
         end;
       end;
 
 
-    function spilling_create_store(r:tregister; const ref:treference):Taicpu;
+    function is_valid_load_symbol(op: tasmop; oppostfix: toppostfix; const ref: treference): tsimplereftype;
+      begin
+        result:=sr_complex;
+        if not assigned(ref.symboldata) and
+           not(ref.refaddr in [addr_gotpageoffset,addr_gotpage,addr_pageoffset,addr_page]) then
+          exit;
+        { can't use pre-/post-indexed mode here (makes no sense either) }
+        if ref.addressmode<>AM_OFFSET then
+          exit;
+        { "ldr literal" must be a 32/64 bit LDR and have a symbol }
+        if assigned(ref.symboldata) and
+           ((op<>A_LDR) or
+            not(oppostfix in [PF_NONE,PF_W,PF_SW]) or
+            not assigned(ref.symbol)) then
+          exit;
+        { if this is a (got) page offset load, we must have a base register and a
+          symbol }
+        if (ref.refaddr in [addr_gotpageoffset,addr_pageoffset]) and
+           (not assigned(ref.symbol) or
+            (ref.base=NR_NO) or
+            (ref.index<>NR_NO) or
+            (ref.offset<>0)) then
+          begin
+            result:=sr_internal_illegal;
+            exit;
+          end;
+        { cannot have base or index register (we generate these kind of
+          references internally, they should never end up here with an
+          extra base or offset) }
+        if (ref.refaddr in [addr_gotpage,addr_page]) and
+           (ref.base<>NR_NO) or
+           (ref.index<>NR_NO) then
+          begin
+            result:=sr_internal_illegal;
+            exit;
+          end;
+        result:=sr_simple;
+      end;
+
+
+    function simple_ref_type(op: tasmop; size:tcgsize; oppostfix: toppostfix; const ref: treference): tsimplereftype;
       var
-        op: tasmop;
+        maxoffs: asizeint;
+        accesssize: longint;
       begin
-        case getregtype(r) of
-          R_INTREGISTER :
-            result:=taicpu.op_reg_ref(A_STR,r,ref);
-          R_MMREGISTER :
-            begin
-              case getsubreg(r) of
-                R_SUBFD:
-                  op:=A_STR;
-                R_SUBFS:
-                  op:=A_STR;
+        result:=sr_internal_illegal;
+        { post-indexed is only allowed for vector and immediate loads/stores }
+        if (ref.addressmode=AM_POSTINDEXED) and
+           not(op in [A_LD1,A_LD2,A_LD3,A_LD4,A_ST1,A_ST2,A_ST3,A_ST4]) and
+           (not(op in [A_LDR,A_STR,A_LDP,A_STP]) or
+            (ref.base=NR_NO) or
+            (ref.index<>NR_NO)) then
+          exit;
+
+        { can only have a shift mode if we have an index }
+        if (ref.index=NR_NO) and
+           (ref.shiftmode<>SM_None) then
+          exit;
+
+        { the index can never be the stack pointer }
+        if ref.index=NR_SP then
+          exit;
+
+        { no instruction supports an index without a base }
+        if (ref.base=NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            result:=sr_complex;
+            exit;
+          end;
+
+        { LDR literal or GOT entry: 32 or 64 bit, label }
+        if assigned(ref.symboldata) or
+           assigned(ref.symbol) then
+          begin
+            { we generate these kind of references internally; at least for now,
+              they should never end up here with an extra base or offset or so }
+            result:=is_valid_load_symbol(op,oppostfix,ref);
+            exit;
+          end;
+
+        { any other reference cannot be gotpage/gotpageoffset/pic }
+        if ref.refaddr in [addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset,addr_pic] then
+          exit;
+
+        { base & index:
+            * index cannot be the stack pointer
+            * offset must be 0
+            * can scale with the size of the access
+            * can zero/sign extend 32 bit index register, and/or multiple by
+              access size
+            * no pre/post-indexing
+        }
+        if (ref.base<>NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+              exit;
+            case op of
+              { this holds for both integer and fpu/vector loads }
+              A_LDR,A_STR:
+                if (ref.offset=0) and
+                   (((ref.shiftmode=SM_None) and
+                     (ref.shiftimm=0)) or
+                    ((ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                     (ref.shiftimm=tcgsizep2size[size]))) then
+                  result:=sr_simple
                 else
-                  internalerror(2009112904);
-              end;
-              result:=taicpu.op_reg_ref(op,r,ref);
+                  result:=sr_complex;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110704);
+              { these don't support base+index }
+              A_LDUR,A_STUR,
+              A_LDP,A_STP:
+                result:=sr_complex;
+              else
+                { nothing: result is already sr_internal_illegal };
             end;
+            exit;
+          end;
+
+        { base + immediate offset. Variants:
+            * LDR*/STR*:
+              - pre- or post-indexed with signed 9 bit immediate
+              - regular with unsiged scaled immediate (multiple of access
+                size), in the range 0 to (12 bit * access_size)-1
+            * LDP/STP
+              - pre- or post-indexed with signed 9 bit immediate
+              - regular with signed 9 bit immediate
+            * LDUR*/STUR*:
+              - regular with signed 9 bit immediate
+        }
+        if ref.base<>NR_NO then
+          begin
+            accesssize:=1 shl tcgsizep2size[size];
+            case op of
+              A_LDR,A_STR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset>=0) and
+                     (ref.offset<(((1 shl 12)-1)*accesssize)) and
+                     ((ref.offset mod accesssize)=0) then
+                    result:=sr_simple
+                  else if (ref.offset>=-256) and
+                     (ref.offset<=255) then
+                    begin
+                      { non pre-/post-indexed regular loads/stores can only be
+                        performed using LDUR/STUR }
+                      if ref.addressmode in [AM_PREINDEXED,AM_POSTINDEXED] then
+                        result:=sr_simple
+                      else
+                        result:=sr_complex
+                    end
+                  else
+                    result:=sr_complex;
+                end;
+              A_LDP,A_LDNP,
+              A_STP,A_STNP:
+                begin
+                  { only supported for 32/64 bit }
+                  if not(oppostfix in [PF_W,PF_SW,PF_None]) then
+                    exit;
+                  { offset must be a multple of the access size }
+                  if (ref.offset mod accesssize)<>0 then
+                    exit;
+                  { offset must fit in a signed 7 bit offset }
+                  if (ref.offset>=-(1 shl (6+tcgsizep2size[size]))) and
+                     (ref.offset<=(1 shl (6+tcgsizep2size[size]))-1) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
+              A_LDUR,A_STUR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset>=-256) and
+                     (ref.offset<=255) then
+                    result:=sr_simple
+                  else
+                    result:=sr_complex;
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110907);
+              A_LDAR,
+              A_LDAXR,
+              A_LDXR,
+              A_LDXP,
+              A_STLR,
+              A_STLXR,
+              A_STLXP,
+              A_STXP,
+              A_STXR:
+                begin
+                  if (ref.addressmode=AM_OFFSET) and
+                     (ref.offset=0) then
+                    result:=sr_simple;
+                end
+              else
+                { nothing: result is already sr_internal_illegal };
+            end;
+            exit;
+          end;
+        { absolute addresses are not supported, have to load them first into
+          a register }
+        result:=sr_complex;
+      end;
+
+
+    function can_be_shifter_operand(opc: tasmop; opnr: longint): boolean;
+      begin
+        case opc of
+          A_ADD,
+          A_AND,
+          A_EON,
+          A_EOR,
+          A_ORN,
+          A_ORR,
+          A_SUB:
+            result:=opnr=3;
+          A_BIC,
+          A_CMN,
+          A_CMP,
+          A_MOVK,
+          A_MOVZ,
+          A_MOVN,
+          A_MVN,
+          A_NEG,
+          A_TST:
+            result:=opnr=2;
           else
-            internalerror(200401041);
+            result:=false;
         end;
       end;
 
 
+    function valid_shifter_operand(opc: tasmop; useszr, usessp, is64bit: boolean; sm: tshiftmode; shiftimm: longint): boolean;
+      begin
+        case opc of
+          A_ADD,
+          A_SUB,
+          A_NEG,
+          A_AND,
+          A_TST,
+          A_CMN,
+          A_CMP:
+            begin
+              result:=false;
+              if not useszr then
+                result:=
+                  (sm in shiftedregmodes) and
+                  ((shiftimm in [0..31]) or
+                   (is64bit and
+                    (shiftimm in [32..63])));
+              if not usessp then
+                result:=
+                  result or
+                  ((sm in extendedregmodes) and
+                   (shiftimm in [0..4]));
+            end;
+          A_BIC,
+          A_EON,
+          A_EOR,
+          A_MVN,
+          A_ORN,
+          A_ORR:
+            result:=
+              (sm in shiftedregmodes) and
+              (shiftimm in [0..31*(ord(is64bit)+1)+ord(is64bit)]);
+          A_MOVK,
+          A_MOVZ,
+          A_MOVN:
+            result:=
+              (sm=SM_LSL) and
+              ((shiftimm in [0,16]) or
+               (is64bit and
+                (shiftimm in [32,48])));
+          else
+            result:=false;
+        end;
+      end;
+
+
+    function spilling_create_load(const ref: treference; r: tregister): taicpu;
+      var
+        op: tasmop;
+      begin
+        if (ref.index<>NR_NO) or
+           (ref.offset<-256) or
+           (ref.offset>255) then
+          op:=A_LDR
+        else
+          op:=A_LDUR;
+        result:=spilling_create_op(op,ref,r);
+      end;
+
+
+    function spilling_create_store(r: tregister; const ref: treference): taicpu;
+      var
+        op: tasmop;
+      begin
+        if (ref.index<>NR_NO) or
+           (ref.offset<-256) or
+           (ref.offset>255) then
+          op:=A_STR
+        else
+          op:=A_STUR;
+        result:=spilling_create_op(op,ref,r);
+      end;
+
+
     function taicpu.spilling_get_operation_type(opnr: longint): topertype;
       begin
         case opcode of
-          A_ADC,A_ADD,A_AND,A_BIC,
-          A_EOR,A_CLZ,A_RBIT,
-          A_LDR,
-          A_MOV,A_MVN,A_MUL,
-          A_ORR,A_SBC,A_SUB,
-          A_UXT,A_SXT:
+          A_B,A_BL,
+          A_CMN,A_CMP,
+          A_CCMN,A_CCMP,
+          A_TST:
+            result:=operand_read;
+          A_STR,A_STUR:
             if opnr=0 then
-              result:=operand_write
+              result:=operand_read
             else
+              { check for pre/post indexed in spilling_get_operation_type_ref }
               result:=operand_read;
-          A_B,A_BL,
-          A_CMN,A_CMP,A_TST:
-            result:=operand_read;
-          A_STR:
-            { important is what happens with the involved registers }
+          A_STLXP,
+          A_STLXR,
+          A_STXP,
+          A_STXR:
             if opnr=0 then
-              result := operand_read
+              result:=operand_write
             else
-              { check for pre/post indexed }
-              result := operand_read;
-          else
-            internalerror(200403151);
+              result:=operand_read;
+          A_STP:
+            begin
+              if opnr in [0,1] then
+                result:=operand_read
+              else
+                { check for pre/post indexed in spilling_get_operation_type_ref }
+                result:=operand_read;
+            end;
+           A_LDP,
+           A_LDXP:
+             begin
+               if opnr in [0,1] then
+                 result:=operand_write
+               else
+                 { check for pre/post indexed in spilling_get_operation_type_ref }
+                 result:=operand_read;
+             end;
+           else
+             if opnr=0 then
+               result:=operand_write
+             else
+               result:=operand_read;
         end;
       end;
 
 
+    function taicpu.spilling_get_operation_type_ref(opnr: longint; reg: tregister): topertype;
+      begin
+        result:=operand_read;
+        if (oper[opnr]^.ref^.base = reg) and
+          (oper[opnr]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) then
+           result:=operand_readwrite;
+      end;
+
+
     procedure BuildInsTabCache;
       var
         i : longint;
@@ -1069,22 +1459,12 @@ implementation
         { we need to reset everything here, because the choosen insentry
           can be invalid for a new situation where the previously optimized
           insentry is not correct }
-        InsEntry:=nil;
-        InsSize:=0;
-        LastInsOffset:=-1;
       end;
 
 
     procedure taicpu.ResetPass2;
       begin
         { we are here in a second pass, check if the instruction can be optimized }
-        if assigned(InsEntry) and
-           ((InsEntry^.flags and IF_PASS2)<>0) then
-         begin
-           InsEntry:=nil;
-           InsSize:=0;
-         end;
-        LastInsOffset:=-1;
       end;
 
 
@@ -1097,18 +1477,15 @@ implementation
     function taicpu.Pass1(objdata:TObjData):longint;
       begin
         Pass1:=0;
-        LastInsOffset:=-1;
       end;
 
 
     procedure taicpu.Pass2(objdata:TObjData);
       begin
         { error in pass1 ? }
-        if insentry=nil then
-         exit;
         current_filepos:=fileinfo;
         { Generate the instruction }
-        GenCode(objdata);
+        { GenCode(objdata); }
       end;
 
 
@@ -1132,1046 +1509,6 @@ implementation
       end;
 
 
-    function  taicpu.InsEnd:longint;
-      begin
-        Result:=0; { unimplemented }
-      end;
-
-
-    procedure taicpu.create_ot(objdata:TObjData);
-      begin
-      end;
-
-
-    function taicpu.Matches(p:PInsEntry):longint;
-      begin
-      end;
-
-
-    function  taicpu.calcsize(p:PInsEntry):shortint;
-      begin
-        result:=4;
-      end;
-
-
-    function  taicpu.NeedAddrPrefix(opidx:byte):boolean;
-      begin
-        Result:=False; { unimplemented }
-      end;
-
-
-    procedure taicpu.Swapoperands;
-      begin
-      end;
-
-
-    function taicpu.FindInsentry(objdata:TObjData):boolean;
-      begin
-      end;
-
-
-    procedure taicpu.gencode(objdata:TObjData);
-      var
-        bytes : dword;
-        i_field : byte;
-
-      procedure setshifterop(op : byte);
-        begin
-          case oper[op]^.typ of
-            top_const:
-              begin
-                i_field:=1;
-                bytes:=bytes or dword(oper[op]^.val and $fff);
-              end;
-            top_reg:
-              begin
-                i_field:=0;
-                bytes:=bytes or (getsupreg(oper[op]^.reg) shl 16);
-
-                { does a real shifter op follow? }
-                if (op+1<=op) and (oper[op+1]^.typ=top_shifterop) then
-                  begin
-                  end;
-              end;
-          else
-            internalerror(2005091103);
-          end;
-        end;
-
-      begin
-        bytes:=$0;
-        { evaluate and set condition code }
-
-        { condition code allowed? }
-
-        { setup rest of the instruction }
-        case insentry^.code[0] of
-          #$08:
-            begin
-              { set instruction code }
-              bytes:=bytes or (ord(insentry^.code[1]) shl 26);
-              bytes:=bytes or (ord(insentry^.code[2]) shl 21);
-
-              { set destination }
-              bytes:=bytes or (getsupreg(oper[0]^.reg) shl 12);
-
-              { create shifter op }
-              setshifterop(1);
-
-              { set i field }
-              bytes:=bytes or (i_field shl 25);
-
-              { set s if necessary }
-              if oppostfix=PF_S then
-                bytes:=bytes or (1 shl 20);
-            end;
-          #$ff:
-            internalerror(2005091101);
-          else
-            internalerror(2005091102);
-        end;
-        { we're finished, write code }
-        objdata.writebytes(bytes,sizeof(bytes));
-      end;
-
-
-{$ifdef dummy}
-(*
-static void gencode (long segment, long offset, int bits,
-                     insn *ins, char *codes, long insn_end)
-{
-    int has_S_code;             /* S - setflag */
-    int has_B_code;             /* B - setflag */
-    int has_T_code;             /* T - setflag */
-    int has_W_code;             /* ! => W flag */
-    int has_F_code;             /* ^ => S flag */
-    int keep;
-    unsigned char c;
-    unsigned char bytes[4];
-    long          data, size;
-    static int cc_code[] =      /* bit pattern of cc */
-  {                             /* order as enum in  */
-    0x0E, 0x03, 0x02, 0x00,     /* nasm.h            */
-    0x0A, 0x0C, 0x08, 0x0D,
-    0x09, 0x0B, 0x04, 0x01,
-    0x05, 0x07, 0x06,
-  };
-
-
-#ifdef DEBUG
-static char *CC[] =
-  {                                    /* condition code names */
-    "AL", "CC", "CS", "EQ",
-    "GE", "GT", "HI", "LE",
-    "LS", "LT", "MI", "NE",
-    "PL", "VC", "VS", "",
-    "S"
-};
-
-
-    has_S_code = (ins->condition & C_SSETFLAG);
-    has_B_code = (ins->condition & C_BSETFLAG);
-    has_T_code = (ins->condition & C_TSETFLAG);
-    has_W_code = (ins->condition & C_EXSETFLAG);
-    has_F_code = (ins->condition & C_FSETFLAG);
-    ins->condition = (ins->condition & 0x0F);
-
-
-    if (rt_debug)
-      {
-    printf ("gencode: instruction: %s%s", insn_names[ins->opcode],
-            CC[ins->condition & 0x0F]);
-    if (has_S_code)
-      printf ("S");
-    if (has_B_code)
-      printf ("B");
-    if (has_T_code)
-      printf ("T");
-    if (has_W_code)
-      printf ("!");
-    if (has_F_code)
-      printf ("^");
-
-    printf ("\n");
-
-    c = *codes;
-
-    printf ("   (%d)  decode - '0x%02X'\n", ins->operands, c);
-
-
-    bytes[0] = 0xB;
-    bytes[1] = 0xE;
-    bytes[2] = 0xE;
-    bytes[3] = 0xF;
-      }
-
-    // First condition code in upper nibble
-    if (ins->condition < C_NONE)
-      {
-        c = cc_code[ins->condition] << 4;
-      }
-    else
-      {
-        c = cc_code[C_AL] << 4; // is often ALWAYS but not always
-      }
-
-
-    switch (keep = *codes)
-      {
-        case 1:
-          // B, BL
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-
-          if (ins->oprs[0].segment != segment)
-            {
-              // fais une relocation
-              c = 1;
-              data = 0; // Let the linker locate ??
-            }
-          else
-            {
-              c = 0;
-              data = ins->oprs[0].offset - (offset + 8);
-
-              if (data % 4)
-                {
-                  errfunc (ERR_NONFATAL, "offset not aligned on 4 bytes");
-                }
-            }
-
-          if (data >= 0x1000)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-
-          data = data >> 2;
-          bytes[1] = (data >> 16) & 0xFF;
-          bytes[2] = (data >> 8)  & 0xFF;
-          bytes[3] = (data )      & 0xFF;
-
-          if (c == 1)
-            {
-//            out (offset, segment, &bytes[0], OUT_RAWDATA+1, NO_SEG, NO_SEG);
-              out (offset, segment, &bytes[0], OUT_REL3ADR+4, ins->oprs[0].segment, NO_SEG);
-            }
-          else
-            {
-              out (offset, segment, &bytes[0], OUT_RAWDATA+4, NO_SEG, NO_SEG);
-            }
-          return;
-
-        case 2:
-          // SWI
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-          data = ins->oprs[0].offset;
-          bytes[1] = (data >> 16) & 0xFF;
-          bytes[2] = (data >> 8) & 0xFF;
-          bytes[3] = (data) & 0xFF;
-          out (offset, segment, &bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-        case 3:
-          // BX
-          ++codes;
-          c |= *codes++;
-          bytes[0] = c;
-          bytes[1] = *codes++;
-          bytes[2] = *codes++;
-          bytes[3] = *codes++;
-          c = regval (&ins->oprs[0],1);
-          if (c == 15)  // PC
-            {
-              errfunc (ERR_WARNING, "'BX' with R15 has undefined behaviour");
-            }
-          else if (c > 15)
-            {
-              errfunc (ERR_NONFATAL, "Illegal register specified for 'BX'");
-            }
-
-          bytes[3] |= (c & 0x0F);
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 4:         // AND Rd,Rn,Rm
-        case 5:         // AND Rd,Rn,Rm,<shift>Rs
-        case 6:         // AND Rd,Rn,Rm,<shift>imm
-        case 7:         // AND Rd,Rn,<shift>imm
-          ++codes;
-#ifdef DEBUG
-          if (rt_debug)
-            {
-              printf ("         decode - '0x%02X'\n", keep);
-              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
-            }
-#endif
-          bytes[0] = c | *codes;
-          ++codes;
-
-          bytes[1] = *codes;
-          if (has_S_code)
-            bytes[1] |= 0x10;
-          c = regval (&ins->oprs[1],1);
-          // Rn in low nibble
-          bytes[1] |= c;
-
-          // Rd in high nibble
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          if (keep != 7)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[2],1);
-            }
-
-          // Shifts if any
-          if (keep == 5 || keep == 6)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 5)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[3],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 6)
-                {
-                  c = (ins->oprs[3].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[3]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,reg,imm
-          if (keep == 7)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[2].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 8:         // MOV Rd,Rm
-        case 9:         // MOV Rd,Rm,<shift>Rs
-        case 0xA:       // MOV Rd,Rm,<shift>imm
-        case 0xB:       // MOV Rd,<shift>imm
-          ++codes;
-#ifdef DEBUG
-          if (rt_debug)
-            {
-              printf ("         decode - '0x%02X'\n", keep);
-              printf ("           code - '0x%02X'\n", (unsigned char) ( *codes));
-            }
-#endif
-          bytes[0] = c | *codes;
-          ++codes;
-
-          bytes[1] = *codes;
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // Rd in high nibble
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          if (keep != 0x0B)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[1],1);
-            }
-
-          // Shifts if any
-          if (keep == 0x09 || keep == 0x0A)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 0x09)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[2],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 0x0A)
-                {
-                  c = (ins->oprs[2].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[2]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,imm
-          if (keep == 0x0B)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-
-        case 0xC:       // CMP Rn,Rm
-        case 0xD:       // CMP Rn,Rm,<shift>Rs
-        case 0xE:       // CMP Rn,Rm,<shift>imm
-        case 0xF:       // CMP Rn,<shift>imm
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes;
-
-          // Implicit S code
-          bytes[1] |= 0x10;
-
-          c = regval (&ins->oprs[0],1);
-          // Rn in low nibble
-          bytes[1] |= c;
-
-          // No destination
-          bytes[2] = 0;
-
-          if (keep != 0x0B)
-            {
-              // Rm in low nibble
-              bytes[3] = regval (&ins->oprs[1],1);
-            }
-
-          // Shifts if any
-          if (keep == 0x0D || keep == 0x0E)
-            {
-              // Shift in bytes 2 and 3
-              if (keep == 0x0D)
-                {
-                  // Rs
-                  c = regval (&ins->oprs[2],1);
-                  bytes[2] |= c;
-
-                  c = 0x10;             // Set bit 4 in byte[3]
-                }
-              if (keep == 0x0E)
-                {
-                  c = (ins->oprs[2].offset) & 0x1F;
-
-                  // #imm
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  c = 0;                // Clr bit 4 in byte[3]
-                }
-              // <shift>
-              c |= shiftval (&ins->oprs[2]) << 5;
-
-              bytes[3] |= c;
-            }
-
-          // reg,imm
-          if (keep == 0x0F)
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-          return;
-
-        case 0x10:      // MRS Rd,<psr>
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          c = regval (&ins->oprs[0],1);
-
-          bytes[2] = c << 4;
-
-          bytes[3] = 0;
-
-          c = ins->oprs[1].basereg;
-
-          if (c == R_CPSR || c == R_SPSR)
-            {
-              if (c == R_SPSR)
-                {
-                  bytes[1] |= 0x40;
-                }
-            }
-          else
-            {
-              errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
-            }
-
-          out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-
-          return;
-
-        case 0x11:      // MSR <psr>,Rm
-        case 0x12:      // MSR <psrf>,Rm
-        case 0x13:      // MSR <psrf>,#expression
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[2] = *codes;
-
-
-          if (keep == 0x11 || keep == 0x12)
-            {
-              // Rm
-              c = regval (&ins->oprs[1],1);
-
-              bytes[3] = c;
-            }
-          else
-            {
-              int shimm;
-
-              shimm = imm_shift (ins->oprs[1].offset);
-
-              if (shimm == -1)
-                {
-                  errfunc (ERR_NONFATAL, "cannot create that constant");
-                }
-              bytes[3] = shimm & 0xFF;
-              bytes[2] |= (shimm & 0xF00) >> 8;
-            }
-
-          c = ins->oprs[0].basereg;
-
-          if ( keep == 0x11)
-            {
-              if ( c == R_CPSR || c == R_SPSR)
-                {
-                if ( c== R_SPSR)
-                  {
-                    bytes[1] |= 0x40;
-                  }
-                }
-            else
-              {
-                errfunc (ERR_NONFATAL, "CPSR or SPSR expected");
-              }
-            }
-          else
-            {
-              if ( c == R_CPSR_FLG || c == R_SPSR_FLG)
-                {
-                  if ( c== R_SPSR_FLG)
-                    {
-                      bytes[1] |= 0x40;
-                    }
-                }
-              else
-                {
-                  errfunc (ERR_NONFATAL, "CPSR_flg or SPSR_flg expected");
-                }
-            }
-          break;
-
-        case 0x14:      // MUL  Rd,Rm,Rs
-        case 0x15:      // MULA Rd,Rm,Rs,Rn
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[3] = *codes;
-
-          // Rd
-          bytes[1] |= regval (&ins->oprs[0],1);
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // Rm
-          bytes[3] |= regval (&ins->oprs[1],1);
-
-          // Rs
-          bytes[2] = regval (&ins->oprs[2],1);
-
-          if (keep == 0x15)
-            {
-              bytes[2] |= regval (&ins->oprs[3],1) << 4;
-            }
-          break;
-
-        case 0x16:      // SMLAL RdHi,RdLo,Rm,Rs
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          bytes[3] = *codes;
-
-          // RdHi
-          bytes[1] |= regval (&ins->oprs[1],1);
-          if (has_S_code)
-            bytes[1] |= 0x10;
-
-          // RdLo
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          // Rm
-          bytes[3] |= regval (&ins->oprs[2],1);
-
-          // Rs
-          bytes[2] |= regval (&ins->oprs[3],1);
-
-          break;
-
-        case 0x17:      // LDR Rd, expression
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-          if (has_T_code)
-            {
-              errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
-            }
-          if (has_W_code)
-            {
-              errfunc (ERR_NONFATAL, "'!' not allowed");
-            }
-
-          // Rn - implicit R15
-          bytes[1] |= 0xF;
-
-          if (ins->oprs[1].segment != segment)
-            {
-              errfunc (ERR_NONFATAL, "label not in same segment");
-            }
-
-          data = ins->oprs[1].offset - (offset + 8);
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x1000)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-
-          bytes[2] |= ((data & 0xF00) >> 8);
-          bytes[3] = data & 0xFF;
-          break;
-
-        case 0x18:      // LDR Rd, [Rn]
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-          if (has_T_code)
-            {
-              bytes[1] |= 0x20;         // write-back
-            }
-          else
-            {
-              bytes[0] |= 0x01;         // implicit pre-index mode
-            }
-
-          if (has_W_code)
-            {
-              bytes[1] |= 0x20;         // write-back
-            }
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (c == 0x15)                // R15
-            data = -8;
-          else
-            data = 0;
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          bytes[2] |= ((data & 0xF00) >> 8);
-          bytes[3] = data & 0xFF;
-          break;
-
-        case 0x19:      // LDR Rd, [Rn,#expression]
-        case 0x20:      // LDR Rd, [Rn,Rm]
-        case 0x21:      // LDR Rd, [Rn,Rm,shift]
-          ++codes;
-
-          bytes[0] = c | *codes++;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          if (has_B_code)
-            bytes[1] |= 0x40;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
-            {
-              bytes[0] |= 0x01;         // pre-index mode
-              if (has_W_code)
-                {
-                  bytes[1] |= 0x20;
-                }
-              if (has_T_code)
-                {
-                  errfunc (ERR_NONFATAL, "'T' not allowed in pre-index mode");
-                }
-            }
-          else
-            {
-              if (has_T_code)           // Forced write-back in post-index mode
-                {
-                  bytes[1] |= 0x20;
-                }
-              if (has_W_code)
-                {
-                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
-                }
-            }
-
-          if (keep == 0x19)
-            {
-              data = ins->oprs[2].offset;
-
-              if (data < 0)
-                {
-                  data = -data;
-                }
-              else
-                {
-                  bytes[1] |= 0x80;
-                }
-
-              if (data >= 0x1000)
-                {
-                  errfunc (ERR_NONFATAL, "too long offset");
-                }
-
-              bytes[2] |= ((data & 0xF00) >> 8);
-              bytes[3] = data & 0xFF;
-            }
-          else
-            {
-              if (ins->oprs[2].minus == 0)
-                {
-                  bytes[1] |= 0x80;
-                }
-              c = regval (&ins->oprs[2],1);
-              bytes[3] = c;
-
-              if (keep == 0x21)
-                {
-                  c = ins->oprs[3].offset;
-                  if (c > 0x1F)
-                    {
-                      errfunc (ERR_NONFATAL, "too large shiftvalue");
-                      c = c & 0x1F;
-                    }
-
-                  bytes[2] |= c >> 1;
-                  if (c & 0x01)
-                    {
-                      bytes[3] |= 0x80;
-                    }
-                  bytes[3] |= shiftval (&ins->oprs[3]) << 5;
-                }
-            }
-
-          break;
-
-        case 0x22:      // LDRH Rd, expression
-          ++codes;
-
-          bytes[0] = c | 0x01;          // Implicit pre-index
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn - implicit R15
-          bytes[1] |= 0xF;
-
-          if (ins->oprs[1].segment != segment)
-            {
-              errfunc (ERR_NONFATAL, "label not in same segment");
-            }
-
-          data = ins->oprs[1].offset - (offset + 8);
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x100)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-          bytes[3] = *codes++;
-
-          bytes[2] |= ((data & 0xF0) >> 4);
-          bytes[3] |= data & 0xF;
-          break;
-
-        case 0x23:      // LDRH Rd, Rn
-          ++codes;
-
-          bytes[0] = c | 0x01;          // Implicit pre-index
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (c == 0x15)                // R15
-            data = -8;
-          else
-            data = 0;
-
-          if (data < 0)
-            {
-              data = -data;
-            }
-          else
-            {
-              bytes[1] |= 0x80;
-            }
-
-          if (data >= 0x100)
-            {
-              errfunc (ERR_NONFATAL, "too long offset");
-            }
-          bytes[3] = *codes++;
-
-          bytes[2] |= ((data & 0xF0) >> 4);
-          bytes[3] |= data & 0xF;
-          break;
-
-        case 0x24:      // LDRH Rd, Rn, expression
-        case 0x25:      // LDRH Rd, Rn, Rm
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[1] = *codes++;
-
-          // Rd
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-
-          // Rn
-          c = regval (&ins->oprs[1],1);
-          bytes[1] |= c;
-
-          if (ins->oprs[ins->operands-1].bracket)       // FIXME: Bracket on last operand -> pre-index  <--
-            {
-              bytes[0] |= 0x01;         // pre-index mode
-              if (has_W_code)
-                {
-                  bytes[1] |= 0x20;
-                }
-            }
-          else
-            {
-              if (has_W_code)
-                {
-                  errfunc (ERR_NONFATAL, "'!' not allowed in post-index mode");
-                }
-            }
-
-          bytes[3] = *codes++;
-
-          if (keep == 0x24)
-            {
-              data = ins->oprs[2].offset;
-
-              if (data < 0)
-                {
-                  data = -data;
-                }
-              else
-                {
-                  bytes[1] |= 0x80;
-                }
-
-              if (data >= 0x100)
-                {
-                  errfunc (ERR_NONFATAL, "too long offset");
-                }
-
-              bytes[2] |= ((data & 0xF0) >> 4);
-              bytes[3] |= data & 0xF;
-            }
-          else
-            {
-              if (ins->oprs[2].minus == 0)
-                {
-                  bytes[1] |= 0x80;
-                }
-              c = regval (&ins->oprs[2],1);
-              bytes[3] |= c;
-
-            }
-          break;
-
-        case 0x26:      // LDM/STM Rn, {reg-list}
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[0] |= ( *codes >> 4) & 0xF;
-          bytes[1] = ( *codes << 4) & 0xF0;
-          ++codes;
-
-          if (has_W_code)
-            {
-              bytes[1] |= 0x20;
-            }
-          if (has_F_code)
-            {
-              bytes[1] |= 0x40;
-            }
-
-          // Rn
-          bytes[1] |= regval (&ins->oprs[0],1);
-
-          data = ins->oprs[1].basereg;
-
-          bytes[2] = ((data >> 8) & 0xFF);
-          bytes[3] = (data & 0xFF);
-
-          break;
-
-        case 0x27:      // SWP Rd, Rm, [Rn]
-          ++codes;
-
-          bytes[0] = c;
-
-          bytes[0] |= *codes++;
-
-          bytes[1] = regval (&ins->oprs[2],1);
-          if (has_B_code)
-            {
-              bytes[1] |= 0x40;
-            }
-          bytes[2] = regval (&ins->oprs[0],1) << 4;
-          bytes[3] = *codes++;
-          bytes[3] |= regval (&ins->oprs[1],1);
-          break;
-
-        default:
-          errfunc (ERR_FATAL, "unknown decoding of instruction");
-
-          bytes[0] = c;
-          // And a fix nibble
-          ++codes;
-          bytes[0] |= *codes++;
-
-         if ( *codes == 0x01)           // An I bit
-           {
-
-           }
-         if ( *codes == 0x02)           // An I bit
-           {
-
-           }
-         ++codes;
-      }
-    out (offset, segment, bytes, OUT_RAWDATA+4, NO_SEG, NO_SEG);
-}
-
-*)
-{$endif dummy}
-
 begin
   cai_align:=tai_align;
 end.

+ 288 - 0
compiler/aarch64/agcpugas.pas

@@ -0,0 +1,288 @@
+{
+    Copyright (c) 2003,2014 by Florian Klaempfl and Jonas Maebe
+
+    This unit implements an asm for AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+{ This unit implements the GNU Assembler writer for AArch64
+}
+
+unit agcpugas;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+       globtype,
+       aasmtai,
+       aggas,
+       cpubase,cpuinfo;
+
+    type
+      TAArch64InstrWriter=class(TCPUInstrWriter)
+        procedure WriteInstruction(hp : tai);override;
+      end;
+
+      TAArch64AppleAssembler=class(TAppleGNUassembler)
+        constructor create(smart: boolean); override;
+        function MakeCmdLine: TCmdStr; override;
+      end;
+
+
+    const
+      gas_shiftmode2str : array[tshiftmode] of string[4] = (
+        '','lsl','lsr','asr',
+        'uxtb','uxth','uxtw','uxtx',
+        'sxtb','sxth','sxtw','sxtx');
+
+    const 
+      cputype_to_gas_march : array[tcputype] of string = (
+        '', // cpu_none
+        'armv8'
+      );
+
+  implementation
+
+    uses
+       cutils,globals,verbose,
+       systems,
+       assemble,
+       aasmcpu,
+       itcpugas,
+       cgbase,cgutils;
+
+
+{****************************************************************************}
+{                      Apple AArch64 Assembler writer                        }
+{****************************************************************************}
+
+    constructor TAArch64AppleAssembler.create(smart: boolean);
+      begin
+        inherited create(smart);
+        InstrWriter := TAArch64InstrWriter.create(self);
+      end;
+
+    function TAArch64AppleAssembler.MakeCmdLine: TCmdStr;
+      begin
+        { 'as' calls through to clang for aarch64, and that one only supports
+          reading from standard input in case "-" is specified as input file
+          (in which case you also have to specify the language via -x) }
+        result:=inherited;
+{$ifdef hasunix}
+        if DoPipe then
+          result:=result+' -x assembler -'
+{$endif}
+      end;
+
+
+{****************************************************************************}
+{                  Helper routines for Instruction Writer                    }
+{****************************************************************************}
+
+    function getreferencestring(var ref : treference) : string;
+      const
+        darwin_addrpage2str: array[addr_page..addr_gotpageoffset] of string[11] =
+           ('@PAGE','@PAGEOFF','@GOTPAGE','@GOTPAGEOFF');
+      begin
+        if ref.base=NR_NO then
+          begin
+            case ref.refaddr of
+              addr_gotpage,
+              addr_page,
+              addr_gotpageoffset,
+              addr_pageoffset:
+                begin
+                  if not assigned(ref.symbol) or
+                     (ref.base<>NR_NO) or
+                     (ref.index<>NR_NO) or
+                     (ref.shiftmode<>SM_None) or
+                     (ref.offset<>0) then
+                    internalerror(2014121501);
+                  if target_asm.id=as_darwin then
+                    result:=ref.symbol.name+darwin_addrpage2str[ref.refaddr]
+                  else
+                    { todo }
+                    internalerror(2014121502);
+                end
+              else
+                internalerror(2015022301);
+            end
+          end
+        else
+          begin
+            result:='['+gas_regname(ref.base);
+            if ref.addressmode=AM_POSTINDEXED then
+              result:=result+']';
+            if ref.index<>NR_NO then
+              begin
+                if (ref.offset<>0) or
+                   assigned(ref.symbol) then
+                  internalerror(2014121504);
+                result:=result+', '+gas_regname(ref.index);
+                case ref.shiftmode of
+                  SM_None: ;
+                  SM_LSL,
+                  SM_UXTW, SM_UXTX, SM_SXTW, SM_SXTX:
+                    begin
+                      result:=result+', '+gas_shiftmode2str[ref.shiftmode];
+                      if (ref.shiftmode=SM_LSL) or
+                         (ref.shiftimm<>0) then
+                        result:=result+' #'+tostr(ref.shiftimm);
+                    end
+                  else
+                    internalerror(2014121505);
+                end;
+              end
+            else
+              begin
+                if assigned(ref.symbol) then
+                  begin
+                    case ref.refaddr of
+                      addr_gotpageoffset,
+                      addr_pageoffset:
+                        begin
+                          if target_asm.id=as_darwin then
+                            result:=result+', '+ref.symbol.name+darwin_addrpage2str[ref.refaddr]
+                          else
+                            { todo }
+                            internalerror(2014122510);
+                        end
+                      else
+                        { todo: not yet generated/don't know syntax }
+                        internalerror(2014121506);
+                    end;
+                  end
+                else
+                  begin
+                    if ref.refaddr<>addr_no then
+                      internalerror(2014121506);
+                    if (ref.offset<>0) then
+                      result:=result+', #'+tostr(ref.offset);
+                  end;
+              end;
+            case ref.addressmode of
+              AM_OFFSET:
+                result:=result+']';
+              AM_PREINDEXED:
+                result:=result+']!';
+            end;
+          end;
+      end;
+
+
+    function getopstr(hp: taicpu; opnr: longint; const o: toper): string;
+      begin
+        case o.typ of
+          top_reg:
+            { we cannot yet represent "umov w0, v4.s[0]" or "ins v4.d[0], x1",
+              so for now we use "s4" or "d4" instead -> translate here }
+            if ((hp.opcode=A_INS) or
+                (hp.opcode=A_UMOV)) and
+               (getregtype(hp.oper[opnr]^.reg)=R_MMREGISTER) then
+              begin
+                case getsubreg(hp.oper[opnr]^.reg) of
+                  R_SUBMMS:
+                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.S[0]';
+                  R_SUBMMD:
+                    getopstr:='v'+tostr(getsupreg(hp.oper[opnr]^.reg))+'.D[0]';
+                  else
+                    internalerror(2014122907);
+                end;
+              end
+            else
+              getopstr:=gas_regname(o.reg);
+          top_shifterop:
+            begin
+              getopstr:=gas_shiftmode2str[o.shifterop^.shiftmode];
+              if o.shifterop^.shiftimm<>0 then
+                getopstr:=getopstr+' #'+tostr(o.shifterop^.shiftimm)
+            end;
+          top_const:
+            if o.val>=0 then
+              getopstr:='#'+tostr(o.val)
+            else
+              getopstr:='#0x'+hexStr(o.val,16);
+          top_conditioncode:
+            getopstr:=cond2str[o.cc];
+          top_ref:
+            if is_calljmp(hp.opcode) then
+              begin
+                if o.ref^.refaddr<>addr_full then
+                  internalerror(2014122220);
+                if not assigned(o.ref^.symbol) or
+                   assigned(o.ref^.relsymbol) or
+                   (o.ref^.base<>NR_NO) or
+                   (o.ref^.index<>NR_NO) or
+                   (o.ref^.offset<>0) then
+                  internalerror(2014122221);
+                getopstr:=o.ref^.symbol.name;
+              end
+            else
+              getopstr:=getreferencestring(o.ref^);
+          else
+            internalerror(2014121507);
+        end;
+      end;
+
+
+    procedure TAArch64InstrWriter.WriteInstruction(hp : tai);
+      var
+        op: TAsmOp;
+        s: string;
+        i: byte;
+        sep: string[3];
+      begin
+        op:=taicpu(hp).opcode;
+        s:=#9+gas_op2str[op]+oppostfix2str[taicpu(hp).oppostfix];
+        if taicpu(hp).condition<>C_NONE then
+          s:=s+'.'+cond2str[taicpu(hp).condition];
+        if taicpu(hp).ops<>0 then
+          begin
+            sep:=#9;
+            for i:=0 to taicpu(hp).ops-1 do
+              begin
+                 // debug code
+                 // writeln(s);
+                 // writeln(taicpu(hp).fileinfo.line);
+                 s:=s+sep+getopstr(taicpu(hp),i,taicpu(hp).oper[i]^);
+                 sep:=',';
+              end;
+          end;
+        owner.AsmWriteLn(s);
+      end;
+
+
+    const
+       as_aarch64_gas_darwin_info : tasminfo =
+          (
+            id     : as_darwin;
+            idtxt  : 'AS-Darwin';
+            asmbin : 'as';
+            asmcmd : '-o $OBJ $EXTRAOPT $ASM -arch arm64';
+            supported_targets : [system_aarch64_darwin];
+            flags : [af_needar,af_smartlink_sections,af_supports_dwarf,af_stabs_use_function_absolute_addresses];
+            labelprefix : 'L';
+            comment : '# ';
+            dollarsign: '$';
+          );
+
+
+begin
+  RegisterAssembler(as_aarch64_gas_darwin_info,TAArch64AppleAssembler);
+end.

+ 25 - 7
compiler/aarch64/aoptcpub.pas

@@ -118,17 +118,35 @@ Implementation
     End;
 
 
-  function TAoptBaseCpu.RegModifiedByInstruction(Reg: TRegister; p1: tai): boolean;
+  function TAoptBaseCpu.RegModifiedByInstruction(reg: tregister; p1: tai): boolean;
     var
-      i : Longint;
+      i: longint;
+      preg: tregister;
     begin
       result:=false;
       for i:=0 to taicpu(p1).ops-1 do
-        if (taicpu(p1).oper[i]^.typ=top_reg) and (taicpu(p1).oper[i]^.reg=Reg) and (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
-          begin
-            result:=true;
-            exit;
-          end;
+        case taicpu(p1).oper[i]^.typ of
+          top_reg:
+            begin
+              preg:=taicpu(p1).oper[i]^.reg;
+              if (getregtype(preg)=getregtype(reg)) and
+                 (getsupreg(preg)=getsupreg(reg)) and
+                 (taicpu(p1).spilling_get_operation_type(i) in [operand_write,operand_readwrite]) then
+                begin
+                  result:=true;
+                  exit;
+                end;
+            end;
+          top_ref:
+            begin
+              if (taicpu(p1).oper[i]^.ref^.addressmode<>am_offset) and
+                 (reg=taicpu(p1).oper[i]^.ref^.base) then
+                begin
+                  result:=true;
+                  exit
+                end;
+            end;
+        end;
     end;
 
 End.

+ 2284 - 0
compiler/aarch64/cgcpu.pas

@@ -0,0 +1,2284 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    This unit implements the code generator for AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       globtype,parabase,
+       cgbase,cgutils,cgobj,
+       aasmbase,aasmtai,aasmdata,aasmcpu,
+       cpubase,cpuinfo,
+       node,symconst,SymType,symdef,
+       rgcpu;
+
+    type
+      tcgaarch64=class(tcg)
+       protected
+        { simplifies "ref" so it can be used with "op". If "ref" can be used
+          with a different load/Store operation that has the same meaning as the
+          original one, "op" will be replaced with the alternative }
+        procedure make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+        { changes register size without adding register allocation info }
+        function makeregsize(reg: tregister; size: tcgsize): tregister; overload;
+       public
+        function getfpuregister(list: TAsmList; size: Tcgsize): Tregister; override;
+        procedure handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+        procedure init_register_allocators;override;
+        procedure done_register_allocators;override;
+        function  getmmregister(list:TAsmList;size:tcgsize):tregister;override;
+        function handle_load_store(list:TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+        procedure a_call_name(list:TAsmList;const s:string; weak: boolean);override;
+        procedure a_call_reg(list:TAsmList;Reg:tregister);override;
+        { General purpose instructions }
+        procedure maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+        procedure a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_op_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src, dst: tregister);override;
+        procedure a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);override;
+        procedure a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);override;
+        procedure a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        procedure a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);override;
+        { move instructions }
+        procedure a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg: tregister);override;
+        procedure a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference); override;
+        procedure a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister;const ref: TReference);override;
+        procedure a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference); override;
+        procedure a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister);override;
+        procedure a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister); override;
+        procedure a_load_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);override;
+        procedure a_loadaddr_ref_reg(list: TAsmList; const ref: TReference; r: tregister);override;
+        { fpu move instructions (not used, all floating point is vector unit-based) }
+        procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister); override;
+        procedure a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister); override;
+        procedure a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference); override;
+        procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister;shuffle : pmmshuffle);override;
+        procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: TReference; reg: tregister; shuffle: pmmshuffle);override;
+        procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: TReference; shuffle: pmmshuffle);override;
+
+        procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+        procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle); override;
+
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
+        { comparison operations }
+        procedure a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);override;
+        procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);override;
+        procedure a_jmp_always(list: TAsmList; l: TAsmLabel);override;
+        procedure a_jmp_name(list: TAsmList; const s: string);override;
+        procedure a_jmp_cond(list: TAsmList; cond: TOpCmp; l: tasmlabel);{ override;}
+        procedure a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);override;
+        procedure g_flags2reg(list: TAsmList; size: tcgsize; const f:tresflags; reg: tregister);override;
+        procedure g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);override;
+        procedure g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc: tlocation);override;
+        procedure g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);override;
+        procedure g_proc_exit(list: TAsmList; parasize: longint; nostackframe: boolean);override;
+        procedure g_maybe_got_init(list: TAsmList); override;
+        procedure g_restore_registers(list: TAsmList);override;
+        procedure g_save_registers(list: TAsmList);override;
+        procedure g_concatcopy_move(list: TAsmList; const source, dest: treference; len: tcgint);
+        procedure g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);override;
+        procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: tcgint);override;
+        procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);override;
+       private
+        function save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+        procedure load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+      end;
+
+    procedure create_codegen;
+
+    const
+      TOpCG2AsmOpReg: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASRV,A_LSLV,A_LSRV,A_SUB,A_EOR,A_NONE,A_RORV
+      );
+      TOpCG2AsmOpImm: array[topcg] of TAsmOp = (
+        A_NONE,A_MOV,A_ADD,A_AND,A_UDIV,A_SDIV,A_MUL,A_MUL,A_NEG,A_MVN,A_ORR,A_ASR,A_LSL,A_LSR,A_SUB,A_EOR,A_NONE,A_ROR
+      );
+      TOpCmp2AsmCond: array[topcmp] of TAsmCond = (C_NONE,C_EQ,C_GT,
+        C_LT,C_GE,C_LE,C_NE,C_LS,C_CC,C_CS,C_HI
+      );
+
+
+implementation
+
+  uses
+    globals,verbose,systems,cutils,
+    paramgr,fmodule,
+    symtable,symsym,
+    tgobj,
+    procinfo,cpupi;
+
+
+    procedure tcgaarch64.make_simple_ref(list:TAsmList; var op: tasmop; size: tcgsize; oppostfix: toppostfix; var ref: treference; preferred_newbasereg: tregister);
+      var
+        href: treference;
+        so: tshifterop;
+        accesssize: longint;
+      begin
+        if (ref.base=NR_NO) then
+          begin
+            if ref.shiftmode<>SM_None then
+              internalerror(2014110701);
+            ref.base:=ref.index;
+            ref.index:=NR_NO;
+          end;
+        { no abitrary scale factor support (the generic code doesn't set it,
+          AArch-specific code shouldn't either) }
+        if not(ref.scalefactor in [0,1]) then
+          internalerror(2014111002);
+
+        case simple_ref_type(op,size,oppostfix,ref) of
+          sr_simple:
+            exit;
+          sr_internal_illegal:
+            internalerror(2014121702);
+          sr_complex:
+            { continue } ;
+        end;
+
+        if assigned(ref.symbol) then
+          begin
+            { internal "load symbol" instructions should already be valid }
+            if assigned(ref.symboldata) or
+               (ref.refaddr in [addr_pic,addr_gotpage,addr_gotpageoffset,addr_page,addr_pageoffset]) then
+              internalerror(2014110802);
+            { no relative symbol support (needed) yet }
+            if assigned(ref.relsymbol) then
+              internalerror(2014111001);
+            { on Darwin: load the address from the GOT. There does not appear to
+              be a non-GOT variant. This consists of first loading the address
+              of the page containing the GOT entry for this variable, and then
+              the address of the entry itself from that page (can be relaxed by
+              the linker in case the variable itself can be stored directly in
+              the GOT) }
+            if target_info.system in systems_darwin then
+              begin
+                if (preferred_newbasereg=NR_NO) or
+                   (ref.base=preferred_newbasereg) or
+                   (ref.index=preferred_newbasereg) then
+                  preferred_newbasereg:=getaddressregister(list);
+                { load the (GOT) page }
+                reference_reset_symbol(href,ref.symbol,0,8);
+                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+                   ((ref.symbol.typ=AT_DATA) and
+                    (ref.symbol.bind=AB_LOCAL)) then
+                  href.refaddr:=addr_page
+                else
+                  href.refaddr:=addr_gotpage;
+                list.concat(taicpu.op_reg_ref(A_ADRP,preferred_newbasereg,href));
+                { load the GOT entry (= address of the variable) }
+                reference_reset_base(href,preferred_newbasereg,0,sizeof(pint));
+                href.symbol:=ref.symbol;
+                { code symbols defined in the current compilation unit do not
+                  have to be accessed via the GOT }
+                if ((ref.symbol.typ in [AT_FUNCTION,AT_LABEL]) and
+                    (ref.symbol.bind in [AB_LOCAL,AB_GLOBAL])) or
+                   ((ref.symbol.typ=AT_DATA) and
+                    (ref.symbol.bind=AB_LOCAL)) then
+                  begin
+                    href.base:=NR_NO;
+                    href.refaddr:=addr_pageoffset;
+                    list.concat(taicpu.op_reg_reg_ref(A_ADD,preferred_newbasereg,preferred_newbasereg,href));
+                  end
+                else
+                  begin
+                    href.refaddr:=addr_gotpageoffset;
+                    { use a_load_ref_reg() rather than directly encoding the LDR,
+                      so that we'll check the validity of the reference }
+                    a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,preferred_newbasereg);
+                  end;
+                { set as new base register }
+                if ref.base=NR_NO then
+                  ref.base:=preferred_newbasereg
+                else if ref.index=NR_NO then
+                  ref.index:=preferred_newbasereg
+                else
+                  begin
+                    { make sure it's valid in case ref.base is SP -> make it
+                      the second operand}
+                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,preferred_newbasereg,ref.base,preferred_newbasereg);
+                    ref.base:=preferred_newbasereg
+                  end;
+                ref.symbol:=nil;
+              end
+            else
+              { todo }
+              internalerror(2014111003);
+          end;
+
+        { base & index }
+        if (ref.base<>NR_NO) and
+           (ref.index<>NR_NO) then
+          begin
+            case op of
+              A_LDR, A_STR:
+                begin
+                  if (ref.shiftmode=SM_None) and
+                     (ref.shiftimm<>0) then
+                    internalerror(2014110805);
+                  { wrong shift? (possible in case of something like
+                     array_of_2byte_rec[x].bytefield -> shift will be set 1, but
+                     the final load is a 1 byte -> can't use shift after all }
+                  if (ref.shiftmode in [SM_LSL,SM_UXTW,SM_SXTW]) and
+                     ((ref.shiftimm<>BsfDWord(tcgsizep2size[size])) or
+                      (ref.offset<>0)) then
+                    begin
+                      if preferred_newbasereg=NR_NO then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                      reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                      { possibly still an invalid offset -> fall through }
+                    end
+                  else if ref.offset<>0 then
+                    begin
+                      if (preferred_newbasereg=NR_NO) or
+                         { we keep ref.index, so it must not be overwritten }
+                         (ref.index=preferred_newbasereg) then
+                        preferred_newbasereg:=getaddressregister(list);
+                      { add to the base and not to the index, because the index
+                        may be scaled; this works even if the base is SP }
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      ref.offset:=0;
+                      ref.base:=preferred_newbasereg;
+                      { finished }
+                      exit;
+                    end
+                  else
+                    { valid -> exit }
+                    exit;
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110704);
+              { these don't support base+index }
+              A_LDUR,A_STUR,
+              A_LDP,A_STP:
+                begin
+                  { these either don't support pre-/post-indexing, or don't
+                    support it with base+index }
+                  if ref.addressmode<>AM_OFFSET then
+                    internalerror(2014110911);
+                  if preferred_newbasereg=NR_NO then
+                    preferred_newbasereg:=getaddressregister(list);
+                  if ref.shiftmode<>SM_None then
+                    begin
+                      { "add" supports a superset of the shift modes supported by
+                        load/store instructions }
+                      shifterop_reset(so);
+                      so.shiftmode:=ref.shiftmode;
+                      so.shiftimm:=ref.shiftimm;
+                      list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,preferred_newbasereg,ref.base,ref.index,so));
+                    end
+                  else
+                    a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,ref.index,ref.base,preferred_newbasereg);
+                  reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                  { fall through to the handling of base + offset, since the
+                    offset may still be too big }
+                end;
+              else
+                internalerror(2014110901);
+            end;
+          end;
+
+        { base + offset }
+        if ref.base<>NR_NO then
+          begin
+            { valid offset for LDUR/STUR -> use that }
+            if (ref.addressmode=AM_OFFSET) and
+               (op in [A_LDR,A_STR]) and
+               (ref.offset>=-256) and
+               (ref.offset<=255) then
+              begin
+                if op=A_LDR then
+                  op:=A_LDUR
+                else
+                  op:=A_STUR
+              end
+            { if it's not a valid LDUR/STUR, use LDR/STR }
+            else if (op in [A_LDUR,A_STUR]) and
+               ((ref.offset<-256) or
+                (ref.offset>255) or
+                (ref.addressmode<>AM_OFFSET)) then
+              begin
+                if op=A_LDUR then
+                  op:=A_LDR
+                else
+                  op:=A_STR
+              end;
+            case op of
+              A_LDR,A_STR:
+                begin
+                  case ref.addressmode of
+                    AM_PREINDEXED:
+                      begin
+                        { since the loaded/stored register cannot be the same
+                          as the base register, we can safely add the
+                          offset to the base if it doesn't fit}
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          begin
+                            a_op_const_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base);
+                            ref.offset:=0;
+                          end;
+                      end;
+                    AM_POSTINDEXED:
+                      begin
+                        { cannot emulate post-indexing if we have to fold the
+                          offset into the base register }
+                        if (ref.offset<-256) or
+                            (ref.offset>255) then
+                          internalerror(2014110909);
+                        { ok }
+                      end;
+                    AM_OFFSET:
+                      begin
+                        { unsupported offset -> fold into base register }
+                        accesssize:=1 shl tcgsizep2size[size];
+                        if (ref.offset<0) or
+                           (ref.offset>(((1 shl 12)-1)*accesssize)) or
+                           ((ref.offset mod accesssize)<>0) then
+                          begin
+                            if preferred_newbasereg=NR_NO then
+                              preferred_newbasereg:=getaddressregister(list);
+                            { can we split the offset beween an
+                              "add/sub (imm12 shl 12)" and the load (also an
+                              imm12)?
+                              -- the offset from the load will always be added,
+                                that's why the lower bound has a smaller range
+                                than the upper bound; it must also be a multiple
+                                of the access size }
+                            if (ref.offset>=-(((1 shl 12)-1) shl 12)) and
+                               (ref.offset<=((1 shl 12)-1) shl 12 + ((1 shl 12)-1)) and
+                               ((ref.offset mod accesssize)=0) then
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,(ref.offset shr 12) shl 12,ref.base,preferred_newbasereg);
+                                ref.offset:=ref.offset-(ref.offset shr 12) shl 12;
+                              end
+                            else
+                              begin
+                                a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                                ref.offset:=0;
+                              end;
+                            reference_reset_base(ref,preferred_newbasereg,ref.offset,ref.alignment);
+                          end;
+                      end
+                    else
+                      internalerror(2014110904);
+                  end;
+                end;
+              A_LDP,A_STP:
+                begin
+                  { unsupported offset -> fold into base register (these
+                    instructions support all addressmodes) }
+                  if (ref.offset<-(1 shl (6+tcgsizep2size[size]))) or
+                     (ref.offset>(1 shl (6+tcgsizep2size[size]))-1) then
+                    begin
+                      case ref.addressmode of
+                        AM_POSTINDEXED:
+                          { don't emulate post-indexing if we have to fold the
+                            offset into the base register }
+                          internalerror(2014110910);
+                        AM_PREINDEXED:
+                          { this means the offset must be added to the current
+                            base register }
+                          preferred_newbasereg:=ref.base;
+                        AM_OFFSET:
+                          if preferred_newbasereg=NR_NO then
+                            preferred_newbasereg:=getaddressregister(list);
+                      end;
+                      a_op_const_reg_reg(list,OP_ADD,OS_ADDR,ref.offset,ref.base,preferred_newbasereg);
+                      reference_reset_base(ref,preferred_newbasereg,0,ref.alignment);
+                    end
+                end;
+              A_LDUR,A_STUR:
+                begin
+                  { valid, checked above }
+                end;
+              { todo }
+              A_LD1,A_LD2,A_LD3,A_LD4,
+              A_ST1,A_ST2,A_ST3,A_ST4:
+                internalerror(2014110908);
+              else
+                internalerror(2014110708);
+            end;
+            { done }
+            exit;
+          end;
+
+        { only an offset -> change to base (+ offset 0) }
+        if preferred_newbasereg=NR_NO then
+          preferred_newbasereg:=getaddressregister(list);
+        a_load_const_reg(list,OS_ADDR,ref.offset,preferred_newbasereg);
+        reference_reset_base(ref,preferred_newbasereg,0,newalignment(8,ref.offset));
+      end;
+
+
+    function tcgaarch64.makeregsize(reg: tregister; size: tcgsize): tregister;
+      var
+        subreg:Tsubregister;
+      begin
+        subreg:=cgsize2subreg(getregtype(reg),size);
+        result:=reg;
+        setsubreg(result,subreg);
+      end;
+
+
+    function tcgaarch64.getfpuregister(list: TAsmList; size: Tcgsize): Tregister;
+      begin
+        internalerror(2014122110);
+        { squash warning }
+        result:=NR_NO;
+      end;
+
+
+    function tcgaarch64.handle_load_store(list: TAsmList; op: tasmop; size: tcgsize; oppostfix: toppostfix; reg: tregister; ref: treference):treference;
+      begin
+        make_simple_ref(list,op,size,oppostfix,ref,NR_NO);
+        list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),oppostfix));
+        result:=ref;
+      end;
+
+
+    procedure tcgaarch64.handle_reg_imm12_reg(list: TAsmList; op: Tasmop; size: tcgsize; src: tregister; a: tcgint; dst: tregister; tmpreg: tregister; setflags, usedest: boolean);
+      var
+        instr: taicpu;
+        so: tshifterop;
+        hadtmpreg: boolean;
+      begin
+        { imm12 }
+        if (a>=0) and
+           (a<=((1 shl 12)-1)) then
+          if usedest then
+            instr:=taicpu.op_reg_reg_const(op,dst,src,a)
+          else
+            instr:=taicpu.op_reg_const(op,src,a)
+        { imm12 lsl 12 }
+        else if (a and not(((tcgint(1) shl 12)-1) shl 12))=0 then
+          begin
+            so.shiftmode:=SM_LSL;
+            so.shiftimm:=12;
+            if usedest then
+              instr:=taicpu.op_reg_reg_const_shifterop(op,dst,src,a shr 12,so)
+            else
+              instr:=taicpu.op_reg_const_shifterop(op,src,a shr 12,so)
+          end
+        else
+          begin
+            { todo: other possible optimizations (e.g. load 16 bit constant in
+                register and then add/sub/cmp/cmn shifted the rest) }
+            if tmpreg=NR_NO then
+              begin
+                hadtmpreg:=false;
+                tmpreg:=getintregister(list,size);
+              end
+            else
+              begin
+                hadtmpreg:=true;
+                getcpuregister(list,tmpreg);
+              end;
+            a_load_const_reg(list,size,a,tmpreg);
+            if usedest then
+              instr:=taicpu.op_reg_reg_reg(op,dst,src,tmpreg)
+            else
+              instr:=taicpu.op_reg_reg(op,src,tmpreg);
+            if hadtmpreg then
+              ungetcpuregister(list,tmpreg);
+          end;
+        if setflags then
+          setoppostfix(instr,PF_S);
+        list.concat(instr);
+      end;
+
+
+{****************************************************************************
+                              Assembler code
+****************************************************************************}
+
+    procedure tcgaarch64.init_register_allocators;
+      begin
+        inherited init_register_allocators;
+
+        rg[R_INTREGISTER]:=trgintcpu.create(R_INTREGISTER,R_SUBWHOLE,
+            [RS_X0,RS_X1,RS_X2,RS_X3,RS_X4,RS_X5,RS_X6,RS_X7,RS_X8,
+             RS_X9,RS_X10,RS_X11,RS_X12,RS_X13,RS_X14,RS_X15,RS_X16,RS_X17,
+             RS_X19,RS_X20,RS_X21,RS_X22,RS_X23,RS_X24,RS_X25,RS_X26,RS_X27,RS_X28
+             { maybe we can enable this in the future for leaf functions (it's
+               the frame pointer)
+             ,RS_X29 }],
+            first_int_imreg,[]);
+
+        rg[R_MMREGISTER]:=trgcpu.create(R_MMREGISTER,R_SUBMMD,
+            [RS_Q0,RS_Q1,RS_Q2,RS_Q3,RS_Q4,RS_Q5,RS_Q6,RS_Q7,
+             RS_Q8,RS_Q9,RS_Q10,RS_Q11,RS_Q12,RS_Q13,RS_Q14,RS_Q15,
+             RS_Q16,RS_Q17,RS_Q18,RS_Q19,RS_Q20,RS_Q21,RS_Q22,RS_Q23,
+             RS_Q24,RS_Q25,RS_Q26,RS_Q27,RS_Q28,RS_Q29,RS_Q30,RS_Q31],
+            first_mm_imreg,[]);
+      end;
+
+
+    procedure tcgaarch64.done_register_allocators;
+      begin
+        rg[R_INTREGISTER].free;
+        rg[R_FPUREGISTER].free;
+        rg[R_MMREGISTER].free;
+        inherited done_register_allocators;
+      end;
+
+
+    function tcgaarch64.getmmregister(list: TAsmList; size: tcgsize):tregister;
+      begin
+        case size of
+          OS_F32:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMS);
+          OS_F64:
+            result:=rg[R_MMREGISTER].getregister(list,R_SUBMMD)
+          else
+            internalerror(2014102701);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_call_name(list: TAsmList; const s: string; weak: boolean);
+      begin
+        if not weak then
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.RefAsmSymbol(s)))
+        else
+          list.concat(taicpu.op_sym(A_BL,current_asmdata.WeakRefAsmSymbol(s)));
+      end;
+
+
+    procedure tcgaarch64.a_call_reg(list:TAsmList;Reg:tregister);
+      begin
+        list.concat(taicpu.op_reg(A_BLR,reg));
+      end;
+
+
+    {********************** load instructions ********************}
+
+    procedure tcgaarch64.a_load_const_reg(list: TAsmList; size: tcgsize; a: tcgint; reg : tregister);
+      var
+        preva: tcgint;
+        opc: tasmop;
+        shift,maxshift: byte;
+        so: tshifterop;
+        reginited: boolean;
+        mask: tcgint;
+      begin
+        { if we load a value into a 32 bit register, it is automatically
+          zero-extended to 64 bit }
+        if (high(a)=0) and
+           (size in [OS_64,OS_S64]) then
+          begin
+            size:=OS_32;
+            reg:=makeregsize(reg,size);
+          end;
+        { values <= 32 bit are stored in a 32 bit register }
+        if not(size in [OS_64,OS_S64]) then
+          a:=cardinal(a);
+
+        if size in [OS_64,OS_S64] then
+          begin
+            mask:=-1;
+            maxshift:=64;
+          end
+        else
+          begin
+            mask:=$ffffffff;
+            maxshift:=32;
+          end;
+        { single movn enough? (to be extended) }
+        shift:=16;
+        preva:=a;
+        repeat
+          if (a shr shift)=(mask shr shift) then
+            begin
+              if shift=16 then
+                list.concat(taicpu.op_reg_const(A_MOVN,reg,not(word(preva))))
+              else
+                begin
+                  shifterop_reset(so);
+                  so.shiftmode:=SM_LSL;
+                  so.shiftimm:=shift-16;
+                  list.concat(taicpu.op_reg_const_shifterop(A_MOVN,reg,not(word(preva)),so));
+                end;
+              exit;
+            end;
+          { only try the next 16 bits if the current one is all 1 bits, since
+            the movn will set all lower bits to 1 }
+          if word(a shr (shift-16))<>$ffff then
+            break;
+          inc(shift,16);
+        until shift=maxshift;
+        reginited:=false;
+        shift:=0;
+        { can be optimized later to use more movn }
+        repeat
+          { leftover is shifterconst? (don't check if we can represent it just
+            as effectively with movz/movk, as this check is expensive) }
+          if ((shift<tcgsize2size[size]*(8 div 2)) and
+              (word(a)<>0) and
+              ((a shr 16)<>0)) and
+             is_shifter_const(a shl shift,size) then
+            begin
+              if reginited then
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,reg,a shl shift))
+              else
+                list.concat(taicpu.op_reg_reg_const(A_ORR,reg,makeregsize(NR_XZR,size),a shl shift));
+              exit;
+            end;
+          { set all 16 bit parts <> 0 }
+          if (word(a)<>0) or
+             ((shift=0) and
+              (a=0)) then
+            if shift=0 then
+              begin
+                list.concat(taicpu.op_reg_const(A_MOVZ,reg,word(a)));
+                reginited:=true;
+              end
+            else
+              begin
+                shifterop_reset(so);
+                so.shiftmode:=SM_LSL;
+                so.shiftimm:=shift;
+                if not reginited then
+                  begin
+                    opc:=A_MOVZ;
+                    reginited:=true;
+                  end
+                else
+                  opc:=A_MOVK;
+                list.concat(taicpu.op_reg_const_shifterop(opc,reg,word(a),so));
+              end;
+            preva:=a;
+            a:=a shr 16;
+           inc(shift,16);
+        until word(preva)=preva;
+        if not reginited then
+          internalerror(2014102702);
+      end;
+
+
+    procedure tcgaarch64.a_load_const_ref(list: TAsmList; size: tcgsize; a: tcgint; const ref: treference);
+      var
+        reg: tregister;
+      begin
+        { use the zero register if possible }
+        if a=0 then
+          begin
+            if size in [OS_64,OS_S64] then
+              reg:=NR_XZR
+            else
+              reg:=NR_WZR;
+            a_load_reg_ref(list,size,size,reg,ref);
+          end
+        else
+          inherited;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      var
+        oppostfix:toppostfix;
+        hreg: tregister;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          fromsize:=tosize
+        { have a 32 bit register but need a 64 bit one? }
+        else if tosize in [OS_64,OS_S64] then
+          begin
+            { sign extend if necessary }
+            if fromsize in [OS_S8,OS_S16,OS_S32] then
+              begin
+                { can't overwrite reg, may be a constant reg }
+                hreg:=getintregister(list,tosize);
+                a_load_reg_reg(list,fromsize,tosize,reg,hreg);
+                reg:=hreg;
+              end
+            else
+              { top 32 bit are zero by default }
+              reg:=makeregsize(reg,OS_64);
+            fromsize:=tosize;
+          end;
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[tosize]) then
+          begin
+            a_load_reg_ref_unaligned(list,fromsize,tosize,reg,ref);
+          end
+        else
+          begin
+            case tosize of
+              { signed integer registers }
+              OS_8,
+              OS_S8:
+                oppostfix:=PF_B;
+              OS_16,
+              OS_S16:
+                oppostfix:=PF_H;
+              OS_32,
+              OS_S32,
+              OS_64,
+              OS_S64:
+                oppostfix:=PF_None;
+              else
+                InternalError(200308299);
+            end;
+            handle_load_store(list,A_STR,tosize,oppostfix,reg,ref);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      var
+        oppostfix:toppostfix;
+      begin
+        if tcgsize2Size[fromsize]>=tcgsize2Size[tosize] then
+          fromsize:=tosize;
+        { ensure that all bits of the 32/64 register are always correctly set:
+           * default behaviour is always to zero-extend to the entire (64 bit)
+             register -> unsigned 8/16/32 bit loads only exist with a 32 bit
+             target register, as the upper 32 bit will be zeroed implicitly
+             -> always make target register 32 bit
+           * signed loads exist both with 32 and 64 bit target registers,
+             depending on whether the value should be sign extended to 32 or
+             to 64 bit (if sign extended to 32 bit, the upper 32 bits of the
+             corresponding 64 bit register are again zeroed) -> no need to
+             change anything (we only have 32 and 64 bit registers), except that
+             when loading an OS_S32 to a 32 bit register, we don't need/can't
+             use sign extension
+        }
+        if fromsize in [OS_8,OS_16,OS_32] then
+          reg:=makeregsize(reg,OS_32);
+        if (ref.alignment<>0) and
+           (ref.alignment<tcgsize2size[fromsize]) then
+          begin
+            a_load_ref_reg_unaligned(list,fromsize,tosize,ref,reg);
+            exit;
+          end;
+        case fromsize of
+          { signed integer registers }
+          OS_8:
+            oppostfix:=PF_B;
+          OS_S8:
+            oppostfix:=PF_SB;
+          OS_16:
+            oppostfix:=PF_H;
+          OS_S16:
+            oppostfix:=PF_SH;
+          OS_S32:
+            if getsubreg(reg)=R_SUBD then
+              oppostfix:=PF_NONE
+            else
+              oppostfix:=PF_SW;
+          OS_32,
+          OS_64,
+          OS_S64:
+            oppostfix:=PF_None;
+          else
+            InternalError(200308297);
+        end;
+        handle_load_store(list,A_LDR,fromsize,oppostfix,reg,ref);
+
+        { clear upper 16 bits if the value was negative }
+        if (fromsize=OS_S8) and (tosize=OS_16) then
+          a_load_reg_reg(list,fromsize,tosize,reg,reg);
+      end;
+
+
+    procedure tcgaarch64.a_load_ref_reg_unaligned(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; register: tregister);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg: tregister;
+      begin
+        if fromsize in [OS_64,OS_S64] then
+          begin
+            { split into two 32 bit loads }
+            hreg1:=makeregsize(register,OS_32);
+            hreg2:=getintregister(list,OS_32);
+            if target_info.endian=endian_big then
+              begin
+                tmpreg:=hreg1;
+                hreg1:=hreg2;
+                hreg2:=tmpreg;
+              end;
+            { can we use LDP? }
+            if (ref.alignment=4) and
+               (simple_ref_type(A_LDP,OS_32,PF_None,ref)=sr_simple) then
+              list.concat(taicpu.op_reg_reg_ref(A_LDP,hreg1,hreg2,ref))
+            else
+              begin
+                a_load_ref_reg(list,OS_32,OS_32,ref,hreg1);
+                href:=ref;
+                inc(href.offset,4);
+                a_load_ref_reg(list,OS_32,OS_32,href,hreg2);
+              end;
+            list.concat(taicpu.op_reg_reg_const_const(A_BFI,register,makeregsize(hreg2,OS_64),32,32));
+          end
+       else
+         inherited;
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_reg(list:TAsmList;fromsize,tosize:tcgsize;reg1,reg2:tregister);
+      var
+        instr: taicpu;
+      begin
+        { we use both 32 and 64 bit registers -> insert conversion when when
+          we have to truncate/sign extend inside the (32 or 64 bit) register
+          holding the value, and when we sign extend from a 32 to a 64 bit
+          register }
+        if (tcgsize2size[fromsize]>tcgsize2size[tosize]) or
+           ((tcgsize2size[fromsize]=tcgsize2size[tosize]) and
+            (fromsize<>tosize) and
+            not(fromsize in [OS_32,OS_S32,OS_64,OS_S64])) or
+           ((fromsize in [OS_S8,OS_S16,OS_S32]) and
+            (tosize in [OS_64,OS_S64])) or
+           { needs to mask out the sign in the top 16 bits }
+           ((fromsize=OS_S8) and
+            (tosize=OS_16)) then
+          begin
+            case tosize of
+              OS_8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_UXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              OS_S8:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_B));
+              OS_S16:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_H));
+              { while "mov wN, wM" automatically inserts a zero-extension and
+                hence we could encode a 64->32 bit move like that, the problem
+                is that we then can't distinguish 64->32 from 32->32 moves, and
+                the 64->32 truncation could be removed altogether... So use a
+                different instruction }
+              OS_32,
+              OS_S32:
+                { in theory, reg1 should be 64 bit here (since fromsize>tosize),
+                  but because of the way location_force_register() tries to
+                  avoid superfluous zero/sign extensions, it's not always the
+                  case -> also force reg1 to to 64 bit }
+                list.concat(taicpu.op_reg_reg_const_const(A_UBFIZ,makeregsize(reg2,OS_64),makeregsize(reg1,OS_64),0,32));
+              OS_64,
+              OS_S64:
+                list.concat(setoppostfix(taicpu.op_reg_reg(A_SXT,reg2,makeregsize(reg1,OS_32)),PF_W));
+              else
+                internalerror(2002090901);
+            end;
+          end
+        else
+          begin
+            { 32 -> 32 bit move implies zero extension (sign extensions have
+              been handled above) -> also use for 32 <-> 64 bit moves }
+            if not(fromsize in [OS_64,OS_S64]) or
+               not(tosize in [OS_64,OS_S64]) then
+              instr:=taicpu.op_reg_reg(A_MOV,makeregsize(reg2,OS_32),makeregsize(reg1,OS_32))
+            else
+              instr:=taicpu.op_reg_reg(A_MOV,reg2,reg1);
+            list.Concat(instr);
+            { Notify the register allocator that we have written a move instruction so
+             it can try to eliminate it. }
+            add_move_instruction(instr);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadaddr_ref_reg(list: TAsmList; const ref: treference; r: tregister);
+      var
+         href: treference;
+         so: tshifterop;
+         op: tasmop;
+      begin
+        op:=A_LDR;
+        href:=ref;
+        { simplify as if we're going to perform a regular 64 bit load, using
+          "r" as the new base register if possible/necessary }
+        make_simple_ref(list,op,OS_ADDR,PF_None,href,r);
+        { load literal? }
+        if assigned(href.symbol) then
+          begin
+            if (href.base<>NR_NO) or
+               (href.index<>NR_NO) or
+               not assigned(href.symboldata) then
+              internalerror(2014110912);
+            list.concat(taicpu.op_reg_sym_ofs(A_ADR,r,href.symbol,href.offset));
+          end
+        else
+          begin
+            if href.index<>NR_NO then
+              begin
+                if href.shiftmode<>SM_None then
+                  begin
+                    { "add" supports a supperset of the shift modes supported by
+                      load/store instructions }
+                    shifterop_reset(so);
+                    so.shiftmode:=href.shiftmode;
+                    so.shiftimm:=href.shiftimm;
+                    list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,r,href.base,href.index,so));
+                  end
+                else
+                  a_op_reg_reg_reg(list,OP_ADD,OS_ADDR,href.index,href.base,r);
+              end
+            else if href.offset<>0 then
+              a_op_const_reg_reg(list,OP_ADD,OS_ADDR,href.offset,href.base,r)
+            else
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,href.base,r);
+          end;
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister);
+      begin
+        internalerror(2014122107)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister);
+      begin
+        internalerror(2014122108)
+      end;
+
+
+    procedure tcgaarch64.a_loadfpu_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference);
+      begin
+        internalerror(2014122109)
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tcgsize; reg1, reg2: tregister; shuffle: pmmshuffle);
+      var
+        instr: taicpu;
+      begin
+        if assigned(shuffle) and
+           not shufflescalar(shuffle) then
+          internalerror(2014122104);
+        if fromsize=tosize then
+          begin
+            instr:=taicpu.op_reg_reg(A_FMOV,reg2,reg1);
+            { Notify the register allocator that we have written a move
+              instruction so it can try to eliminate it. }
+            add_move_instruction(instr);
+          end
+        else
+          begin
+            if (reg_cgsize(reg1)<>fromsize) or
+               (reg_cgsize(reg2)<>tosize) then
+              internalerror(2014110913);
+            instr:=taicpu.op_reg_reg(A_FCVT,reg2,reg1);
+          end;
+        list.Concat(instr);
+      end;
+
+
+    procedure tcgaarch64.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tcgsize; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122105);
+         tmpreg:=NR_NO;
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=reg;
+             reg:=getmmregister(list,fromsize);
+           end;
+         handle_load_store(list,A_LDR,fromsize,PF_None,reg,ref);
+         if (fromsize<>tosize) then
+           a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tcgsize; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+       var
+         tmpreg: tregister;
+       begin
+         if assigned(shuffle) and
+            not shufflescalar(shuffle) then
+           internalerror(2014122106);
+         if (fromsize<>tosize) then
+           begin
+             tmpreg:=getmmregister(list,tosize);
+             a_loadmm_reg_reg(list,fromsize,tosize,reg,tmpreg,nil);
+             reg:=tmpreg;
+           end;
+         handle_load_store(list,A_STR,tosize,PF_NONE,reg,ref);
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle);
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122801);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+           internalerror(2014122803);
+         list.concat(taicpu.op_reg_reg(A_INS,mmreg,intreg));
+       end;
+
+
+     procedure tcgaarch64.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tcgsize; mmreg, intreg: tregister; shuffle: pmmshuffle);
+       begin
+         if not shufflescalar(shuffle) then
+           internalerror(2014122802);
+         if not(tcgsize2size[fromsize] in [4,8]) or
+            (tcgsize2size[fromsize]<>tcgsize2size[tosize]) then
+           internalerror(2014122804);
+         list.concat(taicpu.op_reg_reg(A_UMOV,intreg,mmreg));
+       end;
+
+
+    procedure tcgaarch64.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tcgsize; src, dst: tregister; shuffle: pmmshuffle);
+      begin
+        case op of
+          { "xor Vx,Vx" is used to initialize global regvars to 0 }
+          OP_XOR:
+            begin
+              if (src<>dst) or
+                 (reg_cgsize(src)<>size) or
+                 assigned(shuffle) then
+                internalerror(2015011401);
+              case size of
+                OS_F32,
+                OS_F64:
+                  list.concat(taicpu.op_reg_const(A_MOVI,makeregsize(dst,OS_F64),0));
+                else
+                  internalerror(2015011402);
+              end;
+            end
+          else
+            internalerror(2015011403);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
+      var
+        bitsize,
+        signbit: longint;
+      begin
+        if srcsize in [OS_64,OS_S64] then
+          begin
+            bitsize:=64;
+            signbit:=6;
+          end
+        else
+          begin
+            bitsize:=32;
+            signbit:=5;
+          end;
+        { source is 0 -> dst will have to become 255 }
+        list.concat(taicpu.op_reg_const(A_CMP,src,0));
+        if reverse then
+          begin
+            list.Concat(taicpu.op_reg_reg(A_CLZ,makeregsize(dst,srcsize),src));
+            { xor 31/63 is the same as setting the lower 5/6 bits to
+              "31/63-(lower 5/6 bits of dst)" }
+            list.Concat(taicpu.op_reg_reg_const(A_EOR,dst,dst,bitsize-1));
+          end
+        else
+          begin
+            list.Concat(taicpu.op_reg_reg(A_RBIT,makeregsize(dst,srcsize),src));
+            list.Concat(taicpu.op_reg_reg(A_CLZ,dst,dst));
+          end;
+        { set dst to -1 if src was 0 }
+        list.Concat(taicpu.op_reg_reg_reg_cond(A_CSINV,dst,dst,makeregsize(NR_XZR,dstsize),C_NE));
+        { mask the -1 to 255 if src was 0 (anyone find a two-instruction
+          branch-free version? All of mine are 3...) }
+        list.Concat(setoppostfix(taicpu.op_reg_reg(A_UXT,dst,dst),PF_B));
+      end;
+
+
+    procedure tcgaarch64.a_load_reg_ref_unaligned(list: TAsmList; fromsize, tosize: tcgsize; register: tregister; const ref: treference);
+      var
+        href: treference;
+        hreg1, hreg2, tmpreg: tregister;
+      begin
+        if fromsize in [OS_64,OS_S64] then
+          begin
+            { split into two 32 bit stores }
+            hreg1:=makeregsize(register,OS_32);
+            hreg2:=getintregister(list,OS_32);
+            a_op_const_reg_reg(list,OP_SHR,OS_64,32,register,makeregsize(hreg2,OS_64));
+            if target_info.endian=endian_big then
+              begin
+                tmpreg:=hreg1;
+                hreg1:=hreg2;
+                hreg2:=tmpreg;
+              end;
+            { can we use STP? }
+            if (ref.alignment=4) and
+               (simple_ref_type(A_STP,OS_32,PF_None,ref)=sr_simple) then
+              list.concat(taicpu.op_reg_reg_ref(A_STP,hreg1,hreg2,ref))
+            else
+              begin
+                a_load_reg_ref(list,OS_32,OS_32,hreg1,ref);
+                href:=ref;
+                inc(href.offset,4);
+                a_load_reg_ref(list,OS_32,OS_32,hreg2,href);
+              end;
+          end
+       else
+         inherited;
+      end;
+
+
+    procedure tcgaarch64.maybeadjustresult(list: TAsmList; op: topcg; size: tcgsize; dst: tregister);
+      const
+        overflowops = [OP_MUL,OP_IMUL,OP_SHL,OP_ADD,OP_SUB,OP_NOT,OP_NEG];
+      begin
+        if (op in overflowops) and
+           (size in [OS_8,OS_S8,OS_16,OS_S16]) then
+          a_load_reg_reg(list,OS_32,size,makeregsize(dst,OS_32),makeregsize(dst,OS_32))
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; reg: tregister);
+      begin
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            exit;
+          OP_MOVE:
+            a_load_const_reg(list,size,a,reg);
+          OP_NEG,OP_NOT:
+            internalerror(200306011);
+          else
+            a_op_const_reg_reg(list,op,size,a,reg,reg);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg(list:TAsmList;op:topcg;size:tcgsize;src,dst:tregister);
+      begin
+        Case op of
+          OP_NEG,
+          OP_NOT:
+            begin
+              list.concat(taicpu.op_reg_reg(TOpCG2AsmOpReg[op],dst,src));
+              maybeadjustresult(list,op,size,dst);
+            end
+          else
+            a_op_reg_reg_reg(list,op,size,src,dst,dst);
+        end;
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister);
+      var
+        l: tlocation;
+      begin
+        a_op_const_reg_reg_checkoverflow(list,op,size,a,src,dst,false,l);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister);
+      var
+        hreg: tregister;
+      begin
+        { no ROLV opcode... }
+        if op=OP_ROL then
+          begin
+            case size of
+              OS_32,OS_S32,
+              OS_64,OS_S64:
+                begin
+                  hreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,tcgsize2size[size]*8,hreg);
+                  a_op_reg_reg(list,OP_SUB,size,src1,hreg);
+                  a_op_reg_reg_reg(list,OP_ROR,size,hreg,src2,dst);
+                  exit;
+                end;
+              else
+                internalerror(2014111005);
+            end;
+          end
+        else if (op=OP_ROR) and
+           not(size in [OS_32,OS_S32,OS_64,OS_S64]) then
+          internalerror(2014111006);
+        if TOpCG2AsmOpReg[op]=A_NONE then
+          internalerror(2014111007);
+        list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1));
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_const_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; a: tcgint; src, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        shiftcountmask: longint;
+        constreg: tregister;
+      begin
+        { add/sub instructions have only positive immediate operands }
+        if (op in [OP_ADD,OP_SUB]) and
+           (a<0) then
+          begin
+            if op=OP_ADD then
+              op:=op_SUB
+            else
+              op:=OP_ADD;
+            { avoid range/overflow error in case a = low(tcgint) }
+{$push}{$r-}{$q-}
+            a:=-a;
+{$pop}
+          end;
+        ovloc.loc:=LOC_VOID;
+        optimize_op_const(size,op,a);
+        case op of
+          OP_NONE:
+            begin
+              a_load_reg_reg(list,size,size,src,dst);
+              exit;
+            end;
+          OP_MOVE:
+            begin
+              a_load_const_reg(list,size,a,dst);
+              exit;
+            end;
+        end;
+        case op of
+          OP_ADD,
+          OP_SUB:
+            begin
+              handle_reg_imm12_reg(list,TOpCG2AsmOpImm[op],size,src,a,dst,NR_NO,setflags,true);
+              { on a 64 bit target, overflows with smaller data types
+                are handled via range errors }
+              if setflags and
+                 (size in [OS_64,OS_S64]) then
+                begin
+                  location_reset(ovloc,LOC_FLAGS,OS_8);
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                end;
+            end;
+          OP_OR,
+          OP_AND,
+          OP_XOR:
+            begin
+              if not(size in [OS_64,OS_S64]) then
+                a:=cardinal(a);
+              if is_shifter_const(a,size) then
+                list.concat(taicpu.op_reg_reg_const(TOpCG2AsmOpReg[op],dst,src,a))
+              else
+                begin
+                  constreg:=getintregister(list,size);
+                  a_load_const_reg(list,size,a,constreg);
+                  a_op_reg_reg_reg(list,op,size,constreg,src,dst);
+                end;
+            end;
+          OP_SHL,
+          OP_SHR,
+          OP_SAR:
+            begin
+              if size in [OS_64,OS_S64] then
+                shiftcountmask:=63
+              else
+                shiftcountmask:=31;
+              if (a and shiftcountmask)<>0 Then
+                list.concat(taicpu.op_reg_reg_const(
+                  TOpCG2AsmOpImm[Op],dst,src,a and shiftcountmask))
+              else
+                a_load_reg_reg(list,size,size,src,dst);
+              if (a and not(tcgint(shiftcountmask)))<>0 then
+                internalError(2014112101);
+            end;
+          OP_ROL,
+          OP_ROR:
+            begin
+              case size of
+                OS_32,OS_S32:
+                  if (a and not(tcgint(31)))<>0 then
+                    internalError(2014112102);
+                OS_64,OS_S64:
+                  if (a and not(tcgint(63)))<>0 then
+                    internalError(2014112103);
+                else
+                  internalError(2014112104);
+              end;
+              { there's only a ror opcode }
+              if op=OP_ROL then
+                a:=(tcgsize2size[size]*8)-a;
+              list.concat(taicpu.op_reg_reg_const(A_ROR,dst,src,a));
+            end;
+          OP_MUL,
+          OP_IMUL,
+          OP_DIV,
+          OP_IDIV:
+            begin
+              constreg:=getintregister(list,size);
+              a_load_const_reg(list,size,a,constreg);
+              a_op_reg_reg_reg_checkoverflow(list,op,size,constreg,src,dst,setflags,ovloc);
+            end;
+          else
+            internalerror(2014111403);
+        end;
+        maybeadjustresult(list,op,size,dst);
+      end;
+
+
+    procedure tcgaarch64.a_op_reg_reg_reg_checkoverflow(list: TAsmList; op: topcg; size: tcgsize; src1, src2, dst: tregister; setflags : boolean; var ovloc : tlocation);
+      var
+        tmpreg1: tregister;
+      begin
+        ovloc.loc:=LOC_VOID;
+        { overflow can only occur with 64 bit calculations on 64 bit cpus }
+        if setflags and
+           (size in [OS_64,OS_S64]) then
+          begin
+            case op of
+              OP_ADD,
+              OP_SUB:
+                begin
+                  list.concat(setoppostfix(taicpu.op_reg_reg_reg(TOpCG2AsmOpReg[op],dst,src2,src1),PF_S));
+                  ovloc.loc:=LOC_FLAGS;
+                  if size=OS_64 then
+                    if op=OP_ADD then
+                      ovloc.resflags:=F_CS
+                    else
+                      ovloc.resflags:=F_CC
+                  else
+                    ovloc.resflags:=F_VS;
+                  { finished; since we won't call through to a_op_reg_reg_reg,
+                    adjust the result here if necessary }
+                  maybeadjustresult(list,op,size,dst);
+                  exit;
+                end;
+              OP_MUL:
+                begin
+                  { check whether the upper 64 bit of the 128 bit product is 0 }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_UMULH,tmpreg1,src2,src1));
+                  list.concat(taicpu.op_reg_const(A_CMP,tmpreg1,0));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { still have to perform the actual multiplication  }
+                end;
+              OP_IMUL:
+                begin
+                  { check whether the sign bit of the (128 bit) result is the
+                    same as "sign bit of src1" xor "signbit of src2" (if so, no
+                    overflow and the xor-product of all sign bits is 0) }
+                  tmpreg1:=getintregister(list,OS_64);
+                  list.concat(taicpu.op_reg_reg_reg(A_SMULH,tmpreg1,src2,src1));
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src1));
+                  list.concat(taicpu.op_reg_reg_reg(A_EOR,tmpreg1,tmpreg1,src2));
+                  list.concat(taicpu.op_reg_const(A_TST,tmpreg1,$80000000));
+                  ovloc.loc:=LOC_FLAGS;
+                  ovloc.resflags:=F_NE;
+                  { still have to perform the actual multiplication }
+                end;
+              OP_IDIV,
+              OP_DIV:
+                begin
+                  { not handled here, needs div-by-zero check (dividing by zero
+                    just gives a 0 result on aarch64), and low(int64) div -1
+                    check for overflow) }
+                  internalerror(2014122101);
+                end;
+            end;
+          end;
+        a_op_reg_reg_reg(list,op,size,src1,src2,dst);
+      end;
+
+
+
+  {*************** compare instructructions ****************}
+
+    procedure tcgaarch64.a_cmp_const_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
+      var
+        op: tasmop;
+      begin
+        if a>=0 then
+          op:=A_CMP
+        else
+          op:=A_CMN;
+        { avoid range/overflow error in case a=low(tcgint) }
+{$push}{$r-}{$q-}
+        handle_reg_imm12_reg(list,op,size,reg,abs(a),NR_XZR,NR_NO,false,false);
+{$pop}
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1,reg2: tregister; l: tasmlabel);
+      begin
+        list.concat(taicpu.op_reg_reg(A_CMP,reg2,reg1));
+        a_jmp_cond(list,cmp_op,l);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_always(list: TAsmList; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(l.name));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_name(list: TAsmList; const s: string);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,current_asmdata.RefAsmSymbol(s));
+        ai.is_jmp:=true;
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_cond(list: TAsmList; cond: TOpCmp; l: TAsmLabel);
+      var
+        ai: taicpu;
+      begin
+        ai:=TAiCpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(TOpCmp2AsmCond[cond]);
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.a_jmp_flags(list: TAsmList; const f: tresflags; l: tasmlabel);
+      var
+        ai : taicpu;
+      begin
+        ai:=Taicpu.op_sym(A_B,l);
+        ai.is_jmp:=true;
+        ai.SetCondition(flags_to_cond(f));
+        list.Concat(ai);
+      end;
+
+
+    procedure tcgaarch64.g_flags2reg(list: TAsmList; size: tcgsize; const f: tresflags; reg: tregister);
+      begin
+        list.concat(taicpu.op_reg_cond(A_CSET,reg,flags_to_cond(f)));
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck(list: TAsmList; const loc: tlocation; def: tdef);
+      begin
+        { we need an explicit overflow location, because there are many
+          possibilities (not just the overflow flag, which is only used for
+          signed add/sub) }
+        internalerror(2014112303);
+      end;
+
+
+    procedure tcgaarch64.g_overflowcheck_loc(list: TAsmList; const loc: tlocation; def: tdef; ovloc : tlocation);
+      var
+        hl : tasmlabel;
+        hflags : tresflags;
+      begin
+        if not(cs_check_overflow in current_settings.localswitches) then
+          exit;
+        current_asmdata.getjumplabel(hl);
+        case ovloc.loc of
+          LOC_FLAGS:
+            begin
+              hflags:=ovloc.resflags;
+              inverse_flags(hflags);
+              cg.a_jmp_flags(list,hflags,hl);
+            end;
+          else
+            internalerror(2014112304);
+        end;
+        a_call_name(list,'FPC_OVERFLOW',false);
+        a_label(list,hl);
+      end;
+
+  { *********** entry/exit code and address loading ************ }
+
+    function tcgaarch64.save_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister): longint;
+      var
+        ref: treference;
+        sr: tsuperregister;
+        pairreg: tregister;
+      begin
+        result:=0;
+        reference_reset_base(ref,NR_SP,-16,16);
+        ref.addressmode:=AM_PREINDEXED;
+        pairreg:=NR_NO;
+        { store all used registers pairwise }
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                inc(result,16);
+                list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,newreg(rt,sr,sub),ref));
+                pairreg:=NR_NO
+              end;
+        { one left -> store twice (stack must be 16 bytes aligned) }
+        if pairreg<>NR_NO then
+          begin
+            list.concat(taicpu.op_reg_reg_ref(A_STP,pairreg,pairreg,ref));
+            inc(result,16);
+          end;
+      end;
+
+
+    procedure FixupOffsets(p:TObject;arg:pointer);
+      var
+        sym: tabstractnormalvarsym absolute p;
+      begin
+        if (tsym(p).typ in [paravarsym,localvarsym]) and
+          (sym.localloc.loc=LOC_REFERENCE) and
+          (sym.localloc.reference.base=NR_STACK_POINTER_REG) then
+          begin
+            sym.localloc.reference.base:=NR_FRAME_POINTER_REG;
+            dec(sym.localloc.reference.offset,PLongint(arg)^);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_proc_entry(list: TAsmList; localsize: longint; nostackframe: boolean);
+      var
+        ref: treference;
+        totalstackframesize: longint;
+      begin
+        if nostackframe then
+          exit;
+        { stack pointer has to be aligned to 16 bytes at all times }
+        localsize:=align(localsize,16);
+
+        { save stack pointer and return address }
+        reference_reset_base(ref,NR_SP,-16,16);
+        ref.addressmode:=AM_PREINDEXED;
+        list.concat(taicpu.op_reg_reg_ref(A_STP,NR_FP,NR_LR,ref));
+        { initialise frame pointer }
+        a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_SP,NR_FP);
+
+        totalstackframesize:=localsize;
+        { save modified integer registers }
+        inc(totalstackframesize,
+          save_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE));
+        { only the lower 64 bits of the modified vector registers need to be
+          saved; if the caller needs the upper 64 bits, it has to save them
+          itself }
+        inc(totalstackframesize,
+          save_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD));
+
+        { allocate stack space }
+        if localsize<>0 then
+          begin
+            localsize:=align(localsize,16);
+            current_procinfo.final_localsize:=localsize;
+            handle_reg_imm12_reg(list,A_SUB,OS_ADDR,NR_SP,localsize,NR_SP,NR_IP0,false,true);
+          end;
+        { By default, we use the frame pointer to access parameters passed via
+          the stack and the stack pointer to address local variables and temps
+          because
+           a) we can use bigger positive than negative offsets (so accessing
+              locals via negative offsets from the frame pointer would be less
+              efficient)
+           b) we don't know the local size while generating the code, so
+              accessing the parameters via the stack pointer is not possible
+              without copying them
+          The problem with this is the get_frame() intrinsic:
+           a) it must return the same value as what we pass as parentfp
+              parameter, since that's how it's used in the TP-style objects unit
+           b) its return value must usable to access all local data from a
+              routine (locals and parameters), since it's all the nested
+              routines have access to
+           c) its return value must be usable to construct a backtrace, as it's
+              also used by the exception handling routines
+
+          The solution we use here, based on something similar that's done in
+          the MIPS port, is to generate all accesses to locals in the routine
+          itself SP-relative, and then after the code is generated and the local
+          size is known (namely, here), we change all SP-relative variables/
+          parameters into FP-relative ones. This means that they'll be accessed
+          less efficiently from nested routines, but those accesses are indirect
+          anyway and at least this way they can be accessed at all
+        }
+        if current_procinfo.has_nestedprocs then
+          begin
+            current_procinfo.procdef.localst.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+            current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@totalstackframesize);
+          end;
+      end;
+
+
+    procedure tcgaarch64.g_maybe_got_init(list : TAsmList);
+      begin
+        { nothing to do on Darwin; check on ELF targets }
+        if not(target_info.system in systems_darwin) then
+          internalerror(2014112601);
+      end;
+
+
+    procedure tcgaarch64.g_restore_registers(list:TAsmList);
+      begin
+        { done in g_proc_exit }
+      end;
+
+
+    procedure tcgaarch64.load_regs(list: TAsmList; rt: tregistertype; lowsr, highsr: tsuperregister; sub: tsubregister);
+      var
+        ref: treference;
+        sr, highestsetsr: tsuperregister;
+        pairreg: tregister;
+        regcount: longint;
+      begin
+        reference_reset_base(ref,NR_SP,16,16);
+        ref.addressmode:=AM_POSTINDEXED;
+        { highest reg stored twice? }
+        regcount:=0;
+        highestsetsr:=RS_NO;
+        for sr:=lowsr to highsr do
+          if sr in rg[rt].used_in_proc then
+            begin
+              inc(regcount);
+              highestsetsr:=sr;
+            end;
+        if odd(regcount) then
+          begin
+            list.concat(taicpu.op_reg_ref(A_LDR,newreg(rt,highestsetsr,sub),ref));
+            highestsetsr:=pred(highestsetsr);
+          end;
+        { load all (other) used registers pairwise }
+        pairreg:=NR_NO;
+        for sr:=highestsetsr downto lowsr do
+          if sr in rg[rt].used_in_proc then
+            if pairreg=NR_NO then
+              pairreg:=newreg(rt,sr,sub)
+            else
+              begin
+                list.concat(taicpu.op_reg_reg_ref(A_LDP,newreg(rt,sr,sub),pairreg,ref));
+                pairreg:=NR_NO
+              end;
+        { There can't be any register left }
+        if pairreg<>NR_NO then
+          internalerror(2014112602);
+      end;
+
+
+
+    procedure tcgaarch64.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
+      var
+        ref: treference;
+        regsstored: boolean;
+        sr: tsuperregister;
+      begin
+        if not nostackframe then
+          begin
+            { if no registers have been stored, we don't have to subtract the
+              allocated temp space from the stack pointer }
+            regsstored:=false;
+            for sr:=RS_X19 to RS_X28 do
+              if sr in rg[R_INTREGISTER].used_in_proc then
+                begin
+                  regsstored:=true;
+                  break;
+                end;
+            if not regsstored then
+              for sr:=RS_D8 to RS_D15 do
+                if sr in rg[R_MMREGISTER].used_in_proc then
+                  begin
+                    regsstored:=true;
+                    break;
+                  end;
+            { restore registers (and stack pointer) }
+            if regsstored then
+              begin
+                if current_procinfo.final_localsize<>0 then
+                  handle_reg_imm12_reg(list,A_ADD,OS_ADDR,NR_SP,current_procinfo.final_localsize,NR_SP,NR_IP0,false,true);
+                load_regs(list,R_MMREGISTER,RS_D8,RS_D15,R_SUBMMD);
+                load_regs(list,R_INTREGISTER,RS_X19,RS_X28,R_SUBWHOLE);
+              end
+            else if current_procinfo.final_localsize<>0 then
+              { restore stack pointer }
+              a_load_reg_reg(list,OS_ADDR,OS_ADDR,NR_FP,NR_SP);
+
+            { restore framepointer and return address }
+            reference_reset_base(ref,NR_SP,16,16);
+            ref.addressmode:=AM_POSTINDEXED;
+            list.concat(taicpu.op_reg_reg_ref(A_LDP,NR_FP,NR_LR,ref));
+          end;
+
+        { return }
+        list.concat(taicpu.op_none(A_RET));
+      end;
+
+
+    procedure tcgaarch64.g_save_registers(list : TAsmList);
+      begin
+        { done in g_proc_entry }
+      end;
+
+
+    { ************* concatcopy ************ }
+
+    procedure tcgaarch64.g_concatcopy_move(list : TAsmList;const source,dest : treference;len : tcgint);
+      var
+        paraloc1,paraloc2,paraloc3 : TCGPara;
+        pd : tprocdef;
+      begin
+        pd:=search_system_proc('MOVE');
+        paraloc1.init;
+        paraloc2.init;
+        paraloc3.init;
+        paramanager.getintparaloc(pd,1,paraloc1);
+        paramanager.getintparaloc(pd,2,paraloc2);
+        paramanager.getintparaloc(pd,3,paraloc3);
+        a_load_const_cgpara(list,OS_SINT,len,paraloc3);
+        a_loadaddr_ref_cgpara(list,dest,paraloc2);
+        a_loadaddr_ref_cgpara(list,source,paraloc1);
+        paramanager.freecgpara(list,paraloc3);
+        paramanager.freecgpara(list,paraloc2);
+        paramanager.freecgpara(list,paraloc1);
+        alloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        alloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        a_call_name(list,'FPC_MOVE',false);
+        dealloccpuregisters(list,R_MMREGISTER,paramanager.get_volatile_registers_mm(pocall_default));
+        dealloccpuregisters(list,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
+        paraloc3.done;
+        paraloc2.done;
+        paraloc1.done;
+      end;
+
+
+    procedure tcgaarch64.g_concatcopy(list: TAsmList; const source, dest: treference; len: tcgint);
+
+      var
+        sourcebasereplaced, destbasereplaced: boolean;
+
+      { get optimal memory operation to use for loading/storing data
+        in an unrolled loop }
+      procedure getmemop(scaledop, unscaledop: tasmop; const startref, endref: treference; opsize: tcgsize; postfix: toppostfix; out memop: tasmop; out needsimplify: boolean);
+        begin
+          if (simple_ref_type(scaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(scaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=true;
+            end
+          else if (unscaledop<>A_NONE) and
+             (simple_ref_type(unscaledop,opsize,postfix,startref)=sr_simple) and
+             (simple_ref_type(unscaledop,opsize,postfix,endref)=sr_simple) then
+            begin
+              memop:=unscaledop;
+              needsimplify:=false;
+            end
+          else
+            begin
+              memop:=scaledop;
+              needsimplify:=true;
+            end;
+        end;
+
+      { adjust the offset and/or addressing mode after a load/store so it's
+        correct for the next one of the same size }
+      procedure updaterefafterloadstore(var ref: treference; oplen: longint);
+        begin
+          case ref.addressmode of
+            AM_OFFSET:
+              inc(ref.offset,oplen);
+            AM_POSTINDEXED:
+              { base register updated by instruction, next offset can remain
+                the same }
+              ;
+            AM_PREINDEXED:
+              begin
+                { base register updated by instruction -> next instruction can
+                  use post-indexing with offset = sizeof(operation) }
+                ref.offset:=0;
+                ref.addressmode:=AM_OFFSET;
+              end;
+          end;
+        end;
+
+      { generate a load/store and adjust the reference offset to the next
+        memory location if necessary }
+      procedure genloadstore(list: TAsmList; op: tasmop; reg: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_ref(op,reg,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]);
+        end;
+
+      { generate a dual load/store (ldp/stp) and adjust the reference offset to
+        the next memory location if necessary }
+      procedure gendualloadstore(list: TAsmList; op: tasmop; reg1, reg2: tregister; var ref: treference; postfix: toppostfix; opsize: tcgsize);
+        begin
+          list.concat(setoppostfix(taicpu.op_reg_reg_ref(op,reg1,reg2,ref),postfix));
+          updaterefafterloadstore(ref,tcgsize2size[opsize]*2);
+        end;
+
+      { turn a reference into a pre- or post-indexed reference for use in a
+        load/store of a particular size }
+      procedure makesimpleforcopy(list: TAsmList; var scaledop: tasmop; opsize: tcgsize; postfix: toppostfix; forcepostindexing: boolean; var ref: treference; var basereplaced: boolean);
+        var
+          tmpreg: tregister;
+          scaledoffset: longint;
+          orgaddressmode: taddressmode;
+        begin
+          scaledoffset:=tcgsize2size[opsize];
+          if scaledop in [A_LDP,A_STP] then
+            scaledoffset:=scaledoffset*2;
+          { can we use the reference as post-indexed without changes? }
+          if forcepostindexing then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_POSTINDEXED;
+              if (orgaddressmode=AM_POSTINDEXED) or
+                 ((ref.offset=0) and
+                  (simple_ref_type(scaledop,opsize,postfix,ref)=sr_simple)) then
+                begin
+                  { just change the post-indexed offset to the access size }
+                  ref.offset:=scaledoffset;
+                  { and replace the base register if that didn't happen yet
+                    (could be sp or a regvar) }
+                  if not basereplaced then
+                    begin
+                      tmpreg:=getaddressregister(list);
+                      a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                      ref.base:=tmpreg;
+                      basereplaced:=true;
+                    end;
+                  exit;
+                end;
+              ref.addressmode:=orgaddressmode;
+            end;
+{$ifdef dummy}
+          This could in theory be useful in case you have a concatcopy from
+          e.g. x1+255 to x1+267 *and* the reference is aligned, but this seems
+          very unlikely. Disabled because it still needs fixes, as it
+          also generates pre-indexed loads right now at the very end for the
+          left-over gencopies
+
+          { can we turn it into a pre-indexed reference for free? (after the
+            first operation, it will be turned into an offset one) }
+          if not forcepostindexing and
+             (ref.offset<>0) then
+            begin
+              orgaddressmode:=ref.addressmode;
+              ref.addressmode:=AM_PREINDEXED;
+              tmpreg:=ref.base;
+              if not basereplaced and
+                 (ref.base=tmpreg) then
+                begin
+                  tmpreg:=getaddressregister(list);
+                  a_load_reg_reg(list,OS_ADDR,OS_ADDR,ref.base,tmpreg);
+                  ref.base:=tmpreg;
+                  basereplaced:=true;
+                end;
+              if simple_ref_type(scaledop,opsize,postfix,ref)<>sr_simple then
+                make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              exit;
+            end;
+{$endif dummy}
+          if not forcepostindexing then
+            begin
+              ref.addressmode:=AM_OFFSET;
+              make_simple_ref(list,scaledop,opsize,postfix,ref,NR_NO);
+              { this may still cause problems if the final offset is no longer
+                a simple ref; it's a bit complicated to pass all information
+                through at all places and check that here, so play safe: we
+                currently never generate unrolled copies for more than 64
+                bytes (32 with non-double-register copies) }
+              if ref.index=NR_NO then
+                begin
+                  if ((scaledop in [A_LDP,A_STP]) and
+                      (ref.offset<((64-8)*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDUR,A_STUR]) and
+                      (ref.offset<(255-8*tcgsize2size[opsize]))) or
+                     ((scaledop in [A_LDR,A_STR]) and
+                      (ref.offset<((4096-8)*tcgsize2size[opsize]))) then
+                    exit;
+                end;
+            end;
+          tmpreg:=getaddressregister(list);
+          a_loadaddr_ref_reg(list,ref,tmpreg);
+          basereplaced:=true;
+          if forcepostindexing then
+            begin
+              reference_reset_base(ref,tmpreg,scaledoffset,ref.alignment);
+              ref.addressmode:=AM_POSTINDEXED;
+            end
+          else
+            begin
+              reference_reset_base(ref,tmpreg,0,ref.alignment);
+              ref.addressmode:=AM_OFFSET;
+            end
+        end;
+
+      { prepare a reference for use by gencopy. This is done both after the
+        unrolled and regular copy loop -> get rid of post-indexing mode, make
+        sure ref is valid }
+      procedure preparecopy(list: tasmlist; scaledop, unscaledop: tasmop; var ref: treference; opsize: tcgsize; postfix: toppostfix; out op: tasmop; var basereplaced: boolean);
+        var
+          simplify: boolean;
+        begin
+          if ref.addressmode=AM_POSTINDEXED then
+            ref.offset:=tcgsize2size[opsize];
+          getmemop(scaledop,scaledop,ref,ref,opsize,postfix,op,simplify);
+          if simplify then
+            begin
+              makesimpleforcopy(list,scaledop,opsize,postfix,false,ref,basereplaced);
+              op:=scaledop;
+            end;
+        end;
+
+      { generate a copy from source to dest of size opsize/postfix }
+      procedure gencopy(list: TAsmList; var source, dest: treference; postfix: toppostfix; opsize: tcgsize);
+        var
+          reg: tregister;
+          loadop, storeop: tasmop;
+        begin
+          preparecopy(list,A_LDR,A_LDUR,source,opsize,postfix,loadop,sourcebasereplaced);
+          preparecopy(list,A_STR,A_STUR,dest,opsize,postfix,storeop,destbasereplaced);
+          reg:=getintregister(list,opsize);
+          genloadstore(list,loadop,reg,source,postfix,opsize);
+          genloadstore(list,storeop,reg,dest,postfix,opsize);
+        end;
+
+
+      { copy the leftovers after an unrolled or regular copy loop }
+      procedure gencopyleftovers(list: TAsmList; var source, dest: treference; len: longint);
+        begin
+          { stop post-indexing if we did so in the loop, since in that case all
+            offsets definitely can be represented now }
+          if source.addressmode=AM_POSTINDEXED then
+            begin
+              source.addressmode:=AM_OFFSET;
+              source.offset:=0;
+            end;
+          if dest.addressmode=AM_POSTINDEXED then
+            begin
+              dest.addressmode:=AM_OFFSET;
+              dest.offset:=0;
+            end;
+          { transfer the leftovers }
+          if len>=8 then
+            begin
+              dec(len,8);
+              gencopy(list,source,dest,PF_NONE,OS_64);
+            end;
+          if len>=4 then
+            begin
+              dec(len,4);
+              gencopy(list,source,dest,PF_NONE,OS_32);
+            end;
+          if len>=2 then
+            begin
+              dec(len,2);
+              gencopy(list,source,dest,PF_H,OS_16);
+            end;
+          if len>=1 then
+            begin
+              dec(len);
+              gencopy(list,source,dest,PF_B,OS_8);
+            end;
+        end;
+
+
+      const
+        { load_length + loop dec + cbnz }
+        loopoverhead=12;
+        { loop overhead + load + store }
+        totallooplen=loopoverhead + 8;
+      var
+        totalalign: longint;
+        maxlenunrolled: tcgint;
+        loadop, storeop: tasmop;
+        opsize: tcgsize;
+        postfix: toppostfix;
+        tmpsource, tmpdest: treference;
+        scaledstoreop, unscaledstoreop,
+        scaledloadop, unscaledloadop: tasmop;
+        regs: array[1..8] of tregister;
+        countreg: tregister;
+        i, regcount: longint;
+        hl: tasmlabel;
+        simplifysource, simplifydest: boolean;
+      begin
+        if len=0 then
+          exit;
+        sourcebasereplaced:=false;
+        destbasereplaced:=false;
+        { maximum common alignment }
+        totalalign:=max(1,newalignment(source.alignment,dest.alignment));
+        { use a simple load/store? }
+        if (len in [1,2,4,8]) and
+           ((totalalign>=(len div 2)) or
+            (source.alignment=len) or
+            (dest.alignment=len)) then
+          begin
+            opsize:=int_cgsize(len);
+            a_load_ref_ref(list,opsize,opsize,source,dest);
+            exit;
+          end;
+
+        { alignment > length is not useful, and would break some checks below }
+        while totalalign>len do
+          totalalign:=totalalign div 2;
+
+        { operation sizes to use based on common alignment }
+        case totalalign of
+          1:
+            begin
+              postfix:=PF_B;
+              opsize:=OS_8;
+            end;
+          2:
+            begin
+              postfix:=PF_H;
+              opsize:=OS_16;
+            end;
+          4:
+            begin
+              postfix:=PF_None;
+              opsize:=OS_32;
+            end
+          else
+            begin
+              totalalign:=8;
+              postfix:=PF_None;
+              opsize:=OS_64;
+            end;
+        end;
+        { maximum length to handled with an unrolled loop (4 loads + 4 stores) }
+        maxlenunrolled:=min(totalalign,8)*4;
+        { ldp/stp -> 2 registers per instruction }
+        if (totalalign>=4) and
+           (len>=totalalign*2) then
+          begin
+            maxlenunrolled:=maxlenunrolled*2;
+            scaledstoreop:=A_STP;
+            scaledloadop:=A_LDP;
+            unscaledstoreop:=A_NONE;
+            unscaledloadop:=A_NONE;
+          end
+        else
+          begin
+            scaledstoreop:=A_STR;
+            scaledloadop:=A_LDR;
+            unscaledstoreop:=A_STUR;
+            unscaledloadop:=A_LDUR;
+          end;
+        { we only need 4 instructions extra to call FPC_MOVE }
+        if cs_opt_size in current_settings.optimizerswitches then
+          maxlenunrolled:=maxlenunrolled div 2;
+        if (len>maxlenunrolled) and
+           (len>totalalign*8) then
+          begin
+            g_concatcopy_move(list,source,dest,len);
+            exit;
+          end;
+
+        simplifysource:=true;
+        simplifydest:=true;
+        tmpsource:=source;
+        tmpdest:=dest;
+        { can we directly encode all offsets in an unrolled loop? }
+        if len<=maxlenunrolled then
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop; len/opsize/align: '+tostr(len)+'/'+tostr(tcgsize2size[opsize])+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { the leftovers will be handled separately -> -(len mod opsize) }
+            inc(tmpsource.offset,len-(len mod tcgsize2size[opsize]));
+            { additionally, the last regular load/store will be at
+              offset+len-opsize (if len-(len mod opsize)>len) }
+            if tmpsource.offset>source.offset then
+              dec(tmpsource.offset,tcgsize2size[opsize]);
+            getmemop(scaledloadop,unscaledloadop,source,tmpsource,opsize,postfix,loadop,simplifysource);
+            inc(tmpdest.offset,len-(len mod tcgsize2size[opsize]));
+            if tmpdest.offset>dest.offset then
+              dec(tmpdest.offset,tcgsize2size[opsize]);
+            getmemop(scaledstoreop,unscaledstoreop,dest,tmpdest,opsize,postfix,storeop,simplifydest);
+            tmpsource:=source;
+            tmpdest:=dest;
+            { if we can't directly encode all offsets, simplify }
+            if simplifysource then
+              begin
+                loadop:=scaledloadop;
+                makesimpleforcopy(list,loadop,opsize,postfix,false,tmpsource,sourcebasereplaced);
+              end;
+            if simplifydest then
+              begin
+                storeop:=scaledstoreop;
+                makesimpleforcopy(list,storeop,opsize,postfix,false,tmpdest,destbasereplaced);
+              end;
+            regcount:=len div tcgsize2size[opsize];
+            { in case we transfer two registers at a time, we copy an even
+              number of registers }
+            if loadop=A_LDP then
+              regcount:=regcount and not(1);
+            { initialise for dfa }
+            regs[low(regs)]:=NR_NO;
+            { max 4 loads/stores -> max 8 registers (in case of ldp/stdp) }
+            for i:=1 to regcount do
+              regs[i]:=getintregister(list,opsize);
+            if loadop=A_LDP then
+              begin
+                { load registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,loadop,regs[i*2-1],regs[i*2],tmpsource,postfix,opsize);
+                { store registers }
+                for i:=1 to (regcount div 2) do
+                  gendualloadstore(list,storeop,regs[i*2-1],regs[i*2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                for i:=1 to regcount do
+                  genloadstore(list,loadop,regs[i],tmpsource,postfix,opsize);
+                for i:=1 to regcount do
+                  genloadstore(list,storeop,regs[i],tmpdest,postfix,opsize);
+              end;
+            { leftover }
+            len:=len-regcount*tcgsize2size[opsize];
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy unrolled loop leftover: '+tostr(len))));
+{$endif extdebug}
+          end
+        else
+          begin
+{$ifdef extdebug}
+            list.concat(tai_comment.Create(strpnew('concatcopy regular loop; len/align: '+tostr(len)+'/'+tostr(totalalign))));
+{$endif extdebug}
+            { regular loop -> definitely use post-indexing }
+            loadop:=scaledloadop;
+            makesimpleforcopy(list,loadop,opsize,postfix,true,tmpsource,sourcebasereplaced);
+            storeop:=scaledstoreop;
+            makesimpleforcopy(list,storeop,opsize,postfix,true,tmpdest,destbasereplaced);
+            current_asmdata.getjumplabel(hl);
+            countreg:=getintregister(list,OS_32);
+            if loadop=A_LDP then
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize]*2,countreg)
+            else
+              a_load_const_reg(list,OS_32,len div tcgsize2size[opsize],countreg);
+            a_label(list,hl);
+            a_op_const_reg(list,OP_SUB,OS_32,1,countreg);
+            if loadop=A_LDP then
+              begin
+                regs[1]:=getintregister(list,opsize);
+                regs[2]:=getintregister(list,opsize);
+                gendualloadstore(list,loadop,regs[1],regs[2],tmpsource,postfix,opsize);
+                gendualloadstore(list,storeop,regs[1],regs[2],tmpdest,postfix,opsize);
+              end
+            else
+              begin
+                regs[1]:=getintregister(list,opsize);
+                genloadstore(list,loadop,regs[1],tmpsource,postfix,opsize);
+                genloadstore(list,storeop,regs[1],tmpdest,postfix,opsize);
+              end;
+            list.concat(taicpu.op_reg_sym_ofs(A_CBNZ,countreg,hl,0));
+            len:=len mod tcgsize2size[opsize];
+          end;
+        gencopyleftovers(list,tmpsource,tmpdest,len);
+      end;
+
+
+    procedure tcgaarch64.g_adjust_self_value(list:TAsmList;procdef: tprocdef;ioffset: tcgint);
+      begin
+        { This method is integrated into g_intf_wrapper and shouldn't be called separately }
+        InternalError(2013020102);
+      end;
+
+
+    procedure tcgaarch64.g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint);
+      var
+        make_global: boolean;
+        href: treference;
+        hsym: tsym;
+        paraloc: pcgparalocation;
+        op: tasmop;
+      begin
+        if not(procdef.proctypeoption in [potype_function,potype_procedure]) then
+          Internalerror(200006137);
+        if not assigned(procdef.struct) or
+           (procdef.procoptions*[po_classmethod, po_staticmethod,
+             po_methodpointer, po_interrupt, po_iocheck]<>[]) then
+          Internalerror(200006138);
+        if procdef.owner.symtabletype<>ObjectSymtable then
+          Internalerror(200109191);
+
+        make_global:=false;
+        if (not current_module.is_unit) or create_smartlink_library or
+           (procdef.owner.defowner.owner.symtabletype=globalsymtable) then
+          make_global:=true;
+
+        if make_global then
+          list.concat(Tai_symbol.Createname_global(labelname,AT_FUNCTION,0))
+        else
+          list.concat(Tai_symbol.Createname(labelname,AT_FUNCTION,0));
+
+        { set param1 interface to self  }
+        procdef.init_paraloc_info(callerside);
+        hsym:=tsym(procdef.parast.Find('self'));
+        if not(assigned(hsym) and
+          (hsym.typ=paravarsym)) then
+          internalerror(2010103101);
+        paraloc:=tparavarsym(hsym).paraloc[callerside].location;
+        if assigned(paraloc^.next) then
+          InternalError(2013020101);
+
+        case paraloc^.loc of
+          LOC_REGISTER:
+            handle_reg_imm12_reg(list,A_SUB,paraloc^.size,paraloc^.register,ioffset,paraloc^.register,NR_IP0,false,true);
+          else
+            internalerror(2010103102);
+        end;
+
+        if (po_virtualmethod in procdef.procoptions) and
+            not is_objectpascal_helper(procdef.struct) then
+          begin
+            if (procdef.extnumber=$ffff) then
+              Internalerror(200006139);
+            { mov  0(%rdi),%rax ; load vmt}
+            reference_reset_base(href,paraloc^.register,0,sizeof(pint));
+            getcpuregister(list,NR_IP0);
+            a_load_ref_reg(list,OS_ADDR,OS_ADDR,href,NR_IP0);
+            { jmp *vmtoffs(%eax) ; method offs }
+            reference_reset_base(href,NR_IP0,tobjectdef(procdef.struct).vmtmethodoffset(procdef.extnumber),sizeof(pint));
+            op:=A_LDR;
+            make_simple_ref(list,op,OS_ADDR,PF_None,href,NR_IP0);
+            list.concat(taicpu.op_reg_ref(op,NR_IP0,href));
+            ungetcpuregister(list,NR_IP0);
+            list.concat(taicpu.op_reg(A_BR,NR_IP0));
+          end
+        else
+          a_jmp_name(list,procdef.mangledname);
+        list.concat(Tai_symbol_end.Createname(labelname));
+      end;
+
+
+    procedure create_codegen;
+      begin
+        cg:=tcgaarch64.Create;
+        cg128:=tcg128.Create;
+      end;
+
+end.

+ 200 - 38
compiler/aarch64/cpubase.pas

@@ -1,7 +1,8 @@
 {
     Copyright (c) 1998-2012 by Florian Klaempfl and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe and Florian Klaempfl
 
-    Contains the base types for ARM64
+    Contains the base types for Aarch64
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -68,14 +69,22 @@ unit cpubase;
       { Available Superregisters }
       {$i ra64sup.inc}
 
+      RS_IP0 = RS_X16;
+      RS_IP1 = RS_X17;
+
       R_SUBWHOLE = R_SUBQ;
 
       { Available Registers }
       {$i ra64con.inc}
 
+      NR_IP0 = NR_X16;
+      NR_IP1 = NR_X17;
+
       { Integer Super registers first and last }
       first_int_supreg = RS_X0;
-      first_int_imreg = $20;
+      { xzr and sp take up a separate super register because some instructions
+        are ambiguous otherwise }
+      first_int_imreg = $21;
 
       { Integer Super registers first and last }
       first_fpu_supreg = RS_S0;
@@ -92,7 +101,7 @@ unit cpubase;
         The value of this constant is equal to the constant
         PARM_BOUNDARY / BITS_PER_UNIT in the GCC source.
       }
-      std_param_align = 4;
+      std_param_align = 8;
 
       { TODO: Calculate bsstart}
       regnumber_count_bsstart = 128;
@@ -109,7 +118,7 @@ unit cpubase;
         {$i ra64dwa.inc}
       );
       { registers which may be destroyed by calls }
-      VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X29..RS_X30];
+      VOLATILE_INTREGISTERS = [RS_X0..RS_X18,RS_X30];
       VOLATILE_MMREGISTERS =  [RS_D0..RS_D7,RS_D16..RS_D31];
 
     type
@@ -126,16 +135,23 @@ unit cpubase;
       TOpPostfix = (PF_None,
         { update condition flags }
         PF_S,
-        { load/store }
-        PF_B,PF_SB,PF_H,PF_SH
+        { load/store sizes }
+        PF_B,PF_SB,PF_H,PF_SH,PF_W,PF_SW
       );
 
       TOpPostfixes = set of TOpPostfix;
 
     const
-      oppostfix2str : array[TOpPostfix] of string[2] = ('',
+      tcgsizep2size: array[OS_NO..OS_F128] of byte =
+        {OS_NO }
+        (0,
+        {OS_8,OS_16,OS_32,OS_64,OS_128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128}
+            0,    1,    2,    3,     4,    0,     1,     2,     3,      4,
+        {OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,}
+             2,      3,     0,     3,      4);
+      oppostfix2str: array[TOpPostfix] of string[2] = ('',
         's',
-        'b','sb','h','sh');
+        'b','sb','h','sh','w','sw');
 
 {*****************************************************************************
                                 Conditions
@@ -150,13 +166,15 @@ unit cpubase;
       TAsmConds = set of TAsmCond;
 
     const
+      C_CS = C_HS;
+      C_CC = C_LO;
       cond2str : array[TAsmCond] of string[2]=('',
         'eq','ne','hs','lo','mi','pl','vs','vc','hi','ls',
         'ge','lt','gt','le','al','nv'
       );
 
       uppercond2str : array[TAsmCond] of string[2]=('',
-        'EQ','NE','hs','LO','MI','PL','VS','VC','HI','LS',
+        'EQ','NE','HS','LO','MI','PL','VS','VC','HI','LS',
         'GE','LT','GT','LE','AL','NV'
       );
 
@@ -168,12 +186,28 @@ unit cpubase;
       TResFlags = (F_EQ,F_NE,F_CS,F_CC,F_MI,F_PL,F_VS,F_VC,F_HI,F_LS,
         F_GE,F_LT,F_GT,F_LE);
 
+    const
+      F_HS = F_CS;
+      F_LO = F_CC;
+
 {*****************************************************************************
                                 Operands
 *****************************************************************************}
 
+    type
       taddressmode = (AM_OFFSET,AM_PREINDEXED,AM_POSTINDEXED);
-      tshiftmode = (SM_None,SM_LSL,SM_LSR,SM_ASR,SM_ROR);
+
+      tshiftmode = (SM_None,
+                    { shifted register instructions. LSL can also be used for
+                      the index register of certain loads/stores }
+                    SM_LSL,SM_LSR,SM_ASR,
+                    { extended register instructions: zero/sign extension +
+                        optional shift (interpreted as LSL after extension)
+                       -- the index register of certain loads/stores can be
+                          extended via (s|u)xtw with a shiftval of either 0 or
+                          log2(transfer size of the load/store)
+                    }
+                    SM_UXTB,SM_UXTH,SM_UXTW,SM_UXTX,SM_SXTB,SM_SXTH,SM_SXTW,SM_SXTX);
 
       tupdatereg = (UR_None,UR_Update);
 
@@ -184,12 +218,6 @@ unit cpubase;
         shiftimm : byte;
       end;
 
-      tcpumodeflag = (mfA, mfI, mfF);
-      tcpumodeflags = set of tcpumodeflag;
-
-      tspecialregflag = (srC, srX, srS, srF);
-      tspecialregflags = set of tspecialregflag;
-
 {*****************************************************************************
                                  Constants
 *****************************************************************************}
@@ -201,6 +229,10 @@ unit cpubase;
       maxfpuregs = 32;
       maxaddrregs = 0;
 
+      shiftedregmodes = [SM_LSL,SM_UXTB,SM_UXTH,SM_UXTW,SM_UXTX,SM_SXTB,SM_SXTH,SM_SXTW,SM_SXTX];
+      extendedregmodes = [SM_LSL,SM_LSR,SM_ASR];
+
+
 {*****************************************************************************
                                 Operand Sizes
 *****************************************************************************}
@@ -232,17 +264,23 @@ unit cpubase;
                           Generic Register names
 *****************************************************************************}
 
-      NR_SP = NR_XZR;
-      RS_SP = RS_XZR;
-      NR_WSP = NR_WZR;
-      RS_WSP = RS_WZR;
+
+      NR_FP = NR_X29;
+      RS_FP = RS_X29;
+      NR_WFP = NR_W29;
+      RS_WFP = RS_W29;
+
+      NR_LR = NR_X30;
+      RS_LR = RS_X30;
+      NR_WLR = NR_W30;
+      RS_WLR = RS_W30;
 
       { Stack pointer register }
       NR_STACK_POINTER_REG = NR_SP;
       RS_STACK_POINTER_REG = RS_SP;
-      { Frame pointer register (initialized in tarmprocinfo.init_framepointer) }
-      RS_FRAME_POINTER_REG: tsuperregister = RS_X29;
-      NR_FRAME_POINTER_REG: tregister = NR_X29;
+      { Frame pointer register }
+      NR_FRAME_POINTER_REG = NR_X29;
+      RS_FRAME_POINTER_REG = RS_X29;
       { Register for addressing absolute data in a position independant way,
         such as in PIC code. The exact meaning is ABI specific. For
         further information look at GCC source : PIC_OFFSET_TABLE_REGNUM
@@ -307,6 +345,9 @@ unit cpubase;
 
     function dwarf_reg(r:tregister):shortint;
 
+    function is_shifter_const(d: aint; size: tcgsize): boolean;
+
+
   implementation
 
     uses
@@ -329,13 +370,24 @@ unit cpubase;
     function cgsize2subreg(regtype: tregistertype; s:Tcgsize):Tsubregister;
       begin
         case regtype of
+          R_INTREGISTER:
+            begin
+              case s of
+                { there's only Wn and Xn }
+                OS_64,
+                OS_S64:
+                  cgsize2subreg:=R_SUBWHOLE;
+                else
+                  cgsize2subreg:=R_SUBD;
+                end;
+            end;
           R_MMREGISTER:
             begin
               case s of
                 OS_F32:
-                  cgsize2subreg:=R_SUBFS;
+                  cgsize2subreg:=R_SUBMMS;
                 OS_F64:
-                  cgsize2subreg:=R_SUBFD;
+                  cgsize2subreg:=R_SUBMMD;
                 else
                   internalerror(2009112701);
               end;
@@ -349,18 +401,22 @@ unit cpubase;
     function reg_cgsize(const reg: tregister): tcgsize;
       begin
         case getregtype(reg) of
-          R_INTREGISTER :
-            reg_cgsize:=OS_32;
-          R_FPUREGISTER :
-            reg_cgsize:=OS_F80;
+          R_INTREGISTER:
+            case getsubreg(reg) of
+              R_SUBD:
+                result:=OS_32
+              else
+                result:=OS_64;
+            end;
           R_MMREGISTER :
             begin
               case getsubreg(reg) of
-                R_SUBFD,
-                R_SUBWHOLE:
+                R_SUBMMD:
                   result:=OS_F64;
-                R_SUBFS:
+                R_SUBMMS:
                   result:=OS_F32;
+                R_SUBMMWHOLE:
+                  result:=OS_M128;
                 else
                   internalerror(2009112903);
               end;
@@ -373,9 +429,7 @@ unit cpubase;
 
     function is_calljmp(o:tasmop):boolean;{$ifdef USEINLINE}inline;{$endif USEINLINE}
       begin
-        { This isn't 100% perfect because the arm allows jumps also by writing to PC=R15.
-          To overcome this problem we simply forbid that FPC generates jumps by loading R15 }
-        is_calljmp:= o in [A_B,A_BLR,A_RET];
+        is_calljmp:=o in [A_B,A_BL,A_BLR,A_RET,A_CBNZ,A_CBZ];
       end;
 
 
@@ -391,8 +445,8 @@ unit cpubase;
 
     function flags_to_cond(const f: TResFlags) : TAsmCond;
       const
-        flag_2_cond: array[F_EQ..F_LE] of TAsmCond =
-          (C_EQ,C_NE,C_HI,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
+        flag_2_cond: array[TResFlags] of TAsmCond =
+          (C_EQ,C_NE,C_HS,C_LO,C_MI,C_PL,C_VS,C_VC,C_HI,C_LS,
            C_GE,C_LT,C_GT,C_LE);
       begin
         if f>high(flag_2_cond) then
@@ -434,7 +488,7 @@ unit cpubase;
     function inverse_cond(const c: TAsmCond): TAsmCond; {$ifdef USEINLINE}inline;{$endif USEINLINE}
       const
         inverse: array[TAsmCond] of TAsmCond=(C_None,
-          C_NE,C_EQ,C_LO,C_HI,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
+          C_NE,C_EQ,C_LO,C_HS,C_PL,C_MI,C_VC,C_VS,C_LS,C_HI,
           C_LT,C_GE,C_LE,C_GT,C_None,C_None
         );
       begin
@@ -456,4 +510,112 @@ unit cpubase;
       end;
 
 
+    function is_shifter_const(d: aint; size: tcgsize): boolean;
+      var
+         pattern, checkpattern: qword;
+         patternlen, maxbits, replicatedlen: longint;
+         rightmostone, rightmostzero, checkbit, secondrightmostbit: longint;
+      begin
+        result:=false;
+        { patterns with all bits 0 or 1 cannot be represented this way }
+        if (d=0) then
+          exit;
+        case size of
+          OS_64,
+          OS_S64:
+            begin
+              if d=-1 then
+                exit;
+              maxbits:=64;
+            end
+          else
+            begin
+              if longint(d)=-1 then
+                exit;
+              { we'll generate a 32 bit pattern -> ignore upper sign bits in
+                case of negative longint value }
+              d:=cardinal(d);
+              maxbits:=32;
+            end;
+        end;
+        { "The Logical (immediate) instructions accept a bitmask immediate value
+          that is a 32-bit pattern or a 64-bit pattern viewed as a vector of
+          identical elements of size e = 2, 4, 8, 16, 32 or, 64 bits. Each
+          element contains the same sub-pattern, that is a single run of
+          1 to (e - 1) nonzero bits from bit 0 followed by zero bits, then
+          rotated by 0 to (e - 1) bits." (ARMv8 ARM)
+
+          Rather than generating all possible patterns and checking whether they
+          match our constant, we check whether the lowest 2/4/8/... bits are
+          a valid pattern, and if so whether the constant consists of a
+          replication of this pattern. Such a valid pattern has the form of
+          either (regexp notation)
+            * 1+0+1*
+            * 0+1+0* }
+        patternlen:=2;
+        while patternlen<=maxbits do
+          begin
+            { try lowest <patternlen> bits of d as pattern }
+            if patternlen<>64 then
+              pattern:=qword(d) and ((qword(1) shl patternlen)-1)
+            else
+              pattern:=qword(d);
+            { valid pattern? If it contains too many 1<->0 transitions, larger
+              parts of d cannot be a valid pattern either }
+            rightmostone:=BsfQWord(pattern);
+            rightmostzero:=BsfQWord(not(pattern));
+            { pattern all ones or zeroes -> not a valid pattern (but larger ones
+              can still be valid, since we have too few transitions) }
+            if (rightmostone<patternlen) and
+               (rightmostzero<patternlen) then
+              begin
+                if rightmostone>rightmostzero then
+                  begin
+                    { we have .*1*0* -> check next zero position by shifting
+                      out the existing zeroes (shr rightmostone), inverting and
+                      then again looking for the rightmost one position }
+                    checkpattern:=not(pattern);
+                    checkbit:=rightmostone;
+                  end
+                else
+                  begin
+                    { same as above, but for .*0*1* }
+                    checkpattern:=pattern;
+                    checkbit:=rightmostzero;
+                  end;
+                secondrightmostbit:=BsfQWord(checkpattern shr checkbit)+checkbit;
+                { if this position is >= patternlen -> ok (1 transition),
+                  otherwise we now have 2 transitions and have to check for a
+                  third (if there is one, abort)
+
+                  bsf returns 255 if no 1 bit is found, so in that case it's
+                  also ok
+                  }
+                if secondrightmostbit<patternlen then
+                  begin
+                    secondrightmostbit:=BsfQWord(not(checkpattern) shr secondrightmostbit)+secondrightmostbit;
+                    if secondrightmostbit<patternlen then
+                      exit;
+                  end;
+                { ok, this is a valid pattern, now does d consist of a
+                  repetition of this pattern? }
+                replicatedlen:=patternlen;
+                checkpattern:=pattern;
+                while replicatedlen<maxbits do
+                  begin
+                    { douplicate current pattern }
+                    checkpattern:=checkpattern or (checkpattern shl replicatedlen);
+                    replicatedlen:=replicatedlen*2;
+                  end;
+                if qword(d)=checkpattern then
+                  begin
+                    { yes! }
+                    result:=true;
+                    exit;
+                  end;
+              end;
+            patternlen:=patternlen*2;
+          end;
+      end;
+
 end.

+ 2 - 2
compiler/aarch64/cpuinfo.pas

@@ -103,12 +103,12 @@ Const
                                  { no need to write info about those }
                                  [cs_opt_level1,cs_opt_level2,cs_opt_level3]+
                                  [cs_opt_regvar,cs_opt_loopunroll,cs_opt_tailrecursion,
-				  cs_opt_stackframe,cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
+				  cs_opt_nodecse,cs_opt_reorder_fields,cs_opt_fastmath];
 
    level1optimizerswitches = genericlevel1optimizerswitches;
    level2optimizerswitches = genericlevel2optimizerswitches + level1optimizerswitches +
      [cs_opt_regvar,cs_opt_stackframe,cs_opt_tailrecursion,cs_opt_nodecse];
-   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [cs_opt_scheduler{,cs_opt_loopunroll}];
+   level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];
 
 Implementation

+ 40 - 0
compiler/aarch64/cpunode.pas

@@ -0,0 +1,40 @@
+{******************************************************************************
+    Copyright (c) 2014 by Florian Klaempfl
+
+    Includes the aarch64 code generator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ *****************************************************************************}
+unit cpunode;
+
+{$I fpcdefs.inc}
+
+interface
+{ This unit is used to define the specific CPU implementations. All needed
+actions are included in the INITALIZATION part of these units. This explains
+the behaviour of such a unit having just a USES clause! }
+
+implementation
+
+  uses
+    ncgbas,ncgflw,ncgcal,ncgcnv,ncgld,ncgmem,ncgcon,ncgset,ncgobjc,
+    ncpuadd,ncpumat,ncpumem,ncpuinl,ncpucnv,ncpuset,
+    { this not really a node }
+    rgcpu,
+    { symtable }
+    symcpu;
+
+end.

+ 456 - 520
compiler/aarch64/cpupara.pas

@@ -1,5 +1,5 @@
 {
-    Copyright (c) 2003-2012 by Florian Klaempfl and others
+    Copyright (c) 2013-2014 by Jonas Maebe, Florian Klaempfl and others
 
     AArch64 specific calling conventions
 
@@ -34,19 +34,24 @@ unit cpupara;
 
     type
        taarch64paramanager = class(tparamanager)
-          function get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;override;
-          function get_volatile_registers_fpu(calloption : tproccalloption):tcpuregisterset;override;
-          function get_volatile_registers_mm(calloption : tproccalloption):tcpuregisterset;override;
-          function push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
-          function ret_in_param(def:tdef;pd:tabstractprocdef):boolean;override;
-          procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
-          function create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;override;
-          function create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;override;
-          function get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+          function get_volatile_registers_int(calloption: tproccalloption): tcpuregisterset; override;
+          function get_volatile_registers_fpu(calloption: tproccalloption): tcpuregisterset; override;
+          function get_volatile_registers_mm(calloption: tproccalloption): tcpuregisterset; override;
+          function push_addr_param(varspez: tvarspez; def: tdef; calloption: tproccalloption): boolean; override;
+          function ret_in_param(def: tdef; pd: tabstractprocdef):boolean;override;
+          function create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;override;
+          function create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;override;
+          function get_funcretloc(p: tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+          function param_use_paraloc(const cgpara: tcgpara): boolean; override;
          private
-          procedure init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
-          function create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-            var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
+          curintreg,
+          curmmreg: tsuperregister;
+          curstackoffset: aword;
+
+          procedure init_para_alloc_values;
+          procedure alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+
+          procedure create_paraloc_info_intern(p: tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
        end;
 
   implementation
@@ -56,6 +61,13 @@ unit cpupara;
        rgobj,
        defutil,symsym,symtable;
 
+    const
+      RS_FIRST_INT_PARAM_SUPREG = RS_X0;
+      RS_LAST_INT_PARAM_SUPREG = RS_X7;
+      { Q0/D0/S0/H0/B0 all have the same superregister number }
+      RS_FIRST_MM_PARAM_SUPREG = RS_D0;
+      RS_LAST_MM_PARAM_SUPREG = RS_D7;
+
 
     function taarch64paramanager.get_volatile_registers_int(calloption : tproccalloption):tcpuregisterset;
       begin
@@ -75,47 +87,90 @@ unit cpupara;
       end;
 
 
-    procedure taarch64paramanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+    function is_hfa_internal(p: tdef; var basedef: tdef; var elecount: longint): boolean;
       var
-        paraloc : pcgparalocation;
-        def : tdef;
+        i: longint;
+        sym: tsym;
+        tmpelecount: longint;
       begin
-        if nr<1 then
-          internalerror(2002070801);
-        def:=tparavarsym(pd.paras[nr-1]).vardef;
-        cgpara.reset;
-        cgpara.size:=def_cgsize(def);
-        cgpara.intsize:=tcgsize2size[cgpara.size];
-        cgpara.alignment:=std_param_align;
-        cgpara.def:=def;
-        paraloc:=cgpara.add_location;
-        with paraloc^ do
-          begin
-            size:=OS_INT;
-            { the four first parameters are passed into registers }
-            if nr<=8 then
-              begin
-                loc:=LOC_REGISTER;
-                register:=newreg(R_INTREGISTER,RS_X0+nr-1,R_SUBWHOLE);
-              end
-            else
-              begin
-                { the other parameters are passed on the stack }
-                loc:=LOC_REFERENCE;
-                reference.index:=NR_STACK_POINTER_REG;
-                reference.offset:=(nr-9)*8;
-              end;
-          end;
+        result:=false;
+        case p.typ of
+          arraydef:
+            begin
+              if is_special_array(p) then
+                exit;
+              case tarraydef(p).elementdef.typ of
+                floatdef:
+                  begin
+                    { an array of empty records has no influence }
+                    if tarraydef(p).elementdef.size=0 then
+                      begin
+                        result:=true;
+                        exit
+                      end;
+                    tmpelecount:=0;
+                    if not is_hfa_internal(tarraydef(p).elementdef,basedef,tmpelecount) then
+                      exit;
+                    { tmpelecount now contains the number of hfa elements in a
+                      single array element (e.g. 2 if it's an array of a record
+                      containing two singles) -> multiply by number of elements
+                      in the array }
+                    inc(elecount,tarraydef(p).elecount*tmpelecount);
+                    if elecount>4 then
+                      exit;
+                  end;
+                else
+                  result:=is_hfa_internal(tarraydef(p).elementdef,basedef,elecount);
+                end;
+            end;
+          floatdef:
+            begin
+              if not assigned(basedef) then
+                basedef:=p
+              else if basedef<>p then
+                exit;
+              inc(elecount);
+              result:=true;
+            end;
+          recorddef:
+            begin
+              for i:=0 to tabstractrecorddef(p).symtable.symlist.count-1 do
+                begin
+                  sym:=tsym(tabstractrecorddef(p).symtable.symlist[i]);
+                  if sym.typ<>fieldvarsym then
+                    continue;
+                  if not is_hfa_internal(tfieldvarsym(sym).vardef,basedef,elecount) then
+                    exit
+                end;
+              result:=true;
+            end;
+          else
+            exit
+        end;
       end;
 
 
-    function Is_HFA(p : tdef) : boolean;
+    { Returns whether a def is a "homogeneous float array" at the machine level.
+      This means that in the memory layout, the def only consists of maximally
+      4 floating point values that appear consecutively in memory }
+    function is_hfa(p: tdef; out basedef: tdef) : boolean;
+      var
+        elecount: longint;
       begin
         result:=false;
+        basedef:=nil;
+        elecount:=0;
+        result:=is_hfa_internal(p,basedef,elecount);
+        result:=
+          result and
+          (elecount>0) and
+          (p.size=basedef.size*elecount)
       end;
 
 
-    function getparaloc(calloption : tproccalloption; p : tdef; isvariadic: boolean) : tcgloc;
+    function getparaloc(calloption: tproccalloption; p: tdef): tcgloc;
+      var
+        hfabasedef: tdef;
       begin
          { Later, the LOC_REFERENCE is in most cases changed into LOC_REGISTER
            if push_addr_param for the def is true
@@ -134,7 +189,10 @@ unit cpupara;
             classrefdef:
               getparaloc:=LOC_REGISTER;
             recorddef:
-              getparaloc:=LOC_REGISTER;
+              if not is_hfa(p,hfabasedef) then
+                getparaloc:=LOC_REGISTER
+              else
+                getparaloc:=LOC_MMREGISTER;
             objectdef:
               getparaloc:=LOC_REGISTER;
             stringdef:
@@ -147,12 +205,12 @@ unit cpupara;
             filedef:
               getparaloc:=LOC_REGISTER;
             arraydef:
-              getparaloc:=LOC_REFERENCE;
-            setdef:
-              if is_smallset(p) then
+              if not is_hfa(p,hfabasedef) then
                 getparaloc:=LOC_REGISTER
               else
-                getparaloc:=LOC_REFERENCE;
+                getparaloc:=LOC_MMREGISTER;
+            setdef:
+              getparaloc:=LOC_REGISTER;
             variantdef:
               getparaloc:=LOC_REGISTER;
             { avoid problems with errornous definitions }
@@ -164,7 +222,9 @@ unit cpupara;
       end;
 
 
-    function taarch64paramanager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
+    function taarch64paramanager.push_addr_param(varspez: tvarspez; def :tdef; calloption: tproccalloption): boolean;
+      var
+        hfabasedef: tdef;
       begin
         result:=false;
         if varspez in [vs_var,vs_out,vs_constref] then
@@ -174,19 +234,36 @@ unit cpupara;
           end;
         case def.typ of
           objectdef:
-            result:=not(Is_HFA(def) and (is_object(def) and ((varspez=vs_const) or (def.size=0));
+            result:=is_object(def);
           recorddef:
-            { note: should this ever be changed, make sure that const records
-                are always passed by reference for calloption=pocall_mwpascal }
-            result:=(varspez=vs_const) or (def.size=0);
+            { ABI: any composite > 16 bytes that not a hfa/hva
+              Special case: MWPascal, which passes all const parameters by
+                reference for compatibility reasons
+            }
+            result:=
+              ((varspez=vs_const) and
+               (calloption=pocall_mwpascal)) or
+              (not is_hfa(def,hfabasedef) and
+               (def.size>16));
           variantdef,
           formaldef:
             result:=true;
+          { arrays are composites and hence treated the same as records by the
+            ABI (watch out for C, where an array is a pointer)
+            Also: all other platforms pass const arrays by reference. Do the
+              same here, because there is too much hacky code out there that
+              relies on this ("array[0..0] of x" passed as const parameter and
+              then indexed beyond its bounds) }
           arraydef:
-            result:=(tarraydef(def).highrange>=tarraydef(def).lowrange) or
-                             is_open_array(def) or
-                             is_array_of_const(def) or
-                             is_array_constructor(def);
+            result:=
+              (calloption in cdecl_pocalls) or
+              is_open_array(def) or
+              is_array_of_const(def) or
+              is_array_constructor(def) or
+              ((tarraydef(def).highrange>=tarraydef(def).lowrange) and
+               ((varspez=vs_const) or
+                (not is_hfa(def,hfabasedef) and
+                 (def.size>16))));
           setdef :
             result:=def.size>16;
           stringdef :
@@ -195,511 +272,370 @@ unit cpupara;
       end;
 
 
-    function taarch64paramanager.ret_in_param(def:tdef;pd:tabstractprocdef):boolean;
-      var
-        i: longint;
-        sym: tsym;
-        fpufield: boolean;
+    function taarch64paramanager.ret_in_param(def: tdef; pd: tabstractprocdef): boolean;
       begin
         if handle_common_ret_in_param(def,pd,result) then
           exit;
-        case def.typ of
-          recorddef:
-            begin
-              result:=def.size>4;
-              if not result and
-                 (target_info.abi in [abi_default,abi_armeb]) then
-                begin
-                  { in case of the old ARM abi (APCS), a struct is returned in
-                    a register only if it is simple. And what is a (non-)simple
-                    struct:
-
-                    "A non-simple type is any non-floating-point type of size
-                     greater than one word (including structures containing only
-                     floating-point fields), and certain single-word structured
-                     types."
-                       (-- ARM APCS documentation)
-
-                    So only floating point types or more than one word ->
-                    definitely non-simple (more than one word is already
-                    checked above). This includes unions/variant records with
-                    overlaid floating point and integer fields.
-
-                    Smaller than one word struct types are simple if they are
-                    "integer-like", and:
-
-                    "A structure is termed integer-like if its size is less than
-                    or equal to one word, and the offset of each of its
-                    addressable subfields is zero."
-                      (-- ARM APCS documentation)
-
-                    An "addressable subfield" is a field of which you can take
-                    the address, which in practive means any non-bitfield.
-                    In Pascal, there is no way to express the difference that
-                    you can have in C between "char" and "int :8". In this
-                    context, we use the fake distinction that a type defined
-                    inside the record itself (such as "a: 0..255;") indicates
-                    a bitpacked field while a field using a different type
-                    (such as "a: byte;") is not.
-                  }
-                  for i:=0 to trecorddef(def).symtable.SymList.count-1 do
-                    begin
-                      sym:=tsym(trecorddef(def).symtable.SymList[i]);
-                      if sym.typ<>fieldvarsym then
-                        continue;
-                      { bitfield -> ignore }
-                      if (trecordsymtable(trecorddef(def).symtable).usefieldalignment=bit_alignment) and
-                         (tfieldvarsym(sym).vardef.typ in [orddef,enumdef]) and
-                         (tfieldvarsym(sym).vardef.owner.defowner=def) then
-                        continue;
-                      { all other fields must be at offset zero }
-                      if tfieldvarsym(sym).fieldoffset<>0 then
-                        begin
-                          result:=true;
-                          exit;
-                        end;
-                      { floating point field -> also by reference }
-                      if tfieldvarsym(sym).vardef.typ=floatdef then
-                        begin
-                          result:=true;
-                          exit;
-                        end;
-                    end;
-                end;
-            end;
-          procvardef:
-            if not tprocvardef(def).is_addressonly then
-              result:=true
-            else
-              result:=false
-          else
-            result:=inherited ret_in_param(def,pd);
-        end;
-      end;
-
-
-    procedure taarch64paramanager.init_values(var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister);
-      begin
-        curintreg:=RS_R0;
-        curfloatreg:=RS_F0;
-        curmmreg:=RS_D0;
-        cur_stack_offset:=0;
-        sparesinglereg := NR_NO;
+        { ABI: if the parameter would be passed in registers, it is returned
+            in those registers; otherwise, it's returned by reference }
+        result:=push_addr_param(vs_value,def,pd.proccalloption);
       end;
 
 
-    function taarch64paramanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist;
-        var curintreg, curfloatreg, curmmreg: tsuperregister; var cur_stack_offset: aword; var sparesinglereg: tregister; isvariadic: boolean):longint;
-
+    procedure taarch64paramanager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee; paras: tparalist; isvariadic: boolean);
       var
-        nextintreg,nextfloatreg,nextmmreg : tsuperregister;
-        paradef : tdef;
-        paraloc : pcgparalocation;
-        stack_offset : aword;
-        hp : tparavarsym;
-        loc : tcgloc;
-        paracgsize   : tcgsize;
-        paralen : longint;
-        i : integer;
-        firstparaloc: boolean;
-
-      procedure assignintreg;
-        begin
-          { In case of po_delphi_nested_cc, the parent frame pointer
-            is always passed on the stack. }
-           if (nextintreg<=RS_R3) and
-              (not(vo_is_parentfp in hp.varoptions) or
-               not(po_delphi_nested_cc in p.procoptions)) then
-             begin
-               paraloc^.loc:=LOC_REGISTER;
-               paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-               inc(nextintreg);
-             end
-           else
-             begin
-               paraloc^.loc:=LOC_REFERENCE;
-               paraloc^.reference.index:=NR_STACK_POINTER_REG;
-               paraloc^.reference.offset:=stack_offset;
-               inc(stack_offset,4);
-            end;
-        end;
-
-
+        hp: tparavarsym;
+        i: longint;
       begin
-        result:=0;
-        nextintreg:=curintreg;
-        nextfloatreg:=curfloatreg;
-        nextmmreg:=curmmreg;
-        stack_offset:=cur_stack_offset;
-
         for i:=0 to paras.count-1 do
           begin
             hp:=tparavarsym(paras[i]);
-            paradef:=hp.vardef;
-
-            hp.paraloc[side].reset;
-
-            { currently only support C-style array of const,
-              there should be no location assigned to the vararg array itself }
-            if (p.proccalloption in cstylearrayofconst) and
-               is_array_of_const(paradef) then
-              begin
-                paraloc:=hp.paraloc[side].add_location;
-                { hack: the paraloc must be valid, but is not actually used }
-                paraloc^.loc:=LOC_REGISTER;
-                paraloc^.register:=NR_R0;
-                paraloc^.size:=OS_ADDR;
-                break;
-              end;
-
-            if push_addr_param(hp.varspez,paradef,p.proccalloption) then
+            { hidden function result parameter is passed in X8 (doesn't have to
+              be valid on return) according to the ABI
+
+              -- don't follow the ABI for managed types, because
+               a) they are passed in registers as parameters, so we should also
+                  return them in a register to be ABI-compliant (which we can't
+                  because the entire compiler is built around the idea that
+                  they are returned by reference, for ref-counting performance
+                  and Delphi-compatibility reasons)
+               b) there are hacks in the system unit that expect that you can
+                  call
+                    function f: com_interface;
+                  as
+                    procedure p(out o: obj);
+                  That can only work in case we do not use x8 to return them
+                  from the function, but the regular first parameter register.
+
+              As the ABI says this behaviour is ok for C++ classes with a
+              non-trivial copy constructor or destructor, it seems reasonable
+              for us to do this for managed types as well.}
+            if (vo_is_funcret in hp.varoptions) and
+               not is_managed_type(hp.vardef) then
               begin
-                paradef:=getpointerdef(paradef);
-                loc:=LOC_REGISTER;
-                paracgsize := OS_ADDR;
-                paralen := tcgsize2size[OS_ADDR];
-              end
-            else
-              begin
-                if not is_special_array(paradef) then
-                  paralen := paradef.size
-                else
-                  paralen := tcgsize2size[def_cgsize(paradef)];
-                loc := getparaloc(p.proccalloption,paradef,isvariadic);
-                if (paradef.typ in [objectdef,arraydef,recorddef]) and
-                  not is_special_array(paradef) and
-                  (hp.varspez in [vs_value,vs_const]) then
-                  paracgsize := int_cgsize(paralen)
-                else
+                hp.paraloc[side].reset;
+                hp.paraloc[side].size:=OS_ADDR;
+                hp.paraloc[side].alignment:=voidpointertype.alignment;
+                hp.paraloc[side].intsize:=voidpointertype.size;
+                hp.paraloc[side].def:=getpointerdef(hp.vardef);
+                with hp.paraloc[side].add_location^ do
                   begin
-                    paracgsize:=def_cgsize(paradef);
-                    { for things like formaldef }
-                    if (paracgsize=OS_NO) then
-                      begin
-                        paracgsize:=OS_ADDR;
-                        paralen:=tcgsize2size[OS_ADDR];
-                        paradef:=voidpointertype;
-                      end;
+                    size:=OS_ADDR;
+                    def:=hp.paraloc[side].def;
+                    loc:=LOC_REGISTER;
+                    register:=NR_X8;
                   end
-              end;
-
-             hp.paraloc[side].size:=paracgsize;
-             hp.paraloc[side].Alignment:=std_param_align;
-             hp.paraloc[side].intsize:=paralen;
-             hp.paraloc[side].def:=paradef;
-             firstparaloc:=true;
-
-{$ifdef EXTDEBUG}
-             if paralen=0 then
-               internalerror(200410311);
-{$endif EXTDEBUG}
-             while paralen>0 do
-               begin
-                 paraloc:=hp.paraloc[side].add_location;
-
-                 if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
-                   case paracgsize of
-                     OS_F32:
-                       paraloc^.size:=OS_32;
-                     OS_F64:
-                       paraloc^.size:=OS_32;
-                     else
-                       internalerror(2005082901);
-                   end
-                 else if (paracgsize in [OS_NO,OS_64,OS_S64]) then
-                   paraloc^.size := OS_32
-                 else
-                   paraloc^.size:=paracgsize;
-                 case loc of
-                    LOC_REGISTER:
-                      begin
-                        { align registers for eabi }
-                        if (target_info.abi in [abi_eabi,abi_eabihf]) and
-                           firstparaloc and
-                           (paradef.alignment=8) then
-                          begin
-                            if (nextintreg in [RS_R1,RS_R3]) then
-                              inc(nextintreg)
-                            else if nextintreg>RS_R3 then
-                              stack_offset:=align(stack_offset,8);
-                          end;
-                        { this is not abi compliant
-                          why? (FK) }
-                        if nextintreg<=RS_R3 then
-                          begin
-                            paraloc^.loc:=LOC_REGISTER;
-                            paraloc^.register:=newreg(R_INTREGISTER,nextintreg,R_SUBWHOLE);
-                            inc(nextintreg);
-                          end
-                        else
-                          begin
-                            { LOC_REFERENCE always contains everything that's left }
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            if (side=callerside) then
-                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            inc(stack_offset,align(paralen,4));
-                            paralen:=0;
-                         end;
-                      end;
-                    LOC_FPUREGISTER:
-                      begin
-                        if nextfloatreg<=RS_F3 then
-                          begin
-                            paraloc^.loc:=LOC_FPUREGISTER;
-                            paraloc^.register:=newreg(R_FPUREGISTER,nextfloatreg,R_SUBWHOLE);
-                            inc(nextfloatreg);
-                          end
-                        else
-                          begin
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            case paraloc^.size of
-                              OS_F32:
-                                inc(stack_offset,4);
-                              OS_F64:
-                                inc(stack_offset,8);
-                              OS_F80:
-                                inc(stack_offset,10);
-                              OS_F128:
-                                inc(stack_offset,16);
-                              else
-                                internalerror(200403201);
-                            end;
-                          end;
-                      end;
-                    LOC_MMREGISTER:
-                      begin
-                        if (nextmmreg<=RS_D7) or
-                           ((paraloc^.size = OS_F32) and
-                            (sparesinglereg<>NR_NO)) then
-                          begin
-                            paraloc^.loc:=LOC_MMREGISTER;
-                            case paraloc^.size of
-                              OS_F32:
-                                if sparesinglereg = NR_NO then 
-                                  begin     
-                                    paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFS);
-                                    sparesinglereg:=newreg(R_MMREGISTER,nextmmreg-RS_S0+RS_S1,R_SUBFS);
-                                    inc(nextmmreg);
-                                  end
-                                else
-                                  begin
-                                    paraloc^.register:=sparesinglereg;
-                                    sparesinglereg := NR_NO;
-                                  end;
-                              OS_F64:
-                                begin
-                                  paraloc^.register:=newreg(R_MMREGISTER,nextmmreg,R_SUBFD);
-                                  inc(nextmmreg);
-                                end;
-                              else
-                                internalerror(2012031601);
-                            end;
-                          end
-                        else
-                          begin
-                            { once a floating point parameters has been placed
-                            on the stack we must not pass any more in vfp regs
-                            even if there is a single precision register still
-                            free}
-                            sparesinglereg := NR_NO;
-                            { LOC_REFERENCE always contains everything that's left }
-                            paraloc^.loc:=LOC_REFERENCE;
-                            paraloc^.size:=int_cgsize(paralen);
-                            if (side=callerside) then
-                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                            paraloc^.reference.offset:=stack_offset;
-                            inc(stack_offset,align(paralen,4));
-                            paralen:=0;
-                         end;
-                      end;
-                    LOC_REFERENCE:
-                      begin
-                        if push_addr_param(hp.varspez,paradef,p.proccalloption) then
-                          begin
-                            paraloc^.size:=OS_ADDR;
-                            assignintreg
-                          end
-                        else
-                          begin
-                            { align stack for eabi }
-                            if (target_info.abi in [abi_eabi,abi_eabihf]) and
-                               firstparaloc and
-                               (paradef.alignment=8) then
-                              stack_offset:=align(stack_offset,8);
-
-                             paraloc^.size:=paracgsize;
-                             paraloc^.loc:=LOC_REFERENCE;
-                             paraloc^.reference.index:=NR_STACK_POINTER_REG;
-                             paraloc^.reference.offset:=stack_offset;
-                             inc(stack_offset,align(paralen,4));
-                             paralen:=0
-                          end;
-                      end;
-                    else
-                      internalerror(2002071002);
-                 end;
-                 if side=calleeside then
-                   begin
-                     if paraloc^.loc=LOC_REFERENCE then
-                       begin
-                         paraloc^.reference.index:=NR_FRAME_POINTER_REG;
-                         { on non-Darwin, the framepointer contains the value
-                           of the stack pointer on entry. On Darwin, the
-                           framepointer points to the previously saved
-                           framepointer (which is followed only by the saved
-                           return address -> framepointer + 4 = stack pointer
-                           on entry }
-                         if not(target_info.system in systems_darwin) then
-                           inc(paraloc^.reference.offset,4)
-                         else
-                           inc(paraloc^.reference.offset,8);
-                       end;
-                   end;
-                 dec(paralen,tcgsize2size[paraloc^.size]);
-                 firstparaloc:=false
-               end;
+              end
+            else
+              alloc_para(hp.paraloc[side],p,hp.varspez,side,hp.vardef,isvariadic,
+                (vo_is_parentfp in hp.varoptions) and
+                (po_delphi_nested_cc in p.procoptions));
           end;
-        curintreg:=nextintreg;
-        curfloatreg:=nextfloatreg;
-        curmmreg:=nextmmreg;
-        cur_stack_offset:=stack_offset;
-        result:=cur_stack_offset;
       end;
 
 
     function  taarch64paramanager.get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;
       var
-        paraloc : pcgparalocation;
-        retcgsize  : tcgsize;
+        retcgsize: tcgsize;
       begin
          if set_common_funcretloc_info(p,forcetempdef,retcgsize,result) then
            exit;
 
-        paraloc:=result.add_location;
-        { Return in FPU register? }
-        if result.def.typ=floatdef then
+         { in this case, it must be returned in registers as if it were passed
+           as the first parameter }
+         init_para_alloc_values;
+         alloc_para(result,p,vs_value,side,result.def,false,false);
+         { sanity check (LOC_VOID for empty records) }
+         if not assigned(result.location) or
+            not(result.location^.loc in [LOC_REGISTER,LOC_MMREGISTER,LOC_VOID]) then
+           internalerror(2014113001);
+      end;
+
+    function taarch64paramanager.param_use_paraloc(const cgpara: tcgpara): boolean;
+      begin
+        { we always set up a stack frame -> we can always access the parameters
+          this way }
+        result:=
+          (cgpara.location^.loc=LOC_REFERENCE) and
+          not assigned(cgpara.location^.next);
+      end;
+
+
+    procedure taarch64paramanager.init_para_alloc_values;
+      begin
+        curintreg:=RS_FIRST_INT_PARAM_SUPREG;
+        curmmreg:=RS_FIRST_MM_PARAM_SUPREG;
+        curstackoffset:=0;
+      end;
+
+
+    procedure taarch64paramanager.alloc_para(out result: tcgpara; p: tabstractprocdef; varspez: tvarspez; side: tcallercallee; paradef: tdef; isvariadic, isdelphinestedcc: boolean);
+      var
+        hfabasedef, locdef: tdef;
+        paraloc: pcgparalocation;
+        paralen, stackslotlen: asizeint;
+        loc: tcgloc;
+        paracgsize, locsize: tcgsize;
+        firstparaloc: boolean;
+      begin
+        result.reset;
+
+        { currently only support C-style array of const,
+          there should be no location assigned to the vararg array itself }
+        if (p.proccalloption in cstylearrayofconst) and
+           is_array_of_const(paradef) then
           begin
-            if target_info.abi = abi_eabihf then 
-              begin
-                paraloc^.loc:=LOC_MMREGISTER;
-                case retcgsize of
-                  OS_64,
-                  OS_F64:
-                    begin
-                      paraloc^.register:=NR_MM_RESULT_REG;
-                    end;
-                  OS_32,
-                  OS_F32:
-                    begin
-                      paraloc^.register:=NR_S0;
-                    end;
-                  else
-                    internalerror(2012032501);
-                end;
-                paraloc^.size:=retcgsize;
-              end
-            else if (p.proccalloption in [pocall_softfloat]) or
-               (cs_fp_emulation in current_settings.moduleswitches) or
-               (current_settings.fputype in [fpu_vfpv2,fpu_vfpv3,fpu_vfpv3_d16,fpu_fpv4_s16]) then
-              begin
-                case retcgsize of
-                  OS_64,
-                  OS_F64:
-                    begin
-                      paraloc^.loc:=LOC_REGISTER;
-                      if target_info.endian = endian_big then
-                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
-                      else
-                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
-                      paraloc^.size:=OS_32;
-                      paraloc:=result.add_location;
-                      paraloc^.loc:=LOC_REGISTER;
-                      if target_info.endian = endian_big then
-                        paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
-                      else
-                        paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
-                      paraloc^.size:=OS_32;
-                    end;
-                  OS_32,
-                  OS_F32:
-                    begin
-                      paraloc^.loc:=LOC_REGISTER;
-                      paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                      paraloc^.size:=OS_32;
-                    end;
-                  else
-                    internalerror(2005082603);
-                end;
-              end
-            else
-              begin
-                paraloc^.loc:=LOC_FPUREGISTER;
-                paraloc^.register:=NR_FPU_RESULT_REG;
-                paraloc^.size:=retcgsize;
-              end;
+            paraloc:=result.add_location;
+            { hack: the paraloc must be valid, but is not actually used }
+            paraloc^.loc:=LOC_REGISTER;
+            paraloc^.register:=NR_X0;
+            paraloc^.size:=OS_ADDR;
+            exit;
+          end;
+
+        if push_addr_param(varspez,paradef,p.proccalloption) then
+          begin
+            paradef:=getpointerdef(paradef);
+            loc:=LOC_REGISTER;
+            paracgsize:=OS_ADDR;
+            paralen:=tcgsize2size[OS_ADDR];
           end
-          { Return in register }
         else
           begin
-            if retcgsize in [OS_64,OS_S64] then
-              begin
-                paraloc^.loc:=LOC_REGISTER;
-                if target_info.endian = endian_big then
-                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
-                else
-                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
-                paraloc^.size:=OS_32;
-                paraloc:=result.add_location;
-                paraloc^.loc:=LOC_REGISTER;
-                if target_info.endian = endian_big then
-                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
-                else
-                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
-                paraloc^.size:=OS_32;
-              end
+            if not is_special_array(paradef) then
+              paralen:=paradef.size
+            else
+              paralen:=tcgsize2size[def_cgsize(paradef)];
+            loc:=getparaloc(p.proccalloption,paradef);
+            if (paradef.typ in [objectdef,arraydef,recorddef]) and
+               not is_special_array(paradef) and
+               (varspez in [vs_value,vs_const]) then
+              paracgsize:=int_cgsize(paralen)
             else
               begin
-                paraloc^.loc:=LOC_REGISTER;
-                paraloc^.register:=NR_FUNCTION_RETURN_REG;
-                if (result.intsize<>3) then
-                  paraloc^.size:=retcgsize
-                else
-                  paraloc^.size:=OS_32;
-              end;
+                paracgsize:=def_cgsize(paradef);
+                { for things like formaldef }
+                if paracgsize=OS_NO then
+                  begin
+                    paracgsize:=OS_ADDR;
+                    paralen:=tcgsize2size[OS_ADDR];
+                    paradef:=voidpointertype;
+                  end;
+              end
           end;
+
+          { get hfa basedef if applicable }
+          if not is_hfa(paradef,hfabasedef) then
+            hfabasedef:=nil;
+
+         result.size:=paracgsize;
+         result.alignment:=std_param_align;
+         result.intsize:=paralen;
+         result.def:=paradef;
+
+         { empty record: skipped (explicitly defined by Apple ABI, undefined
+           by general ABI; libffi also skips them in all cases) }
+         if not is_special_array(paradef) and
+            (paradef.size=0) then
+           begin
+             paraloc:=result.add_location;
+             paraloc^.loc:=LOC_VOID;
+             paraloc^.def:=paradef;
+             paraloc^.size:=OS_NO;
+             exit;
+           end;
+
+         { sufficient registers left? }
+         case loc of
+           LOC_REGISTER:
+             begin
+               { In case of po_delphi_nested_cc, the parent frame pointer
+                 is always passed on the stack. }
+               if isdelphinestedcc then
+                 loc:=LOC_REFERENCE
+               else if curintreg+((paralen-1) shr 3)>RS_LAST_INT_PARAM_SUPREG then
+                 begin
+                   { not enough integer registers left -> no more register
+                     parameters, copy all to stack
+                   }
+                   curintreg:=succ(RS_LAST_INT_PARAM_SUPREG);
+                   loc:=LOC_REFERENCE;
+                 end;
+             end;
+           LOC_MMREGISTER:
+             begin;
+               { every hfa element must be passed in a separate register }
+               if (assigned(hfabasedef) and
+                   (curmmreg+(paralen div hfabasedef.size)>RS_LAST_MM_PARAM_SUPREG)) or
+                  (curmmreg+((paralen-1) shr 3)>RS_LAST_MM_PARAM_SUPREG) then
+                 begin
+                   { not enough mm registers left -> no more register
+                     parameters, copy all to stack
+                   }
+                   curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
+                   loc:=LOC_REFERENCE;
+                 end;
+             end;
+         end;
+
+         { allocate registers/stack locations }
+         firstparaloc:=true;
+         repeat
+           paraloc:=result.add_location;
+
+           { set paraloc size/def }
+           if assigned(hfabasedef) then
+             begin
+               locsize:=def_cgsize(hfabasedef);
+               locdef:=hfabasedef;
+             end
+           { make sure we don't lose whether or not the type is signed }
+           else if (loc=LOC_REGISTER) and
+                   (paradef.typ<>orddef) then
+             begin
+               locsize:=int_cgsize(paralen);
+               locdef:=get_paraloc_def(paradef,paralen,firstparaloc);
+             end
+           else
+             begin
+               locsize:=paracgsize;
+               locdef:=paradef;
+             end;
+           if locsize in [OS_NO,OS_128,OS_S128] then
+             begin
+               if paralen>4 then
+                 begin
+                   paraloc^.size:=OS_INT;
+                   paraloc^.def:=u64inttype;
+                 end
+               else
+                 begin
+                   { for 3-byte records }
+                   paraloc^.size:=OS_32;
+                   paraloc^.def:=u32inttype;
+                 end;
+             end
+           else
+             begin
+               paraloc^.size:=locsize;
+               paraloc^.def:=locdef;
+             end;
+
+           { paraloc loc }
+           paraloc^.loc:=loc;
+
+           { assign register/stack address }
+           case loc of
+             LOC_REGISTER:
+               begin
+                 paraloc^.register:=newreg(R_INTREGISTER,curintreg,cgsize2subreg(R_INTREGISTER,paraloc^.size));
+                 inc(curintreg);
+                 dec(paralen,tcgsize2size[paraloc^.size]);
+
+                 { "The general ABI specifies that it is the callee's
+                    responsibility to sign or zero-extend arguments having fewer
+                    than 32 bits, and that unused bits in a register are
+                    unspecified. In iOS, however, the caller must perform such
+                    extensions, up to 32 bits." }
+                 if (target_info.abi=abi_aarch64_darwin) and
+                    (side=callerside) and
+                    is_ordinal(paradef) and
+                    (paradef.size<4) then
+                   paraloc^.size:=OS_32;
+
+                 { in case it's a composite, "The argument is passed as though
+                   it had been loaded into the registers from a double-word-
+                   aligned address with an appropriate sequence of LDR
+                   instructions loading consecutive registers from memory" ->
+                   in case of big endian, values in not completely filled
+                   registers must be shifted to the top bits }
+                 if (target_info.endian=endian_big) and
+                    not(paraloc^.size in [OS_64,OS_S64]) and
+                    (paradef.typ in [setdef,recorddef,arraydef,objectdef]) then
+                   paraloc^.shiftval:=-(8-tcgsize2size[paraloc^.size]);
+               end;
+             LOC_MMREGISTER:
+               begin
+                 paraloc^.register:=newreg(R_MMREGISTER,curmmreg,cgsize2subreg(R_MMREGISTER,paraloc^.size));
+                 inc(curmmreg);
+                 dec(paralen,tcgsize2size[paraloc^.size]);
+               end;
+             LOC_REFERENCE:
+               begin
+                  paraloc^.size:=paracgsize;
+                  paraloc^.loc:=LOC_REFERENCE;
+
+                  { the current stack offset may not be properly aligned in
+                    case we're on Darwin have allocated a non-variadic argument
+                    < 8 bytes previously }
+                  if target_info.abi=abi_aarch64_darwin then
+                    curstackoffset:=align(curstackoffset,paraloc^.def.alignment);
+
+                  { on Darwin, non-variadic arguments take up their actual size
+                    on the stack; on other platforms, they take up a multiple of
+                    8 bytes }
+                  if (target_info.abi=abi_aarch64_darwin) and
+                     not isvariadic then
+                    stackslotlen:=paralen
+                  else
+                    stackslotlen:=align(paralen,8);
+
+                  { from the ABI: if arguments occupy partial stack space, they
+                    have to occupy the lowest significant bits of a register
+                    containing that value which is then stored to memory ->
+                    in case of big endian, skip the alignment bytes (if any) }
+                  if target_info.endian=endian_little then
+                    paraloc^.reference.offset:=curstackoffset
+                  else
+                    paraloc^.reference.offset:=curstackoffset+stackslotlen-paralen;
+                  if side=callerside then
+                    paraloc^.reference.index:=NR_STACK_POINTER_REG
+                  else
+                    begin
+                      paraloc^.reference.index:=NR_FRAME_POINTER_REG;
+                      inc(paraloc^.reference.offset,16);
+                    end;
+                  inc(curstackoffset,stackslotlen);
+                  paralen:=0
+               end;
+             else
+               internalerror(2002071002);
+           end;
+         firstparaloc:=false;
+         { <=0 for sign/zero-extended locations }
+         until paralen<=0;
       end;
 
 
-    function taarch64paramanager.create_paraloc_info(p : tabstractprocdef; side: tcallercallee):longint;
-      var
-        cur_stack_offset: aword;
-        curintreg, curfloatreg, curmmreg: tsuperregister;
-        sparesinglereg:tregister;
+    function taarch64paramanager.create_paraloc_info(p: tabstractprocdef; side: tcallercallee):longint;
       begin
-        init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_para_alloc_values;
 
-        result:=create_paraloc_info_intern(p,side,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,false);
+        create_paraloc_info_intern(p,side,p.paras,false);
+        result:=curstackoffset;
 
         create_funcretloc_info(p,side);
      end;
 
 
-    function taarch64paramanager.create_varargs_paraloc_info(p : tabstractprocdef; varargspara:tvarargsparalist):longint;
-      var
-        cur_stack_offset: aword;
-        curintreg, curfloatreg, curmmreg: tsuperregister;
-        sparesinglereg:tregister;
+    function taarch64paramanager.create_varargs_paraloc_info(p: tabstractprocdef; varargspara: tvarargsparalist):longint;
       begin
-        init_values(curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg);
+        init_para_alloc_values;
 
-        result:=create_paraloc_info_intern(p,callerside,p.paras,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true);
-        if (p.proccalloption in cstylearrayofconst) then
-          { just continue loading the parameters in the registers }
-          result:=create_paraloc_info_intern(p,callerside,varargspara,curintreg,curfloatreg,curmmreg,cur_stack_offset,sparesinglereg,true)
+        { non-variadic parameters }
+        create_paraloc_info_intern(p,callerside,p.paras,false);
+        if p.proccalloption in cstylearrayofconst then
+          begin
+            { on Darwin, we cannot use any registers for variadic parameters }
+            if target_info.abi=abi_aarch64_darwin then
+              begin
+                curintreg:=succ(RS_LAST_INT_PARAM_SUPREG);
+                curmmreg:=succ(RS_LAST_MM_PARAM_SUPREG);
+              end;
+            { continue loading the parameters  }
+            create_paraloc_info_intern(p,callerside,varargspara,true);
+            result:=curstackoffset;
+          end
         else
           internalerror(200410231);
       end;

+ 68 - 0
compiler/aarch64/cpupi.pas

@@ -0,0 +1,68 @@
+{
+    Copyright (c) 2002 by Florian Klaempfl
+
+    This unit contains the CPU specific part of tprocinfo
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cpupi;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    procinfo,
+    psub;
+
+  type
+    taarch64procinfo=class(tcgprocinfo)
+      constructor create(aparent: tprocinfo); override;
+      procedure set_first_temp_offset; override;
+    end;
+
+implementation
+
+  uses
+    tgobj,
+    cpubase;
+
+  constructor taarch64procinfo.create(aparent: tprocinfo);
+    begin
+      inherited;
+      { use the stack pointer as framepointer, because
+         1) we exactly know the offsets of the temps from the stack pointer
+            after pass 1 (based on the require parameter stack size for called
+            routines), while we don't know it for the frame pointer (it depends
+            on the number of saved registers)
+         2) temp offsets from the stack pointer are positive while those from
+            the frame pointer are negative, and we can directly encode much
+            bigger positive offsets in the instructions
+      }
+      framepointer:=NR_STACK_POINTER_REG;
+    end;
+
+  procedure taarch64procinfo.set_first_temp_offset;
+    begin
+     { leave room for allocated parameters }
+     tg.setfirsttemp(align(maxpushedparasize,16));
+    end;
+
+
+begin
+  cprocinfo:=taarch64procinfo;
+end.

+ 70 - 0
compiler/aarch64/cputarg.pas

@@ -0,0 +1,70 @@
+{
+    Copyright (c) 2001-2002 by Peter Vreman
+
+    Includes the AArch64 dependent target units
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit cputarg;
+
+{$i fpcdefs.inc}
+
+interface
+
+
+implementation
+
+    uses
+      systems { prevent a syntax error when nothing is included }
+
+{**************************************
+             Targets
+**************************************}
+
+    {$ifndef NOTARGETLINUX}
+//      ,t_linux
+    {$endif}
+    {$ifndef NOTARGETBSD}
+      ,t_bsd
+    {$endif}
+
+{**************************************
+             Assemblers
+**************************************}
+
+    {$ifndef NOAGCPUGAS}
+      ,agcpugas
+    {$endif}
+
+{**************************************
+        Assembler Readers
+**************************************}
+
+  {$ifndef NoRaarmgas}
+       ,racpugas
+  {$endif NoRaarmgas}
+
+{**************************************
+             Debuginfo
+**************************************}
+
+  {$ifndef NoDbgDwarf}
+      ,dbgdwarf
+  {$endif NoDbgDwarf}
+      ;
+
+end.

+ 156 - 0
compiler/aarch64/hlcgcpu.pas

@@ -0,0 +1,156 @@
+{
+    Copyright (c) 1998-2010 by Florian Klaempfl and Jonas Maebe
+    Member of the Free Pascal development team
+
+    This unit contains routines to create a pass-through high-level code
+    generator. This is used by most regular code generators.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+
+unit hlcgcpu;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    symtype,
+    aasmdata,
+    cgbase,cgutils,
+    hlcgobj, hlcg2ll;
+
+  type
+    thlcgaarch64 = class(thlcg2ll)
+      procedure a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister); override;
+      procedure a_load_subsetreg_subsetreg(list: TAsmlist; fromsubsetsize, tosubsetsize: tdef; const fromsreg, tosreg: tsubsetregister); override;
+     protected
+      procedure a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); override;
+    end;
+
+  procedure create_hlcodegen;
+
+implementation
+
+  uses
+    defutil,
+    cpubase,aasmcpu,
+    cgobj,cgcpu;
+
+  procedure thlcgaarch64.a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister);
+    var
+      op: tasmop;
+      tocgsize: tcgsize;
+      tmpdestreg: tregister;
+    begin
+      tocgsize:=def_cgsize(tosize);
+      if (sreg.startbit<>0) or
+         not(sreg.bitlen in [32,64]) then
+        begin
+          if is_signed(subsetsize) then
+            op:=A_SBFX
+          else
+            op:=A_UBFX;
+          { source and destination register of SBFX/UBFX have to be the same size }
+          if (sreg.subsetregsize in [OS_64,OS_S64]) and
+             not(tocgsize in [OS_64,OS_S64]) then
+            tmpdestreg:=cg.getintregister(list,OS_64)
+          else if not(sreg.subsetregsize in [OS_64,OS_S64]) and
+             (tocgsize in [OS_64,OS_S64]) then
+            tmpdestreg:=cg.getintregister(list,OS_32)
+          else
+            tmpdestreg:=destreg;
+          list.concat(taicpu.op_reg_reg_const_const(op,tmpdestreg,sreg.subsetreg,sreg.startbit,sreg.bitlen));
+          { need to sign extend further or truncate? }
+          if (sreg.subsetregsize=OS_S64) and
+             not(tocgsize in [OS_64,OS_S64]) then
+            cg.a_load_reg_reg(list,OS_S64,tocgsize,tmpdestreg,destreg)
+          else if is_signed(subsetsize) and
+             (tocgsize in [OS_8,OS_16]) then
+            cg.a_load_reg_reg(list,OS_32,tocgsize,tmpdestreg,destreg)
+          else if tmpdestreg<>destreg then
+            cg.a_load_reg_reg(list,def_cgsize(subsetsize),tocgsize,tmpdestreg,destreg)
+        end
+      else
+        cg.a_load_reg_reg(list,def_cgsize(subsetsize),tocgsize,sreg.subsetreg,destreg);
+    end;
+
+
+  procedure makeregssamesize(list: tasmlist; fromsize, tosize: tcgsize; orgfromreg, orgtoreg: tregister; out newfromreg, newtoreg: tregister);
+    begin
+      if (fromsize in [OS_S64,OS_64])<>
+         (tosize in [OS_S64,OS_64]) then
+        begin
+          newfromreg:=cg.makeregsize(list,orgfromreg,OS_64);
+          newtoreg:=cg.makeregsize(list,orgtoreg,OS_64);
+        end
+      else
+        begin
+          newfromreg:=orgfromreg;
+          newtoreg:=orgtoreg;
+        end;
+    end;
+
+
+  procedure thlcgaarch64.a_load_subsetreg_subsetreg(list: TAsmlist; fromsubsetsize, tosubsetsize: tdef; const fromsreg, tosreg: tsubsetregister);
+    var
+      fromreg, toreg: tregister;
+
+    begin
+      { BFM can only insert a bitfield that starts at position 0 in the source
+        source or destination register }
+      if (tosreg.startbit=0) and
+         (fromsreg.bitlen>=tosreg.bitlen) then
+        begin
+          makeregssamesize(list,fromsreg.subsetregsize,tosreg.subsetregsize,fromsreg.subsetreg,tosreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFXIL,toreg,fromreg,fromsreg.startbit,tosreg.bitlen))
+        end
+      else if (fromsreg.startbit=0) and
+         (fromsreg.bitlen>=tosreg.bitlen) then
+        begin
+          makeregssamesize(list,fromsreg.subsetregsize,tosreg.subsetregsize,fromsreg.subsetreg,tosreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFI,toreg,fromreg,tosreg.startbit,tosreg.bitlen))
+        end
+      else
+        inherited;
+    end;
+
+
+  procedure thlcgaarch64.a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt);
+    var
+      toreg: tregister;
+    begin
+      if slopt in [SL_SETZERO,SL_SETMAX] then
+        inherited
+      else if not(sreg.bitlen in [32,64]) then
+        begin
+          makeregssamesize(list,def_cgsize(fromsize),sreg.subsetregsize,fromreg,sreg.subsetreg,fromreg,toreg);
+          list.concat(taicpu.op_reg_reg_const_const(A_BFI,toreg,fromreg,sreg.startbit,sreg.bitlen))
+        end
+      else
+        a_load_reg_reg(list,fromsize,subsetsize,fromreg,sreg.subsetreg);
+    end;
+
+
+  procedure create_hlcodegen;
+    begin
+      hlcg:=thlcgaarch64.create;
+      create_codegen;
+    end;
+
+
+end.

+ 402 - 0
compiler/aarch64/ncpuadd.pas

@@ -0,0 +1,402 @@
+{
+    Copyright (c) 2014 Jonas Maebe
+
+    Code generation for add nodes on AArch64
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuadd;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       node,ncgadd,cpubase;
+
+    type
+       taarch64addnode = class(tcgaddnode)
+       private
+          function  GetResFlags(unsigned:Boolean):TResFlags;
+          function  GetFPUResFlags:TResFlags;
+       protected
+          procedure second_addfloat;override;
+          procedure second_cmpfloat;override;
+          procedure second_cmpboolean;override;
+          procedure second_cmpsmallset;override;
+          procedure second_cmpordinal;override;
+          procedure second_addordinal;override;
+          procedure second_add64bit; override;
+          procedure second_cmp64bit; override;
+       public
+          function use_generic_mul32to64: boolean; override;
+       end;
+
+  implementation
+
+    uses
+      systems,
+      cutils,verbose,
+      paramgr,procinfo,
+      aasmtai,aasmdata,aasmcpu,defutil,
+      cgbase,cgcpu,cgutils,
+      cpupara,
+      ncon,nset,nadd,
+      hlcgobj, ncgutil,cgobj;
+
+{*****************************************************************************
+                               taarch64addnode
+*****************************************************************************}
+
+    function taarch64addnode.GetResFlags(unsigned:Boolean):TResFlags;
+      begin
+        case NodeType of
+          equaln:
+            GetResFlags:=F_EQ;
+          unequaln:
+            GetResFlags:=F_NE;
+          else
+            if not(unsigned) then
+              begin
+                if nf_swapped in flags then
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_GT;
+                    lten:
+                      GetResFlags:=F_GE;
+                    gtn:
+                      GetResFlags:=F_LT;
+                    gten:
+                      GetResFlags:=F_LE;
+                    else
+                      internalerror(2014082010);
+                  end
+                else
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_LT;
+                    lten:
+                      GetResFlags:=F_LE;
+                    gtn:
+                      GetResFlags:=F_GT;
+                    gten:
+                      GetResFlags:=F_GE;
+                    else
+                      internalerror(2014082011);
+                  end;
+              end
+            else
+              begin
+                if nf_swapped in Flags then
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_HI;
+                    lten:
+                      GetResFlags:=F_HS;
+                    gtn:
+                      GetResFlags:=F_LO;
+                    gten:
+                      GetResFlags:=F_LS;
+                    else
+                      internalerror(2014082012);
+                  end
+                else
+                  case NodeType of
+                    ltn:
+                      GetResFlags:=F_LO;
+                    lten:
+                      GetResFlags:=F_LS;
+                    gtn:
+                      GetResFlags:=F_HI;
+                    gten:
+                      GetResFlags:=F_HS;
+                    else
+                      internalerror(2014082013);
+                  end;
+              end;
+        end;
+      end;
+
+
+    function taarch64addnode.GetFPUResFlags:TResFlags;
+      begin
+        case NodeType of
+          equaln:
+            result:=F_EQ;
+          unequaln:
+            result:=F_NE;
+          else
+            begin
+              if nf_swapped in Flags then
+                case NodeType of
+                  ltn:
+                    result:=F_GT;
+                  lten:
+                    result:=F_GE;
+                  gtn:
+                    result:=F_LO;
+                  gten:
+                    result:=F_LS;
+                  else
+                    internalerror(2014082014);
+                end
+              else
+                case NodeType of
+                  ltn:
+                    result:=F_LO;
+                  lten:
+                    result:=F_LS;
+                  gtn:
+                    result:=F_GT;
+                  gten:
+                    result:=F_GE;
+                  else
+                    internalerror(2014082015);
+                end;
+            end;
+        end;
+      end;
+
+
+    procedure taarch64addnode.second_addfloat;
+      var
+        op : TAsmOp;
+      begin
+        pass_left_right;
+        if nf_swapped in flags then
+          swapleftright;
+
+        { force fpureg as location, left right doesn't matter
+          as both will be in a fpureg }
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+
+        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+
+        case nodetype of
+          addn :
+            begin
+              op:=A_FADD;
+            end;
+          muln :
+            begin
+              op:=A_FMUL;
+            end;
+          subn :
+            begin
+              op:=A_FSUB;
+            end;
+          slashn :
+            begin
+              op:=A_FDIV;
+            end;
+          else
+            internalerror(200306014);
+        end;
+
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,
+           location.register,left.location.register,right.location.register));
+      end;
+
+
+    procedure taarch64addnode.second_cmpfloat;
+      begin
+        pass_left_right;
+        if nf_swapped in flags then
+          swapleftright;
+
+        { force fpureg as location, left right doesn't matter
+          as both will be in a fpureg }
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getfpuresflags;
+
+        { signalling compare so we can get exceptions }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMPE,
+             left.location.register,right.location.register));
+      end;
+
+
+    procedure taarch64addnode.second_cmpboolean;
+      begin
+        pass_left_right;
+        force_reg_left_right(true,true);
+
+        if right.location.loc=LOC_CONSTANT then
+          begin
+            if right.location.value>=0 then
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false)
+            else
+              { avoid overflow if value=low(int64) }
+{$push}{$r-}{$q-}
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
+{$pop}
+          end
+        else
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getresflags(true);
+      end;
+
+
+    procedure taarch64addnode.second_cmpsmallset;
+      var
+        tmpreg : tregister;
+        op: tasmop;
+      begin
+        pass_left_right;
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+
+        force_reg_left_right(true,true);
+
+        if right.location.loc=LOC_CONSTANT then
+          begin
+            { when doing a cmp/cmn on 32 bit, we care whether the *lower 32 bit*
+              is a positive/negative value -> sign extend }
+            if not(right.location.size in [OS_64,OS_S64]) then
+              right.location.value:=longint(right.location.value);
+            if right.location.value>=0 then
+              op:=A_CMP
+            else
+              op:=A_CMN;
+          end
+        else
+          { for DFA }
+          op:=A_NONE;
+
+        case nodetype of
+          equaln,
+          unequaln:
+            begin
+              if right.location.loc=LOC_CONSTANT then
+                tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,op,def_cgsize(resultdef),left.location.register,abs(right.location.value),NR_XZR,NR_NO,false,false)
+              else
+                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+              location.resflags:=getresflags(true);
+            end;
+          lten,
+          gten:
+            begin
+              if (not(nf_swapped in flags) and
+                  (nodetype=lten)) or
+                 ((nf_swapped in flags) and
+                  (nodetype=gten)) then
+                swapleftright;
+              { we can't handle left as a constant yet }
+              if left.location.loc=LOC_CONSTANT then
+                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+              tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,left.location.size);
+              if right.location.loc=LOC_CONSTANT then
+                begin
+                  hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_AND,resultdef,right.location.value,left.location.register,tmpreg);
+                  tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,op,def_cgsize(resultdef),tmpreg,abs(right.location.value),NR_XZR,NR_NO,false,false)
+                end
+              else
+                begin
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_AND,tmpreg,left.location.register,right.location.register));
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,tmpreg,right.location.register));
+                end;
+              location.resflags:=F_EQ;
+            end;
+          else
+            internalerror(2012042701);
+        end;
+      end;
+
+
+    procedure taarch64addnode.second_cmpordinal;
+      var
+        unsigned : boolean;
+      begin
+        pass_left_right;
+        force_reg_left_right(true,true);
+
+        unsigned:=not(is_signed(left.resultdef)) or
+                  not(is_signed(right.resultdef));
+
+        if right.location.loc = LOC_CONSTANT then
+          begin
+            if right.location.value>=0 then
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMP,left.location.size,left.location.register,right.location.value,NR_XZR,NR_NO,false,false)
+            else
+{$push}{$r-}{$q-}
+              Tcgaarch64(cg).handle_reg_imm12_reg(current_asmdata.CurrAsmList,A_CMN,left.location.size,left.location.register,-right.location.value,NR_XZR,NR_NO,false,false)
+{$pop}
+          end
+        else
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,left.location.register,right.location.register));
+
+        location_reset(location,LOC_FLAGS,OS_NO);
+        location.resflags:=getresflags(unsigned);
+      end;
+
+
+    procedure taarch64addnode.second_addordinal;
+      const
+        multops: array[boolean] of TAsmOp = (A_SMULL,A_UMULL);
+      var
+        unsigned: boolean;
+      begin
+        { 32x32->64 multiplication }
+        if (nodetype=muln) and
+           is_32bit(left.resultdef) and
+           is_32bit(right.resultdef) and
+           is_64bit(resultdef) then
+          begin
+            unsigned:=not(is_signed(left.resultdef)) or
+                      not(is_signed(right.resultdef));
+            pass_left_right;
+            force_reg_left_right(true,true);
+            { force_reg_left_right can leave right as a LOC_CONSTANT (we can't
+              say "a constant register is okay, but an ordinal constant isn't) }
+            if right.location.loc=LOC_CONSTANT then
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+            location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+            location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+            current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg_reg(multops[unsigned],location.register,left.location.register,right.location.register));
+          end
+        else
+          inherited second_addordinal;
+      end;
+
+
+    procedure taarch64addnode.second_add64bit;
+      begin
+        second_addordinal;
+      end;
+
+
+    procedure taarch64addnode.second_cmp64bit;
+      begin
+        second_cmpordinal;
+      end;
+
+
+    function taarch64addnode.use_generic_mul32to64: boolean;
+      begin
+        result:=false;
+      end;
+
+
+begin
+  caddnode:=taarch64addnode;
+end.

+ 201 - 0
compiler/aarch64/ncpucnv.pas

@@ -0,0 +1,201 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    Generate AArch64 assembler for type converting nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit ncpucnv;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ncnv,ncgcnv;
+
+    type
+      taarch64typeconvnode = class(TCgTypeConvNode)
+        protected
+         function typecheck_int_to_real: tnode; override;
+         function first_int_to_real: tnode; override;
+
+        { procedure second_int_to_int;override; }
+        { procedure second_string_to_string;override; }
+        { procedure second_cstring_to_pchar;override; }
+        { procedure second_string_to_chararray;override; }
+        { procedure second_array_to_pointer;override; }
+        { procedure second_pointer_to_array;override; }
+        { procedure second_chararray_to_string;override; }
+        { procedure second_char_to_string;override; }
+         procedure second_int_to_real;override;
+        { procedure second_real_to_real;override; }
+        { procedure second_cord_to_pointer;override; }
+        { procedure second_proc_to_procvar;override; }
+        { procedure second_bool_to_int;override; }
+         procedure second_int_to_bool;override;
+        { procedure second_load_smallset;override;  }
+        { procedure second_ansistring_to_pchar;override; }
+        { procedure second_pchar_to_string;override; }
+        { procedure second_class_to_intf;override; }
+        { procedure second_char_to_char;override; }
+      end;
+
+implementation
+
+  uses
+    verbose,globals,
+    symdef,aasmdata,aasmbase,
+    defutil,
+    cgbase,cgutils,procinfo,
+    cpubase,aasmcpu,
+    pass_2,cgobj,
+    hlcgobj;
+
+
+{*****************************************************************************
+                             FirstTypeConv
+*****************************************************************************}
+
+  function taarch64typeconvnode.typecheck_int_to_real: tnode;
+    begin
+      { aarch64 supports converting everything to floating point, even fixed
+        point! Unfortunately, it only supports fixed point with a power-of-2
+        fraction, which is not the case for currency.
+
+        Generate the division by 10000 via nodes so the 10000.0 constant can
+        be reused. }
+      if is_currency(resultdef) and
+         not(nf_is_currency in flags) then
+        begin
+          { convert the equivalent int64 value to double without conversion
+            (internal typecast -> will set nf_is_currency flag) }
+          result:=ctypeconvnode.create_internal(left,s64floattype);
+          { turn into currency with conversion, which will divide by 10000
+            (regular typecast) }
+          result:=ctypeconvnode.create(result,s64currencytype);
+          exit;
+        end;
+      { The only other thing we have to take care of: convert values < 32 bit
+        to 32 bit }
+      if left.resultdef.size<4 then
+        begin
+          if is_signed(left.resultdef) then
+            inserttypeconv(left,s32inttype)
+          else
+            inserttypeconv(left,u32inttype)
+        end;
+      result:=inherited;
+    end;
+
+
+  function taarch64typeconvnode.first_int_to_real: tnode;
+    begin
+      result:=nil;
+      expectloc:=LOC_MMREGISTER;
+    end;
+
+
+{*****************************************************************************
+                             SecondTypeConv
+*****************************************************************************}
+
+  procedure taarch64typeconvnode.second_int_to_real;
+    var
+      op: tasmop;
+    begin
+      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
+        internalerror(2014120401);
+      case left.location.size of
+        OS_32,
+        OS_64:
+          op:=A_UCVTF;
+        OS_S32,
+        OS_S64,
+        { for currency and comp }
+        OS_F64:
+          op:=A_SCVTF;
+        else
+          internalerror(2014120402);
+      end;
+      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,location.register,left.location.register));
+      { no scaling for currency, that's handled in pass_typecheck }
+    end;
+
+
+  procedure taarch64typeconvnode.second_int_to_bool;
+    var
+      resflags: tresflags;
+      hlabel,oldTrueLabel,oldFalseLabel : tasmlabel;
+    begin
+      if (nf_explicit in flags) and
+         not(left.expectloc in [LOC_FLAGS,LOC_JUMP]) then
+        begin
+          inherited;
+          exit;
+        end;
+
+      { can't use the generic code, as it assumes that OP_OR automatically sets
+        the flags. We can also do things more efficiently directly }
+
+      oldTrueLabel:=current_procinfo.CurrTrueLabel;
+      oldFalseLabel:=current_procinfo.CurrFalseLabel;
+      current_asmdata.getjumplabel(current_procinfo.CurrTrueLabel);
+      current_asmdata.getjumplabel(current_procinfo.CurrFalseLabel);
+      secondpass(left);
+      if codegenerror then
+       exit;
+
+      case left.location.loc of
+        LOC_CREFERENCE,
+        LOC_REFERENCE,
+        LOC_REGISTER,
+        LOC_CREGISTER,
+        LOC_JUMP:
+          begin
+             hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+             current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,left.location.register,0));
+             resflags:=F_NE;
+          end;
+        LOC_FLAGS :
+          resflags:=left.location.resflags;
+        else
+          internalerror(2014122902);
+      end;
+      { load flags to register }
+      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+      if is_cbool(resultdef) then
+        begin
+          current_asmdata.CurrAsmList.concat(taicpu.op_reg_cond(A_CSETM,location.register,flags_to_cond(resflags)));
+            { truncate? (in case cbools are ever made unsigned) }
+            if resultdef.size<4 then
+              cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_32,location.size,location.register,location.register);
+        end
+      else
+        cg.g_flags2reg(current_asmdata.CurrAsmList,location.size,resflags,location.register);
+      cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
+      current_procinfo.CurrTrueLabel:=oldTrueLabel;
+      current_procinfo.CurrFalseLabel:=oldFalseLabel;
+    end;
+
+
+begin
+   ctypeconvnode:=taarch64typeconvnode;
+end.

+ 184 - 0
compiler/aarch64/ncpuinl.pas

@@ -0,0 +1,184 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    Generates ARM inline nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuinl;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ninl,ncginl;
+
+    type
+      taarch64inlinenode = class(tcgInlineNode)
+        function first_abs_real: tnode; override;
+        function first_sqr_real: tnode; override;
+        function first_sqrt_real: tnode; override;
+        function first_round_real: tnode; override;
+        function first_trunc_real: tnode; override;
+        procedure second_abs_real; override;
+        procedure second_sqr_real; override;
+        procedure second_sqrt_real; override;
+        procedure second_abs_long; override;
+        procedure second_round_real; override;
+        procedure second_trunc_real; override;
+        procedure second_get_frame; override;
+      private
+        procedure load_fpu_location;
+      end;
+
+
+implementation
+
+    uses
+      globtype,verbose,globals,
+      cpuinfo, defutil,symdef,aasmdata,aasmcpu,
+      cgbase,cgutils,pass_1,pass_2,
+      cpubase,ncgutil,cgobj,cgcpu, hlcgobj;
+
+{*****************************************************************************
+                              taarch64inlinenode
+*****************************************************************************}
+
+    procedure taarch64inlinenode.load_fpu_location;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_copy(location,left.location);
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+        location.loc:=LOC_MMREGISTER;
+      end;
+
+
+    function taarch64inlinenode.first_abs_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_sqr_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_sqrt_real : tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_round_real: tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    function taarch64inlinenode.first_trunc_real: tnode;
+      begin
+        expectloc:=LOC_MMREGISTER;
+        result:=nil;
+      end;
+
+
+    procedure taarch64inlinenode.second_abs_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FABS,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_sqr_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_FMUL,location.register,left.location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_sqrt_real;
+      begin
+        load_fpu_location;
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FSQRT,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_abs_long;
+      var
+        opsize : tcgsize;
+        hp : taicpu;
+      begin
+        secondpass(left);
+        opsize:=def_cgsize(left.resultdef);
+        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+        location:=left.location;
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
+
+        current_asmdata.CurrAsmList.concat(setoppostfix(taicpu.op_reg_reg(A_NEG,location.register,left.location.register),PF_S));
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_cond(A_CSEL,location.register,location.register,left.location.register,C_GE));
+      end;
+
+
+    procedure taarch64inlinenode.second_round_real;
+      var
+        hreg: tregister;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+        hreg:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
+        { round as floating point using current rounding mode }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FRINTX,hreg,left.location.register));
+        { convert to signed integer rounding towards zero (there's no "round to
+          integer using current rounding mode") }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,hreg));
+      end;
+
+
+    procedure taarch64inlinenode.second_trunc_real;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+        location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+        { convert to signed integer rounding towards zero }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCVTZS,location.register,left.location.register));
+      end;
+
+
+    procedure taarch64inlinenode.second_get_frame;
+      begin
+        location_reset(location,LOC_CREGISTER,OS_ADDR);
+        { this routine is used to get the frame pointer for backtracing
+          purposes. current_procinfo.framepointer is set to SP because that one
+          is used to access temps. On most platforms these two frame pointers
+          are the same, but not on AArch64. }
+        location.register:=NR_FRAME_POINTER_REG;
+      end;
+
+begin
+  cinlinenode:=taarch64inlinenode;
+end.

+ 196 - 0
compiler/aarch64/ncpumat.pas

@@ -0,0 +1,196 @@
+{
+    Copyright (c) 1998-2002, 2014 by Florian Klaempfl and Jonas Maebe
+
+    Generate AArch64 assembler for math nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpumat;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,nmat,ncgmat;
+
+    type
+      taarch64moddivnode = class(tmoddivnode)
+         function pass_1: tnode; override;
+         procedure pass_generate_code;override;
+      end;
+
+      taarch64notnode = class(tcgnotnode)
+         procedure second_boolean;override;
+      end;
+
+      taarch64unaryminusnode = class(tcgunaryminusnode)
+         procedure second_float; override;
+      end;
+
+implementation
+
+    uses
+      globtype,systems,constexp,
+      cutils,verbose,globals,
+      symconst,symdef,
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      defutil,
+      cgbase,cgobj,hlcgobj,pass_2,procinfo,
+      ncon,
+      cpubase,
+      ncgutil,cgcpu,cgutils;
+
+{*****************************************************************************
+                             taarch64moddivnode
+*****************************************************************************}
+
+    function taarch64moddivnode.pass_1: tnode;
+      begin
+        result:=inherited pass_1;
+        if not assigned(result) then
+          include(current_procinfo.flags,pi_do_call);
+      end;
+
+
+    procedure taarch64moddivnode.pass_generate_code;
+      var
+         op         : tasmop;
+         tmpreg,
+         numerator,
+         divider,
+         resultreg  : tregister;
+         hl : tasmlabel;
+         overflowloc: tlocation;
+      begin
+       secondpass(left);
+       secondpass(right);
+
+       { set result location }
+       location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+       location.register:=cg.getintregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
+       resultreg:=location.register;
+
+       { put numerator in register }
+       hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+       numerator:=left.location.register;
+
+       { load divider in a register }
+       hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
+       divider:=right.location.register;
+
+       { start division }
+       if is_signed(left.resultdef) then
+         op:=A_SDIV
+       else
+         op:=A_UDIV;
+       current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(op,location.register,numerator,divider));
+
+       { no divide-by-zero detection available in hardware, emulate (if it's a
+         constant, this will have been detected earlier already) }
+       if (right.nodetype<>ordconstn) then
+         begin
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
+             right.location.register,0));
+
+           current_asmdata.getjumplabel(hl);
+           current_asmdata.CurrAsmList.concat(taicpu.op_cond_sym(A_B,C_NE,hl));
+           cg.a_call_name(current_asmdata.CurrAsmList,'FPC_DIVBYZERO',false);
+           cg.a_label(current_asmdata.CurrAsmList,hl);
+         end;
+
+       { in case of overflow checking, also check for low(int64) div (-1)
+         (no hardware support for this either) }
+       if (cs_check_overflow in current_settings.localswitches) and
+          is_signed(left.resultdef) and
+          ((right.nodetype<>ordconstn) or
+           (tordconstnode(right).value=-1)) then
+         begin
+           { num=ffff... and div=8000... <=>
+             num xor not(div xor 8000...) = 0
+             (and we have the "eon" operation, which performs "xor not(...)" }
+           tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,left.resultdef);
+           hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.resultdef,low(int64),left.location.register,tmpreg);
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_EON,
+             tmpreg,left.location.register,tmpreg));
+           current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,tmpreg,0));
+           { now the zero/equal flag is set in case we divided low(int64) by
+             (-1) }
+           location_reset(overflowloc,LOC_FLAGS,OS_NO);
+           overflowloc.resflags:=F_EQ;
+           cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,location,resultdef,overflowloc);
+         end;
+
+       { in case of modulo, multiply result again by the divider and subtract
+         from the numerator }
+       if nodetype=modn then
+         current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg_reg(A_MSUB,resultreg,
+           resultreg,divider,numerator));
+    end;
+
+
+{*****************************************************************************
+                               taarch64notnode
+*****************************************************************************}
+
+    procedure taarch64notnode.second_boolean;
+      begin
+        if not handle_locjump then
+          begin
+            secondpass(left);
+            case left.location.loc of
+              LOC_FLAGS :
+                begin
+                  location_copy(location,left.location);
+                  inverse_flags(location.resflags);
+                end;
+              LOC_REGISTER, LOC_CREGISTER,
+              LOC_REFERENCE, LOC_CREFERENCE,
+              LOC_SUBSETREG, LOC_CSUBSETREG,
+              LOC_SUBSETREF, LOC_CSUBSETREF:
+                begin
+                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_CMP,
+                    left.location.register,0));
+                  location_reset(location,LOC_FLAGS,OS_NO);
+                  location.resflags:=F_EQ;
+               end;
+              else
+                internalerror(2003042401);
+            end;
+          end;
+      end;
+
+
+{*****************************************************************************
+                                   taarch64unaryminusnode
+*****************************************************************************}
+
+    procedure taarch64unaryminusnode.second_float;
+      begin
+        secondpass(left);
+        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
+        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,location.register,left.location.register));
+      end;
+
+begin
+   cmoddivnode:=taarch64moddivnode;
+   cnotnode:=taarch64notnode;
+   cunaryminusnode:=taarch64unaryminusnode;
+end.

+ 142 - 0
compiler/aarch64/ncpumem.pas

@@ -0,0 +1,142 @@
+{
+    Copyright (c) 2014 by Jonas Maebe
+
+    Generate AArch64 code for in memory related nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpumem;
+
+{$i fpcdefs.inc}
+
+interface
+
+  uses
+    globtype,
+    cgbase,
+    symtype,
+    node,nmem,ncgmem;
+
+  type
+    taarch64loadparentfpnode = class(tcgloadparentfpnode)
+      procedure pass_generate_code; override;
+    end;
+
+    taarch64vecnode = class(tcgvecnode)
+     protected
+      function valid_index_size(size: tcgsize): boolean; override;
+     public
+       procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint); override;
+    end;
+
+implementation
+
+  uses
+    cutils,verbose,
+    defutil,
+    aasmdata,cpubase,
+    cgutils,
+    cgobj;
+
+  { taarch64loadparentfpnode }
+
+  procedure taarch64loadparentfpnode.pass_generate_code;
+    begin
+      inherited pass_generate_code;
+      { see the comments in tcgaarch64.g_proc_entry }
+      if (location.loc in [LOC_REGISTER,LOC_CREGISTER]) and
+         (location.register=NR_STACK_POINTER_REG) then
+        if (kind=lpf_forpara) then
+          location.register:=NR_FRAME_POINTER_REG
+        else
+          begin
+            { load stack pointer in a different register, as many instructions
+              cannot directly work with the stack pointer. The register
+              allocator can merge them if possible }
+            location.register:=cg.getaddressregister(current_asmdata.CurrAsmList);
+            cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,NR_STACK_POINTER_REG,location.register);
+            location.loc:=LOC_REGISTER;
+          end;
+    end;
+
+
+  { taarch64vecnode }
+
+  function taarch64vecnode.valid_index_size(size: tcgsize): boolean;
+    begin
+      { all sizes are ok if we handle the "reference reg mul", because
+         a) we use a 64 bit register for 64 bit values, and a 32 bit one (that
+            we can sign/zero-extend inside the reference) for smaller values
+         b) for values < 32 bit, the entire 32 bit register always contains the
+            sign/zero-extended version of the value }
+      result:=
+        not is_packed_array(left.resultdef) and
+        (get_mul_size in [1,2,4,8,16]);
+    end;
+
+
+  procedure taarch64vecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
+    var
+      base: tregister;
+      oldoffset: asizeint;
+      shift: byte;
+    begin
+      { we can only scale the index by shl 0..4 }
+      if not(l in [1,2,4,8,16]) then
+        begin
+          inherited;
+          exit;
+        end;
+      { we need a base set and an index available }
+      if (location.reference.base=NR_NO) or
+         (location.reference.index<>NR_NO) then
+        begin
+          { don't integrate the offset yet, make_simple_ref() may be able to
+            handle it more efficiently later (unless an offset is all we have
+            -> optimization for someone that wants to add support for AArch64
+            embedded targets) }
+          oldoffset:=location.reference.offset;
+          location.reference.offset:=0;
+          base:=cg.getaddressregister(current_asmdata.CurrAsmList);
+          cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,location.reference,base);
+          reference_reset_base(location.reference,base,oldoffset,location.reference.alignment);
+        end;
+      shift:=BsfDWord(l);
+      location.reference.index:=maybe_const_reg;
+      { sign/zero-extend? }
+      if regsize.size=8 then
+        if shift<>0 then
+          location.reference.shiftmode:=SM_LSL
+        else
+          location.reference.shiftmode:=SM_NONE
+      else if is_signed(regsize) then
+        location.reference.shiftmode:=SM_SXTW
+      else if shift<>0 then
+        location.reference.shiftmode:=SM_UXTW
+      else
+        { the upper 32 bits are always already zero-extended -> just use 64 bit
+          register }
+        location.reference.index:=cg.makeregsize(current_asmdata.CurrAsmList,location.reference.index,OS_64);
+      location.reference.shiftimm:=shift;
+      location.reference.alignment:=newalignment(location.reference.alignment,l);
+    end;
+
+
+begin
+  cloadparentfpnode:=taarch64loadparentfpnode;
+  cvecnode:=taarch64vecnode;
+end.

+ 175 - 0
compiler/aarch64/ncpuset.pas

@@ -0,0 +1,175 @@
+{
+    Copyright (c) 2015 by Jonas Maebe
+
+    Generate AArch64 assembler for in set/case nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit ncpuset;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       node,nset,ncgset,cpubase,cgbase,cgobj,aasmbase,aasmtai,aasmdata,globtype;
+
+    type
+       taarch64casenode = class(tcgcasenode)
+         protected
+           procedure optimizevalues(var max_linear_list: aint; var max_dist: aword);override;
+           function  has_jumptable: boolean;override;
+           procedure genjumptable(hp: pcaselabel ;min_, max_: aint);override;
+       end;
+
+
+implementation
+
+    uses
+      systems,
+      verbose,globals,constexp,
+      symconst,symdef,defutil,
+      paramgr,
+      cpuinfo,
+      pass_2,cgcpu,
+      ncon,
+      tgobj,ncgutil,regvars,rgobj,aasmcpu,
+      procinfo,
+      cgutils;
+
+{*****************************************************************************
+                            TCGCASENODE
+*****************************************************************************}
+
+
+    procedure taarch64casenode.optimizevalues(var max_linear_list: aint; var max_dist: aword);
+      begin
+        max_linear_list:=10;
+      end;
+    
+
+    function taarch64casenode.has_jumptable: boolean;
+      begin
+        has_jumptable:=true;
+      end;
+
+
+    procedure taarch64casenode.genjumptable(hp: pcaselabel; min_, max_: aint);
+      var
+        last: TConstExprInt;
+        tablelabel: TAsmLabel;
+        basereg,indexreg,jumpreg: TRegister;
+        href: TReference;
+        opcgsize: tcgsize;
+        sectype: TAsmSectiontype;
+        jtitemconsttype: taiconst_type;
+
+      procedure genitem(list:TAsmList;t : pcaselabel);
+        var
+          i : aint;
+        begin
+          if assigned(t^.less) then
+            genitem(list,t^.less);
+          { fill possible hole }
+          i:=last.svalue+1;
+          while i<=t^._low.svalue-1 do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,elselabel));
+              inc(i);
+            end;
+          i:=t^._low.svalue;
+          while i<=t^._high.svalue do
+            begin
+              list.concat(Tai_const.Create_rel_sym(jtitemconsttype,tablelabel,blocklabel(t^.blockid)));
+              inc(i);
+            end;
+          last:=t^._high;
+          if assigned(t^.greater) then
+            genitem(list,t^.greater);
+        end;
+
+      begin
+        if not(target_info.system in systems_darwin) then
+          jtitemconsttype:=aitconst_32bit
+        else
+          { see https://gmplib.org/list-archives/gmp-bugs/2012-December/002836.html }
+          jtitemconsttype:=aitconst_darwin_dwarf_delta32;
+
+        last:=min_;
+        opcgsize:=def_cgsize(opsize);
+        { a <= x <= b <-> unsigned(x-a) <= (b-a) }
+        cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SUB,opcgsize,aint(min_),hregister);
+        if not(jumptable_no_range) then
+          begin
+             { case expr greater than max_ => goto elselabel }
+             cg.a_cmp_const_reg_label(current_asmdata.CurrAsmList,opcgsize,OC_A,aint(max_)-aint(min_),hregister,elselabel);
+             min_:=0;
+          end;
+        { local label in order to avoid using GOT }
+        current_asmdata.getlabel(tablelabel,alt_data);
+        indexreg:=cg.makeregsize(current_asmdata.CurrAsmList,hregister,OS_ADDR);
+        cg.a_load_reg_reg(current_asmdata.CurrAsmList,opcgsize,OS_ADDR,hregister,indexreg);
+        { load table address }
+        reference_reset_symbol(href,tablelabel,0,4);
+        basereg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,href,basereg);
+        { load table slot, 32-bit sign extended }
+        reference_reset_base(href,basereg,0,4);
+        href.index:=indexreg;
+        href.shiftmode:=SM_LSL;
+        href.shiftimm:=2;
+        jumpreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+        cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_S32,OS_ADDR,href,jumpreg);
+        { add table address }
+        cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_ADD,OS_ADDR,basereg,jumpreg);
+        { and finally jump }
+        current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_BR,jumpreg));
+        { generate jump table }
+        if not(target_info.system in systems_darwin) then
+          sectype:=sec_rodata
+        else
+          begin
+            { on Mac OS X, dead code stripping ("smart linking") happens based on
+              global symbols: every global/static symbol (symbols that do not
+              start with "L") marks the start of a new "subsection" that is
+              discarded by the linker if there are no references to this symbol.
+              This means that if you put the jump table in the rodata section, it
+              will become part of the block of data associated with the previous
+              non-L-label in the rodata section and stay or be thrown away
+              depending on whether that block of data is referenced. Therefore,
+              jump tables must be added in the code section and since aktlocaldata
+              is inserted right after the routine, it will become part of the
+              same subsection that contains the routine's code }
+            sectype:=sec_code;
+          end;
+        new_section(current_procinfo.aktlocaldata,sectype,current_procinfo.procdef.mangledname,4);
+        if target_info.system in systems_darwin then
+          begin
+            { additionally, these tables are now marked via ".data_region jt32"
+              and ".end_data_region" }
+            current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_data_region,'jt32'));
+          end;
+        current_procinfo.aktlocaldata.concat(Tai_label.Create(tablelabel));
+        genitem(current_procinfo.aktlocaldata,hp);
+        if target_info.system in systems_darwin then
+          current_procinfo.aktlocaldata.concat(tai_directive.Create(asd_end_data_region,''));
+      end;
+
+
+begin
+   ccasenode:=taarch64casenode;
+end.

+ 5 - 0
compiler/aarch64/ra64con.inc

@@ -64,6 +64,8 @@ NR_W30 = tregister($0104001E);
 NR_X30 = tregister($0105001E);
 NR_WZR = tregister($0104001F);
 NR_XZR = tregister($0105001F);
+NR_WSP = tregister($01040020);
+NR_SP = tregister($01050020);
 NR_B0 = tregister($04010000);
 NR_H0 = tregister($04030000);
 NR_S0 = tregister($04090000);
@@ -225,3 +227,6 @@ NR_S31 = tregister($0409001F);
 NR_D31 = tregister($040a001F);
 NR_Q31 = tregister($0405001F);
 NR_NZCV = tregister($05000000);
+NR_FPCR = tregister($05000001);
+NR_FPSR = tregister($05000002);
+NR_TPIDR_EL0 = tregister($05000003);

+ 162 - 157
compiler/aarch64/ra64dwa.inc

@@ -64,164 +64,169 @@
 30,
 31,
 31,
+31,
+31,
+64,
+64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+73,
+73,
+73,
+73,
+73,
+74,
+74,
+74,
+74,
+74,
+75,
+75,
+75,
+75,
+75,
+76,
+76,
+76,
+76,
+76,
+77,
+77,
+77,
+77,
+77,
+78,
+78,
+78,
+78,
+78,
+79,
+79,
+79,
+79,
+79,
+80,
+80,
+80,
+80,
+80,
+81,
+81,
+81,
+81,
+81,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+84,
+84,
+84,
+84,
+84,
+85,
+85,
+85,
+85,
+85,
+86,
+86,
+86,
+86,
+86,
+87,
+87,
+87,
+87,
+87,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+95,
+95,
+95,
+95,
+95,
 0,
 0,
 0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-7,
-8,
-8,
-8,
-8,
-8,
-9,
-9,
-9,
-9,
-9,
-10,
-10,
-10,
-10,
-10,
-11,
-11,
-11,
-11,
-11,
-12,
-12,
-12,
-12,
-12,
-13,
-13,
-13,
-13,
-13,
-14,
-14,
-14,
-14,
-14,
-15,
-15,
-15,
-15,
-15,
-16,
-16,
-16,
-16,
-16,
-17,
-17,
-17,
-17,
-17,
-18,
-18,
-18,
-18,
-18,
-19,
-19,
-19,
-19,
-19,
-20,
-20,
-20,
-20,
-20,
-21,
-21,
-21,
-21,
-21,
-22,
-22,
-22,
-22,
-22,
-23,
-23,
-23,
-23,
-23,
-24,
-24,
-24,
-24,
-24,
-25,
-25,
-25,
-25,
-25,
-26,
-26,
-26,
-26,
-26,
-27,
-27,
-27,
-27,
-27,
-28,
-28,
-28,
-28,
-28,
-29,
-29,
-29,
-29,
-29,
-30,
-30,
-30,
-30,
-30,
-31,
-31,
-31,
-31,
-31,
 0

+ 1 - 1
compiler/aarch64/ra64nor.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from a64reg.dat }
-226
+231

+ 6 - 1
compiler/aarch64/ra64num.inc

@@ -64,6 +64,8 @@ tregister($0104001E),
 tregister($0105001E),
 tregister($0104001F),
 tregister($0105001F),
+tregister($01040020),
+tregister($01050020),
 tregister($04010000),
 tregister($04030000),
 tregister($04090000),
@@ -224,4 +226,7 @@ tregister($0403001F),
 tregister($0409001F),
 tregister($040a001F),
 tregister($0405001F),
-tregister($05000000)
+tregister($05000000),
+tregister($05000001),
+tregister($05000002),
+tregister($05000003)

+ 101 - 96
compiler/aarch64/ra64rni.inc

@@ -32,6 +32,7 @@
 59,
 61,
 63,
+65,
 2,
 4,
 6,
@@ -64,102 +65,7 @@
 60,
 62,
 64,
-65,
-70,
-75,
-80,
-85,
-90,
-95,
-100,
-105,
-110,
-115,
-120,
-125,
-130,
-135,
-140,
-145,
-150,
-155,
-160,
-165,
-170,
-175,
-180,
-185,
-190,
-195,
-200,
-205,
-210,
-215,
-220,
 66,
-71,
-76,
-81,
-86,
-91,
-96,
-101,
-106,
-111,
-116,
-121,
-126,
-131,
-136,
-141,
-146,
-151,
-156,
-161,
-166,
-171,
-176,
-181,
-186,
-191,
-196,
-201,
-206,
-211,
-216,
-221,
-69,
-74,
-79,
-84,
-89,
-94,
-99,
-104,
-109,
-114,
-119,
-124,
-129,
-134,
-139,
-144,
-149,
-154,
-159,
-164,
-169,
-174,
-179,
-184,
-189,
-194,
-199,
-204,
-209,
-214,
-219,
-224,
 67,
 72,
 77,
@@ -224,4 +130,103 @@
 213,
 218,
 223,
-225
+71,
+76,
+81,
+86,
+91,
+96,
+101,
+106,
+111,
+116,
+121,
+126,
+131,
+136,
+141,
+146,
+151,
+156,
+161,
+166,
+171,
+176,
+181,
+186,
+191,
+196,
+201,
+206,
+211,
+216,
+221,
+226,
+69,
+74,
+79,
+84,
+89,
+94,
+99,
+104,
+109,
+114,
+119,
+124,
+129,
+134,
+139,
+144,
+149,
+154,
+159,
+164,
+169,
+174,
+179,
+184,
+189,
+194,
+199,
+204,
+209,
+214,
+219,
+224,
+70,
+75,
+80,
+85,
+90,
+95,
+100,
+105,
+110,
+115,
+120,
+125,
+130,
+135,
+140,
+145,
+150,
+155,
+160,
+165,
+170,
+175,
+180,
+185,
+190,
+195,
+200,
+205,
+210,
+215,
+220,
+225,
+227,
+228,
+229,
+230

+ 48 - 43
compiler/aarch64/ra64sri.inc

@@ -1,8 +1,39 @@
 { don't edit, this file is generated from a64reg.dat }
 0,
-65,
+67,
+72,
+117,
+122,
+127,
+132,
+137,
+142,
+147,
+152,
+157,
+162,
+77,
+167,
+172,
+177,
+182,
+187,
+192,
+197,
+202,
+207,
+212,
+82,
+217,
+222,
+87,
+92,
+97,
+102,
+107,
+112,
 70,
-115,
+75,
 120,
 125,
 130,
@@ -12,8 +43,8 @@
 150,
 155,
 160,
-75,
 165,
+80,
 170,
 175,
 180,
@@ -23,15 +54,18 @@
 200,
 205,
 210,
-80,
 215,
-220,
 85,
+220,
+225,
 90,
 95,
 100,
 105,
 110,
+115,
+228,
+229,
 68,
 73,
 118,
@@ -64,9 +98,9 @@
 103,
 108,
 113,
-66,
+227,
 71,
-116,
+76,
 121,
 126,
 131,
@@ -76,8 +110,8 @@
 151,
 156,
 161,
-76,
 166,
+81,
 171,
 176,
 181,
@@ -87,16 +121,16 @@
 201,
 206,
 211,
-81,
 216,
-221,
 86,
+221,
+226,
 91,
 96,
 101,
 106,
 111,
-225,
+116,
 69,
 74,
 119,
@@ -129,38 +163,8 @@
 104,
 109,
 114,
-67,
-72,
-117,
-122,
-127,
-132,
-137,
-142,
-147,
-152,
-157,
-162,
-77,
-167,
-172,
-177,
-182,
-187,
-192,
-197,
-202,
-207,
-212,
-82,
-217,
-222,
-87,
-92,
-97,
-102,
-107,
-112,
+66,
+230,
 1,
 3,
 21,
@@ -192,6 +196,7 @@
 15,
 17,
 19,
+65,
 63,
 2,
 4,

+ 162 - 157
compiler/aarch64/ra64sta.inc

@@ -64,164 +64,169 @@
 30,
 31,
 31,
+31,
+31,
+64,
+64,
+64,
+64,
+64,
+65,
+65,
+65,
+65,
+65,
+66,
+66,
+66,
+66,
+66,
+67,
+67,
+67,
+67,
+67,
+68,
+68,
+68,
+68,
+68,
+69,
+69,
+69,
+69,
+69,
+70,
+70,
+70,
+70,
+70,
+71,
+71,
+71,
+71,
+71,
+72,
+72,
+72,
+72,
+72,
+73,
+73,
+73,
+73,
+73,
+74,
+74,
+74,
+74,
+74,
+75,
+75,
+75,
+75,
+75,
+76,
+76,
+76,
+76,
+76,
+77,
+77,
+77,
+77,
+77,
+78,
+78,
+78,
+78,
+78,
+79,
+79,
+79,
+79,
+79,
+80,
+80,
+80,
+80,
+80,
+81,
+81,
+81,
+81,
+81,
+82,
+82,
+82,
+82,
+82,
+83,
+83,
+83,
+83,
+83,
+84,
+84,
+84,
+84,
+84,
+85,
+85,
+85,
+85,
+85,
+86,
+86,
+86,
+86,
+86,
+87,
+87,
+87,
+87,
+87,
+88,
+88,
+88,
+88,
+88,
+89,
+89,
+89,
+89,
+89,
+90,
+90,
+90,
+90,
+90,
+91,
+91,
+91,
+91,
+91,
+92,
+92,
+92,
+92,
+92,
+93,
+93,
+93,
+93,
+93,
+94,
+94,
+94,
+94,
+94,
+95,
+95,
+95,
+95,
+95,
 0,
 0,
 0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-2,
-2,
-2,
-2,
-2,
-3,
-3,
-3,
-3,
-3,
-4,
-4,
-4,
-4,
-4,
-5,
-5,
-5,
-5,
-5,
-6,
-6,
-6,
-6,
-6,
-7,
-7,
-7,
-7,
-7,
-8,
-8,
-8,
-8,
-8,
-9,
-9,
-9,
-9,
-9,
-10,
-10,
-10,
-10,
-10,
-11,
-11,
-11,
-11,
-11,
-12,
-12,
-12,
-12,
-12,
-13,
-13,
-13,
-13,
-13,
-14,
-14,
-14,
-14,
-14,
-15,
-15,
-15,
-15,
-15,
-16,
-16,
-16,
-16,
-16,
-17,
-17,
-17,
-17,
-17,
-18,
-18,
-18,
-18,
-18,
-19,
-19,
-19,
-19,
-19,
-20,
-20,
-20,
-20,
-20,
-21,
-21,
-21,
-21,
-21,
-22,
-22,
-22,
-22,
-22,
-23,
-23,
-23,
-23,
-23,
-24,
-24,
-24,
-24,
-24,
-25,
-25,
-25,
-25,
-25,
-26,
-26,
-26,
-26,
-26,
-27,
-27,
-27,
-27,
-27,
-28,
-28,
-28,
-28,
-28,
-29,
-29,
-29,
-29,
-29,
-30,
-30,
-30,
-30,
-30,
-31,
-31,
-31,
-31,
-31,
 0

+ 6 - 1
compiler/aarch64/ra64std.inc

@@ -64,6 +64,8 @@
 'x30',
 'wzr',
 'xzr',
+'wsp',
+'sp',
 'b0',
 'h0',
 's0',
@@ -224,4 +226,7 @@
 's31',
 'd31',
 'q31',
-'nzcv'
+'nzcv',
+'fpcr',
+'fpsr',
+'tpidr_el0'

+ 5 - 0
compiler/aarch64/ra64sup.inc

@@ -64,6 +64,8 @@ RS_W30 = $1E;
 RS_X30 = $1E;
 RS_WZR = $1F;
 RS_XZR = $1F;
+RS_WSP = $20;
+RS_SP = $20;
 RS_B0 = $00;
 RS_H0 = $00;
 RS_S0 = $00;
@@ -225,3 +227,6 @@ RS_S31 = $1F;
 RS_D31 = $1F;
 RS_Q31 = $1F;
 RS_NZCV = $00;
+RS_FPCR = $01;
+RS_FPSR = $02;
+RS_TPIDR_EL0 = $03;

+ 88 - 0
compiler/aarch64/racpu.pas

@@ -0,0 +1,88 @@
+{
+    Copyright (c) 1998-2003 by Carl Eric Codere and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe
+
+    Handles the common AArch64 assembler reader routines
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit racpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      cgbase,
+      cpubase,
+      aasmtai,aasmdata,
+      rautils;
+
+    type
+      TAArch64Operand=class(TOperand)
+      end;
+
+      TAArch64Instruction=class(TInstruction)
+        oppostfix : toppostfix;
+        function ConcatInstruction(p:TAsmList) : tai;override;
+        function Is64bit: boolean;
+        function cgsize: tcgsize;
+      end;
+
+  implementation
+
+    uses
+      verbose,
+      aasmcpu;
+
+    function TAArch64Instruction.ConcatInstruction(p:TAsmList) : tai;
+      begin
+        result:=inherited ConcatInstruction(p);
+        taicpu(result).oppostfix:=oppostfix;
+      end;
+
+
+    function TAArch64Instruction.Is64bit: boolean;
+      begin
+        result:=
+          (operands[1].opr.typ=OPR_REGISTER) and
+          (getsubreg(operands[1].opr.reg)=R_SUBQ);
+      end;
+
+    function TAArch64Instruction.cgsize: tcgsize;
+      begin
+        if ops<1 then
+          internalerror(2014122001);
+        if operands[1].opr.typ<>OPR_REGISTER then
+          internalerror(2014122002);
+        result:=reg_cgsize(operands[1].opr.reg);
+        { a 32 bit integer register could actually be 16 or 8 bit }
+        if result=OS_32 then
+          case oppostfix of
+            PF_B:
+              result:=OS_8;
+            PF_SB:
+              result:=OS_S8;
+            PF_H:
+              result:=OS_16;
+            PF_SH:
+              result:=OS_S16;
+          end;
+      end;
+
+
+end.

+ 1053 - 0
compiler/aarch64/racpugas.pas

@@ -0,0 +1,1053 @@
+{
+    Copyright (c) 1998-2002 by Carl Eric Codere and Peter Vreman
+    Copyright (c) 2014 by Jonas Maebe
+
+    Does the parsing for the AArch64 GNU AS styled inline assembler.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+Unit racpugas;
+
+{$i fpcdefs.inc}
+
+  Interface
+
+    uses
+      raatt,racpu,
+      cpubase;
+
+    type
+      taarch64attreader = class(tattreader)
+        actoppostfix : TOpPostfix;
+        function is_asmopcode(const s: string):boolean;override;
+        function is_register(const s:string):boolean;override;
+        procedure handleopcode;override;
+        procedure BuildReference(oper: taarch64operand; is64bit: boolean);
+        procedure BuildOperand(oper: taarch64operand; is64bit: boolean);
+        function TryBuildShifterOp(instr: taarch64instruction; opnr: longint) : boolean;
+        procedure BuildOpCode(instr: taarch64instruction);
+        procedure ReadSym(oper: taarch64operand; is64bit: boolean);
+        procedure ConvertCalljmp(instr: taarch64instruction);
+        function ToConditionCode(const hs: string; is_operand: boolean): tasmcond;
+      end;
+
+
+  Implementation
+
+    uses
+      { helpers }
+      cutils,
+      { global }
+      globtype,verbose,
+      systems,aasmbase,aasmtai,aasmdata,aasmcpu,
+      { symtable }
+      symconst,symsym,
+      procinfo,
+      rabase,rautils,
+      cgbase,cgutils;
+
+
+    function taarch64attreader.is_register(const s:string):boolean;
+      type
+        treg2str = record
+          name : string[3];
+          reg : tregister;
+        end;
+
+      const
+        extraregs : array[0..3] of treg2str = (
+          (name: 'FP' ; reg: NR_FP),
+          (name: 'LR' ; reg: NR_LR),
+          (name: 'IP0'; reg: NR_IP0),
+          (name: 'IP1'; reg: NR_IP1));
+
+      var
+        i : longint;
+
+      begin
+        result:=inherited is_register(s);
+        { reg found?
+          possible aliases are always 2 or 3 chars
+        }
+        if result or not(length(s) in [2,3]) then
+          exit;
+        for i:=low(extraregs) to high(extraregs) do
+          begin
+            if s=extraregs[i].name then
+              begin
+                actasmregister:=extraregs[i].reg;
+                result:=true;
+                actasmtoken:=AS_REGISTER;
+                exit;
+              end;
+          end;
+      end;
+
+
+    procedure taarch64attreader.ReadSym(oper: taarch64operand; is64bit: boolean);
+      var
+         tempstr, mangledname : string;
+         typesize,l,k: aint;
+      begin
+        tempstr:=actasmpattern;
+        Consume(AS_ID);
+        { typecasting? }
+        if (actasmtoken=AS_LPAREN) and
+           SearchType(tempstr,typesize) then
+          begin
+            oper.hastype:=true;
+            Consume(AS_LPAREN);
+            BuildOperand(oper,is64bit);
+            Consume(AS_RPAREN);
+            if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+              oper.SetSize(typesize,true);
+          end
+        else
+          if not oper.SetupVar(tempstr,false) then
+            Message1(sym_e_unknown_id,tempstr);
+        { record.field ? }
+        if actasmtoken=AS_DOT then
+          begin
+            BuildRecordOffsetSize(tempstr,l,k,mangledname,false);
+            if (mangledname<>'') then
+              Message(asmr_e_invalid_reference_syntax);
+            inc(oper.opr.ref.offset,l);
+          end;
+      end;
+
+
+    Procedure taarch64attreader.BuildReference(oper: taarch64operand; is64bit: boolean);
+
+      procedure do_error;
+        begin
+          Message(asmr_e_invalid_reference_syntax);
+          RecoverConsume(false);
+        end;
+
+
+      procedure test_end(require_rbracket : boolean);
+        begin
+          if require_rbracket then begin
+            if not(actasmtoken=AS_RBRACKET) then
+              begin
+                do_error;
+                exit;
+              end
+            else
+              Consume(AS_RBRACKET);
+            if (actasmtoken=AS_NOT) then
+              begin
+                oper.opr.ref.addressmode:=AM_PREINDEXED;
+                Consume(AS_NOT);
+              end;
+          end;
+          if not(actasmtoken in [AS_SEPARATOR,AS_end]) then
+            do_error
+          else
+            begin
+{$IFDEF debugasmreader}
+              writeln('TEST_end_FINAL_OK. Created the following ref:');
+              writeln('oper.opr.ref.shiftimm=',oper.opr.ref.shiftimm);
+              writeln('oper.opr.ref.shiftmode=',ord(oper.opr.ref.shiftmode));
+              writeln('oper.opr.ref.index=',ord(oper.opr.ref.index));
+              writeln('oper.opr.ref.base=',ord(oper.opr.ref.base));
+              writeln('oper.opr.ref.signindex=',ord(oper.opr.ref.signindex));
+              writeln('oper.opr.ref.addressmode=',ord(oper.opr.ref.addressmode));
+              writeln;
+{$endIF debugasmreader}
+            end;
+        end;
+
+
+      function is_shifter_ref_operation(var a : tshiftmode) : boolean;
+        begin
+          a:=SM_NONE;
+          if (actasmpattern='LSL') then
+            a:=SM_LSL
+          else if (actasmpattern='UXTW') then
+            a:=SM_UXTW
+          else if (actasmpattern='SXTW') then
+            a:=SM_SXTW
+          else if (actasmpattern='SXTX') then
+            a:=SM_SXTX;
+          is_shifter_ref_operation:=not(a=SM_NONE);
+        end;
+
+
+      procedure read_index_shift(require_rbracket : boolean);
+        var
+          shift: aint;
+        begin
+          case actasmtoken of
+            AS_COMMA :
+              begin
+                Consume(AS_COMMA);
+                if not(actasmtoken=AS_ID) then
+                  do_error;
+                if is_shifter_ref_operation(oper.opr.ref.shiftmode) then
+                  begin
+                    Consume(actasmtoken);
+                    if actasmtoken=AS_HASH then
+                      begin
+                        Consume(AS_HASH);
+                        shift:=BuildConstExpression(false,true);
+                        if not(shift in [0,2+ord(is64bit)]) then
+                          do_error;
+                        oper.opr.ref.shiftimm:=shift;
+                        test_end(require_rbracket);
+                      end;
+                   end
+                 else
+                   begin
+                     do_error;
+                     exit;
+                   end;
+              end;
+            AS_RBRACKET :
+              if require_rbracket then
+                test_end(require_rbracket)
+              else
+                begin
+                  do_error;
+                  exit;
+                end;
+            AS_SEPARATOR,AS_END :
+              if not require_rbracket then
+                test_end(false)
+               else
+                 do_error;
+            else
+              begin
+                do_error;
+                exit;
+              end;
+          end;
+        end;
+
+
+      procedure read_index(require_rbracket : boolean);
+        var
+          recname : string;
+          o_int,s_int : aint;
+        begin
+          case actasmtoken of
+            AS_REGISTER :
+              begin
+                if getsupreg(actasmregister)=RS_XZR then
+                  Message1(asmr_e_invalid_ref_register,actasmpattern);
+                oper.opr.ref.index:=actasmregister;
+                Consume(AS_REGISTER);
+                read_index_shift(require_rbracket);
+                exit;
+              end;
+            AS_HASH : // constant
+              begin
+                Consume(AS_HASH);
+(*
+                if actasmtoken=AS_COLON then
+                  begin
+                    consume(AS_COLON);
+                    { GNU-style lower 12 bits of address of non-GOT-based
+                      access }
+                    if (actasmpattern='LO12') then
+                      begin
+                        consume(actasmtoken);
+                        consume(AS_COLON);
+                        if not oper.SetupVar(actasmpattern,false) then
+                          begin
+                            do_error;
+                            exit
+                          end;
+                        consume(AS_ID);
+                        oper.opr.ref.refaddr:=addr_??? (not gotpageoffset);
+                      end
+                    else
+                      begin
+                        do_error;
+                        exit
+                      end;
+                  end
+                else
+*)
+                  begin
+                    o_int:=BuildConstExpression(false,true);
+                    inc(oper.opr.ref.offset,o_int);
+                  end;
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_ID :
+              begin
+                recname:=actasmpattern;
+                Consume(AS_ID);
+                { Apple-style got page offset }
+                if actasmtoken=AS_AT then
+                  begin
+                    if not oper.SetupVar(recname,false) then
+                      begin
+                        do_error;
+                        exit
+                      end;
+                    consume(AS_AT);
+                    if actasmpattern='GOTPAGEOFF' then
+                      begin
+                        consume(actasmtoken);
+                        oper.opr.ref.refaddr:=addr_gotpageoffset;
+                      end
+                    else if actasmpattern='PAGEOFF' then
+                      begin
+                        consume(actasmtoken);
+                        oper.opr.ref.refaddr:=addr_pageoffset;
+                      end
+                    else
+                      begin
+                        do_error;
+                        exit
+                      end;
+                  end
+                else
+                  begin
+                    BuildRecordOffsetSize(recname,o_int,s_int,recname,false);
+                    inc(oper.opr.ref.offset,o_int);
+                  end;
+                test_end(require_rbracket);
+                exit;
+              end;
+            AS_AT:
+              begin
+                do_error;
+                exit;
+              end;
+            AS_RBRACKET :
+              begin
+                if require_rbracket then
+                  begin
+                    test_end(require_rbracket);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error; // unexpected rbracket
+                    exit;
+                  end;
+              end;
+            AS_SEPARATOR,AS_end :
+              begin
+                if not require_rbracket then
+                  begin
+                    test_end(false);
+                    exit;
+                  end
+                else
+                  begin
+                    do_error;
+                    exit;
+                  end;
+              end;
+            else
+              begin
+                // unexpected token
+                do_error;
+                exit;
+              end;
+          end; // case
+        end;
+
+
+      procedure try_prepostindexed;
+        begin
+          Consume(AS_RBRACKET);
+          case actasmtoken of
+            AS_COMMA :
+              begin // post-indexed
+                Consume(AS_COMMA);
+                oper.opr.ref.addressmode:=AM_POSTINDEXED;
+                read_index(false);
+                exit;
+              end;
+            AS_NOT :
+              begin   // pre-indexed
+                Consume(AS_NOT);
+                oper.opr.ref.addressmode:=AM_PREINDEXED;
+                test_end(false);
+                exit;
+              end;
+            else
+              begin
+                test_end(false);
+                exit;
+              end;
+          end; // case
+        end;
+
+      begin
+        Consume(AS_LBRACKET);
+        oper.opr.ref.addressmode:=AM_OFFSET; // assume "neither PRE nor POST inc"
+        if actasmtoken=AS_REGISTER then
+          begin
+            if getsupreg(actasmregister)=RS_XZR then
+              Message1(asmr_e_invalid_ref_register,actasmpattern);
+            oper.opr.ref.base:=actasmregister;
+            Consume(AS_REGISTER);
+            case actasmtoken of
+              AS_RBRACKET :
+                begin
+                  try_prepostindexed;
+                  exit;
+                end;
+              AS_COMMA :
+                begin
+                  Consume(AS_COMMA);
+                  read_index(true);
+                  exit;
+                end;
+              else
+                begin
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                end;
+            end;
+          end
+        else
+          Begin
+            case actasmtoken of
+              AS_ID :
+                begin
+                  { TODO: local variables and parameters }
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                  exit;
+                end;
+              else
+                begin // elsecase
+                  Message(asmr_e_invalid_reference_syntax);
+                  RecoverConsume(false);
+                  exit;
+                end;
+            end;
+          end;
+      end;
+
+
+    function taarch64attreader.TryBuildShifterOp(instr: taarch64instruction; opnr: longint): boolean;
+
+      procedure handlepara(sm : tshiftmode);
+        begin
+          consume(AS_ID);
+          fillchar(instr.operands[opnr].opr,sizeof(instr.operands[opnr].opr),0);
+          instr.operands[opnr].opr.typ:=OPR_SHIFTEROP;
+          instr.operands[opnr].opr.shifterop.shiftmode:=sm;
+          if (sm=SM_LSL) or
+             (actasmtoken=AS_HASH) then
+            begin
+              consume(AS_HASH);
+              instr.operands[opnr].opr.shifterop.shiftimm:=BuildConstExpression(false,false);
+            end;
+        end;
+
+      const
+        shiftmode2str: array[SM_LSL..SM_SXTX] of string[4] =
+          ('LSL','LSR','ASR',
+           'UXTB','UXTH','UXTW','UXTX',
+           'SXTB','SXTH','SXTW','SXTX');
+      var
+        sm: tshiftmode;
+        i: longint;
+        usessp,
+        useszr: boolean;
+      begin
+        result:=false;
+        if (actasmtoken=AS_ID) then
+          begin
+            for sm:=low(shiftmode2str) to high(shiftmode2str) do
+              if actasmpattern=shiftmode2str[sm] then
+                begin
+                  handlepara(sm);
+                  if instr.operands[1].opr.typ=OPR_REGISTER then
+                    begin
+                      { the possible shifter ops depend on whether this
+                        instruction uses sp and/or zr }
+                      usessp:=false;
+                      useszr:=false;
+                      for i:=low(instr.operands) to pred(opnr) do
+                        begin
+                          if (instr.operands[1].opr.typ=OPR_REGISTER) then
+                            case getsupreg(instr.operands[1].opr.reg) of
+                              RS_XZR:
+                                useszr:=true;
+                              RS_SP:
+                                usessp:=true;
+                            end;
+                        end;
+                      result:=valid_shifter_operand(instr.opcode,useszr,usessp,instr.Is64bit,sm,instr.operands[opnr].opr.shifterop.shiftimm);
+                    end
+                end;
+          end;
+      end;
+
+
+    function taarch64attreader.ToConditionCode(const hs: string; is_operand: boolean): tasmcond;
+      begin
+        case actopcode of
+          A_CSEL,A_CSINC,A_CSINV,A_CSNEG,A_CSET,A_CSETM,
+          A_CINC,A_CINV,A_CNEG,A_CCMN,A_CCMP,
+          A_B:
+            begin
+              { search for condition, conditions are always 2 chars }
+              if (is_operand<>(actopcode=A_B)) and
+                 (length(hs)>1) then
+                begin
+                  { workaround for DFA bug }
+                  result:=low(tasmcond);
+                  for result:=low(tasmcond) to high(tasmcond) do
+                    begin
+                      if hs=uppercond2str[result] then
+                        exit;
+                    end;
+                end;
+            end;
+        end;
+        result:=C_None;;
+      end;
+
+
+    Procedure taarch64attreader.BuildOperand(oper: taarch64operand; is64bit: boolean);
+      var
+        expr: string;
+        typesize, l: aint;
+
+        procedure MaybeAddGotAddrMode;
+          begin
+            if actasmtoken=AS_AT then
+              begin
+                consume(AS_AT);
+                if actasmpattern='GOTPAGE' then
+                  oper.opr.ref.refaddr:=addr_gotpage
+                else if actasmpattern='GOTPAGEOFF' then
+                  oper.opr.ref.refaddr:=addr_gotpageoffset
+                else if actasmpattern='PAGE' then
+                  oper.opr.ref.refaddr:=addr_page
+                else if actasmpattern='PAGEOFF' then
+                  oper.opr.ref.refaddr:=addr_pageoffset
+                else
+                  Message(asmr_e_expr_illegal);
+                consume(actasmtoken);
+              end
+            else
+              oper.opr.ref.refaddr:=addr_pic;
+          end;
+
+        procedure AddLabelOperand(hl:tasmlabel);
+          begin
+            if not(actasmtoken in [AS_PLUS,AS_MINUS,AS_LPAREN]) and
+               is_calljmp(actopcode) then
+             begin
+               oper.opr.typ:=OPR_SYMBOL;
+               oper.opr.symbol:=hl;
+             end
+            else if (actopcode=A_ADR) or
+               (actopcode=A_ADRP) then
+              begin
+                oper.InitRef;
+                MaybeAddGotAddrMode;
+                oper.opr.ref.symbol:=hl;
+                if (actasmtoken in [AS_PLUS, AS_MINUS]) then
+                  begin
+                    l:=BuildConstExpression(true,false);
+                    oper.opr.ref.offset:=l;
+                  end;
+              end;
+          end;
+
+
+        procedure MaybeRecordOffset;
+          var
+            mangledname: string;
+            hasdot  : boolean;
+            l,
+            toffset,
+            tsize   : aint;
+          begin
+            if not(actasmtoken in [AS_DOT,AS_PLUS,AS_MINUS]) then
+              exit;
+            l:=0;
+            hasdot:=(actasmtoken=AS_DOT);
+            if hasdot then
+              begin
+                if expr<>'' then
+                  begin
+                    BuildRecordOffsetSize(expr,toffset,tsize,mangledname,false);
+                    if (oper.opr.typ<>OPR_CONSTANT) and
+                       (mangledname<>'') then
+                      Message(asmr_e_wrong_sym_type);
+                    inc(l,toffset);
+                    oper.SetSize(tsize,true);
+                  end;
+              end;
+            if actasmtoken in [AS_PLUS,AS_MINUS] then
+              inc(l,BuildConstExpression(true,false));
+            case oper.opr.typ of
+              OPR_LOCAL :
+                begin
+                  { don't allow direct access to fields of parameters, because that
+                    will generate buggy code. Allow it only for explicit typecasting }
+                  if hasdot and
+                     (not oper.hastype) and
+                     (tabstractnormalvarsym(oper.opr.localsym).owner.symtabletype=parasymtable) and
+                     (current_procinfo.procdef.proccalloption<>pocall_register) then
+                    Message(asmr_e_cannot_access_field_directly_for_parameters);
+                  inc(oper.opr.localsymofs,l)
+                end;
+              OPR_CONSTANT :
+                inc(oper.opr.val,l);
+              OPR_REFERENCE :
+                if (mangledname<>'') then
+                  begin
+                    if (oper.opr.val<>0) then
+                      Message(asmr_e_wrong_sym_type);
+                    oper.opr.typ:=OPR_SYMBOL;
+                    oper.opr.symbol:=current_asmdata.RefAsmSymbol(mangledname);
+                  end
+                else
+                  inc(oper.opr.val,l);
+              OPR_SYMBOL:
+                Message(asmr_e_invalid_symbol_ref);
+              else
+                internalerror(200309221);
+            end;
+          end;
+
+
+        function MaybeBuildReference(is64bit: boolean):boolean;
+          { Try to create a reference, if not a reference is found then false
+            is returned }
+          begin
+            MaybeBuildReference:=true;
+            case actasmtoken of
+              AS_INTNUM,
+              AS_MINUS,
+              AS_PLUS:
+                Begin
+                  oper.opr.ref.offset:=BuildConstExpression(True,False);
+                  if actasmtoken<>AS_LPAREN then
+                    Message(asmr_e_invalid_reference_syntax)
+                  else
+                    BuildReference(oper,is64bit);
+                end;
+              AS_LPAREN:
+                BuildReference(oper,is64bit);
+              AS_ID: { only a variable is allowed ... }
+                Begin
+                  ReadSym(oper,is64bit);
+                  case actasmtoken of
+                    AS_end,
+                    AS_SEPARATOR,
+                    AS_COMMA: ;
+                    AS_LPAREN:
+                      BuildReference(oper,is64bit);
+                  else
+                    Begin
+                      Message(asmr_e_invalid_reference_syntax);
+                      Consume(actasmtoken);
+                    end;
+                  end; {end case }
+                end;
+              else
+               MaybeBuildReference:=false;
+            end; { end case }
+          end;
+
+
+      var
+        tempreg: tregister;
+        hl: tasmlabel;
+        icond: tasmcond;
+      Begin
+        expr:='';
+        case actasmtoken of
+          AS_LBRACKET: { Memory reference or constant expression }
+            Begin
+              oper.InitRef;
+              BuildReference(oper,is64bit);
+            end;
+
+          AS_HASH: { Constant expression  }
+            Begin
+              Consume(AS_HASH);
+              BuildConstantOperand(oper);
+            end;
+
+          (*
+          AS_INTNUM,
+          AS_MINUS,
+          AS_PLUS:
+            Begin
+              { Constant memory offset }
+              { This must absolutely be followed by (  }
+              oper.InitRef;
+              oper.opr.ref.offset:=BuildConstExpression(True,False);
+              if actasmtoken<>AS_LPAREN then
+                begin
+                  ofs:=oper.opr.ref.offset;
+                  BuildConstantOperand(oper);
+                  inc(oper.opr.val,ofs);
+                end
+              else
+                BuildReference(oper,is64bit);
+            end;
+          *)
+          AS_ID: { A constant expression, or a Variable ref.  }
+            Begin
+              { Condition code? }
+              icond:=ToConditionCode(actasmpattern,true);
+              if icond<>C_None then
+                begin
+                  oper.opr.typ:=OPR_COND;
+                  oper.opr.cc:=icond;
+                  consume(AS_ID);
+                end
+              else
+              { Local Label ? }
+              if is_locallabel(actasmpattern) then
+               begin
+                 CreateLocalLabel(actasmpattern,hl,false);
+                 Consume(AS_ID);
+                 AddLabelOperand(hl);
+               end
+              else
+               { Check for label }
+               if SearchLabel(actasmpattern,hl,false) then
+                 begin
+                   Consume(AS_ID);
+                   AddLabelOperand(hl);
+                 end
+              else
+               { probably a variable or normal expression }
+               { or a procedure (such as in CALL ID)      }
+               begin
+                 { is it a constant ? }
+                 if SearchIConstant(actasmpattern,l) then
+                  begin
+                    if not (oper.opr.typ in [OPR_NONE,OPR_CONSTANT]) then
+                      Message(asmr_e_invalid_operand_type);
+                    BuildConstantOperand(oper);
+                  end
+                 else
+                  begin
+                    expr:=actasmpattern;
+                    Consume(AS_ID);
+                    { typecasting? }
+                    if (actasmtoken=AS_LPAREN) and
+                       SearchType(expr,typesize) then
+                     begin
+                       oper.hastype:=true;
+                       Consume(AS_LPAREN);
+                       BuildOperand(oper,is64bit);
+                       Consume(AS_RPAREN);
+                       if oper.opr.typ in [OPR_REFERENCE,OPR_LOCAL] then
+                         oper.SetSize(typesize,true);
+                     end
+                    else
+                     begin
+                       if not(oper.SetupVar(expr,false)) then
+                        Begin
+                          { look for special symbols ... }
+                          if expr= '__HIGH' then
+                            begin
+                              consume(AS_LPAREN);
+                              if not oper.setupvar('high'+actasmpattern,false) then
+                                Message1(sym_e_unknown_id,'high'+actasmpattern);
+                              consume(AS_ID);
+                              consume(AS_RPAREN);
+                            end
+                          else
+                           if expr = '__RESULT' then
+                            oper.SetUpResult
+                          else
+                           if expr = '__SELF' then
+                            oper.SetupSelf
+                          else
+                           if expr = '__OLDEBP' then
+                            oper.SetupOldEBP
+                          else
+                            Message1(sym_e_unknown_id,expr);
+                        end
+                       else
+                         MaybeAddGotAddrMode;
+                     end;
+                  end;
+                  if actasmtoken=AS_DOT then
+                    MaybeRecordOffset;
+                  { add a constant expression? }
+                  if (actasmtoken=AS_PLUS) then
+                   begin
+                     l:=BuildConstExpression(true,false);
+                     case oper.opr.typ of
+                       OPR_CONSTANT :
+                         inc(oper.opr.val,l);
+                       OPR_LOCAL :
+                         inc(oper.opr.localsymofs,l);
+                       OPR_REFERENCE :
+                         inc(oper.opr.ref.offset,l);
+                       else
+                         internalerror(200309202);
+                     end;
+                   end
+               end;
+              { Do we have a indexing reference, then parse it also }
+              if actasmtoken=AS_LPAREN then
+                BuildReference(oper,is64bit);
+            end;
+
+          { Register, a variable reference or a constant reference  }
+          AS_REGISTER:
+            Begin
+              { save the type of register used. }
+              tempreg:=actasmregister;
+              Consume(AS_REGISTER);
+              if (actasmtoken in [AS_end,AS_SEPARATOR,AS_COMMA]) then
+                Begin
+                  if not (oper.opr.typ in [OPR_NONE,OPR_REGISTER]) then
+                    Message(asmr_e_invalid_operand_type);
+                  oper.opr.typ:=OPR_REGISTER;
+                  oper.opr.reg:=tempreg;
+                end
+              else
+                Message(asmr_e_syn_operand);
+            end;
+
+          AS_end,
+          AS_SEPARATOR,
+          AS_COMMA: ;
+        else
+          Begin
+            Message(asmr_e_syn_operand);
+            Consume(actasmtoken);
+          end;
+        end; { end case }
+      end;
+
+{*****************************************************************************
+                                taarch64attreader
+*****************************************************************************}
+
+    procedure taarch64attreader.BuildOpCode(instr: taarch64instruction);
+      var
+        operandnum : longint;
+      Begin
+        { opcode }
+        if (actasmtoken<>AS_OPCODE) then
+         Begin
+           Message(asmr_e_invalid_or_missing_opcode);
+           RecoverConsume(true);
+           exit;
+         end;
+        { Fill the instr object with the current state }
+        with instr do
+          begin
+            Opcode:=ActOpcode;
+            condition:=ActCondition;
+            oppostfix:=actoppostfix;
+          end;
+        Consume(AS_OPCODE);
+
+        { We are reading operands, so opcode will be an AS_ID }
+        operandnum:=1;
+        { Zero operand opcode ?  }
+        if actasmtoken in [AS_SEPARATOR,AS_end] then
+         begin
+           instr.Ops:=0;
+           exit;
+         end;
+        { Read the operands }
+        repeat
+          case actasmtoken of
+            AS_COMMA: { Operand delimiter }
+              Begin
+                { operandnum and not operandnum+1, because tinstruction is
+                  one-based and taicpu is zero-based)
+                }
+                if can_be_shifter_operand(instr.opcode,operandnum) then
+                  begin
+                    Consume(AS_COMMA);
+                    if not TryBuildShifterOp(instr,operandnum+1) then
+                      Message(asmr_e_illegal_shifterop_syntax);
+                    Inc(operandnum);
+                  end
+                else
+                  begin
+                    if operandnum>Max_Operands then
+                      Message(asmr_e_too_many_operands)
+                    else
+                      Inc(operandnum);
+                    Consume(AS_COMMA);
+                  end;
+              end;
+            AS_SEPARATOR,
+            AS_end : { End of asm operands for this opcode  }
+              begin
+                break;
+              end;
+          else
+            begin
+              BuildOperand(taarch64operand(instr.operands[operandnum]),instr.Is64bit);
+              instr.Ops:=operandnum;
+              if instr.operands[operandnum].opr.typ=OPR_REFERENCE then
+                if simple_ref_type(instr.opcode,instr.cgsize,instr.oppostfix,instr.operands[operandnum].opr.ref)<>sr_simple then
+                  Message(asmr_e_invalid_reference_syntax);
+                ;
+            end;
+          end; { end case }
+        until false;
+      end;
+
+
+    function taarch64attreader.is_asmopcode(const s: string):boolean;
+
+      const
+        { sorted by length so longer postfixes will match first }
+        postfix2strsorted : array[1..7] of string[3] = (
+          'SB','SH','SW',
+          'B','H','W',
+          'S');
+
+        postfixsorted : array[1..7] of TOpPostfix = (
+          PF_SB,PF_SH,PF_SW,
+          PF_B,PF_H,PF_W,
+          PF_S);
+
+      var
+        j  : longint;
+        hs : string;
+        maxlen : longint;
+        icond : tasmcond;
+      Begin
+        { making s a value parameter would break other assembler readers }
+        hs:=s;
+        is_asmopcode:=false;
+
+        { clear opcode }
+        actopcode:=A_None;
+        actcondition:=C_None;
+
+        { b.cond ? }
+        if (length(hs)=4) and
+           (hs[1]='B') and
+           (hs[2]='.') then
+          begin
+            actopcode:=A_B;
+            actasmtoken:=AS_OPCODE;
+            actcondition:=ToConditionCode(copy(hs,3,length(actasmpattern)-2),false);
+            if actcondition<>C_None then
+              is_asmopcode:=true;
+            exit;
+          end;
+
+        maxlen:=max(length(hs),7);
+        actopcode:=A_NONE;
+        for j:=maxlen downto 1 do
+          begin
+            actopcode:=tasmop(PtrUInt(iasmops.Find(copy(hs,1,j))));
+            if actopcode<>A_NONE then
+              begin
+                actasmtoken:=AS_OPCODE;
+                { strip op code }
+                delete(hs,1,j);
+                break;
+              end;
+          end;
+        if actopcode=A_NONE then
+          exit;
+
+        { check for postfix }
+        if length(hs)>0 then
+          begin
+            for j:=low(postfixsorted) to high(postfixsorted) do
+              begin
+                if copy(hs,1,length(postfix2strsorted[j]))=postfix2strsorted[j] then
+                  begin
+                    actoppostfix:=postfixsorted[j];
+                    { strip postfix }
+                    delete(hs,1,length(postfix2strsorted[j]));
+                    break;
+                  end;
+              end;
+          end;
+        { if we stripped all postfixes, it's a valid opcode }
+        is_asmopcode:=length(hs)=0;
+      end;
+
+
+    procedure taarch64attreader.ConvertCalljmp(instr: taarch64instruction);
+      var
+        newopr : toprrec;
+      begin
+        if instr.Operands[1].opr.typ=OPR_REFERENCE then
+          begin
+            newopr.typ:=OPR_SYMBOL;
+            newopr.symbol:=instr.Operands[1].opr.ref.symbol;
+            newopr.symofs:=instr.Operands[1].opr.ref.offset;
+            if (instr.Operands[1].opr.ref.base<>NR_NO) or
+              (instr.Operands[1].opr.ref.index<>NR_NO) or
+              (instr.Operands[1].opr.ref.refaddr<>addr_pic) then
+              Message(asmr_e_syn_operand);
+            instr.Operands[1].opr:=newopr;
+          end;
+      end;
+
+    procedure taarch64attreader.handleopcode;
+      var
+        instr: taarch64instruction;
+      begin
+        instr:=taarch64instruction.Create(taarch64operand);
+        BuildOpcode(instr);
+        if is_calljmp(instr.opcode) then
+          ConvertCalljmp(instr);
+        {
+        instr.AddReferenceSizes;
+        instr.SetInstructionOpsize;
+        instr.CheckOperandSizes;
+        }
+        instr.ConcatInstruction(curlist);
+        instr.Free;
+        actoppostfix:=PF_None;
+      end;
+
+
+{*****************************************************************************
+                                     Initialize
+*****************************************************************************}
+
+const
+  asmmode_arm_att_info : tasmmodeinfo =
+          (
+            id    : asmmode_arm_gas;
+            idtxt : 'GAS';
+            casmreader : taarch64attreader;
+          );
+
+  asmmode_arm_standard_info : tasmmodeinfo =
+          (
+            id    : asmmode_standard;
+            idtxt : 'STANDARD';
+            casmreader : taarch64attreader;
+          );
+
+initialization
+  RegisterAsmMode(asmmode_arm_att_info);
+  RegisterAsmMode(asmmode_arm_standard_info);
+end.

+ 171 - 0
compiler/aarch64/rgcpu.pas

@@ -0,0 +1,171 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    This unit implements the SPARC specific class for the register
+    allocator
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************}
+unit rgcpu;
+
+{$i fpcdefs.inc}
+
+  interface
+
+    uses
+      aasmbase,aasmcpu,aasmtai,aasmdata,
+      cgbase,cgutils,
+      cpubase,
+      globtype,
+      rgobj;
+
+    type
+      trgcpu=class(trgobj)
+        procedure do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
+        procedure do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);override;
+       protected
+        procedure do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister);
+      end;
+
+      trgintcpu=class(trgcpu)
+        procedure add_cpu_interferences(p: tai); override;
+      end;
+
+
+implementation
+
+    uses
+      verbose,cutils,
+      cgobj;
+
+    procedure trgcpu.do_spill_read(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
+      begin
+        do_spill_op(list,A_LDR,pos,spilltemp,tempreg);
+      end;
+
+
+    procedure trgcpu.do_spill_written(list:TAsmList;pos:tai;const spilltemp:treference;tempreg:tregister);
+      begin
+        do_spill_op(list,A_STR,pos,spilltemp,tempreg);
+      end;
+
+
+    procedure trgcpu.do_spill_op(list: tasmlist; op: tasmop; pos: tai; const spilltemp: treference; tempreg: tregister);
+      var
+        helpins  : tai;
+        tmpref   : treference;
+        helplist : TAsmList;
+        hreg     : tregister;
+        isload   : boolean;
+      begin
+        isload:=op=A_LDR;
+        { offset out of range for regular load/store? }
+        if simple_ref_type(op,reg_cgsize(tempreg),PF_None,spilltemp)<>sr_simple then
+          begin
+            helplist:=TAsmList.create;
+
+            if getregtype(tempreg)=R_INTREGISTER then
+              hreg:=tempreg
+            else
+              hreg:=cg.getaddressregister(helplist);
+
+            cg.a_load_const_reg(helplist,OS_ADDR,spilltemp.offset,hreg);
+            reference_reset_base(tmpref,spilltemp.base,0,sizeof(pint));
+            tmpref.index:=hreg;
+            if isload then
+              helpins:=spilling_create_load(tmpref,tempreg)
+            else
+              helpins:=spilling_create_store(tempreg,tmpref);
+            helplist.concat(helpins);
+            add_cpu_interferences(helpins);
+            list.insertlistafter(pos,helplist);
+            helplist.free;
+          end
+        else if isload then
+          inherited do_spill_read(list,pos,spilltemp,tempreg)
+        else
+          inherited do_spill_written(list,pos,spilltemp,tempreg)
+      end;
+
+
+    procedure trgintcpu.add_cpu_interferences(p: tai);
+     var
+       i, j: longint;
+     begin
+       if p.typ=ait_instruction then
+         begin
+           { add interferences for instructions that can have SP as a register
+             operand }
+           case taicpu(p).opcode of
+             A_MOV:
+               { all operands can be SP }
+               exit;
+             A_ADD,
+             A_SUB,
+             A_CMP,
+             A_CMN:
+               { ok as destination or first source in immediate or extended
+                 register form }
+               if (taicpu(p).oper[taicpu(p).ops-1]^.typ<>top_shifterop) or
+                  valid_shifter_operand(taicpu(p).opcode,false,true,
+                    reg_cgsize(taicpu(p).oper[0]^.reg) in [OS_64,OS_S64],
+                    taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftmode,
+                    taicpu(p).oper[taicpu(p).ops-1]^.shifterop^.shiftimm) then
+                 begin
+                   if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_shifterop then
+                     i:=taicpu(p).ops-2
+                   else
+                     i:=taicpu(p).ops-1;
+                   if (taicpu(p).oper[i]^.typ=top_reg) then
+                     add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
+                   exit;
+                 end;
+             A_AND,
+             A_EOR,
+             A_ORR,
+             A_TST:
+               { ok in immediate form }
+               if taicpu(p).oper[taicpu(p).ops-1]^.typ=top_const then
+                 exit;
+           end;
+           { add interferences for other registers }
+           for i:=0 to taicpu(p).ops-1 do
+             begin
+               case taicpu(p).oper[i]^.typ of
+                 top_reg:
+                   if getregtype(taicpu(p).oper[i]^.reg)=R_INTREGISTER then
+                     add_edge(getsupreg(taicpu(p).oper[i]^.reg),RS_SP);
+                 top_ref:
+                   begin
+                     { sp can always be base, never be index }
+                     if taicpu(p).oper[i]^.ref^.index<>NR_NO then
+                       add_edge(getsupreg(taicpu(p).oper[i]^.ref^.index),RS_SP);
+                     { in case of write back, the base register must be
+                       different from the loaded/stored register }
+                     if (taicpu(p).oper[i]^.ref^.addressmode in [AM_PREINDEXED,AM_POSTINDEXED]) and
+                        (taicpu(p).oper[i]^.ref^.base<>NR_NO) then
+                       begin
+                         for j:=pred(i) downto 0 do
+                           if taicpu(p).oper[j]^.typ=TOP_REG then
+                             add_edge(getsupreg(taicpu(p).oper[j]^.reg),getsupreg(taicpu(p).oper[i]^.ref^.base));
+                       end;
+                   end;
+               end;
+             end;
+         end;
+     end;
+
+end.

+ 21 - 4
compiler/aasmtai.pas

@@ -219,11 +219,11 @@ interface
 {$ifdef arm}
        { ARM only }
        ,top_regset
-       ,top_conditioncode
        ,top_modeflags
        ,top_specialreg
 {$endif arm}
 {$if defined(arm) or defined(aarch64)}
+       ,top_conditioncode
        ,top_shifterop
 {$endif defined(arm) or defined(aarch64)}
 {$ifdef m68k}
@@ -266,12 +266,12 @@ interface
           top_local  : (localoper:plocaloper);
       {$ifdef arm}
           top_regset : (regset:^tcpuregisterset; regtyp: tregistertype; subreg: tsubregister; usermode: boolean);
-          top_conditioncode : (cc : TAsmCond);
           top_modeflags : (modeflags : tcpumodeflags);
           top_specialreg : (specialreg:tregister; specialflags:tspecialregflags);
       {$endif arm}
       {$if defined(arm) or defined(aarch64)}
           top_shifterop : (shifterop : pshifterop);
+          top_conditioncode : (cc : TAsmCond);
       {$endif defined(arm) or defined(aarch64)}
       {$ifdef m68k}
           top_regset : (dataregset,addrregset,fpuregset:^tcpuregisterset);
@@ -331,7 +331,7 @@ interface
         mark_Position
       );
 
-      TRegAllocType = (ra_alloc,ra_dealloc,ra_sync,ra_resize);
+      TRegAllocType = (ra_alloc,ra_dealloc,ra_sync,ra_resize,ra_markused);
 
       TStabType = (stab_stabs,stab_stabn,stab_stabd,
                    { AIX/XCOFF stab types }
@@ -354,6 +354,8 @@ interface
         asd_jclass,asd_jinterface,asd_jsuper,asd_jfield,asd_jlimit,asd_jline,
         { .ent/.end for MIPS and Alpha }
         asd_ent,asd_ent_end,
+        { supported by recent clang-based assemblers for data-in-code  }
+        asd_data_region, asd_end_data_region,
         { .thumb_func for ARM }
         asd_thumb_func
       );
@@ -368,7 +370,7 @@ interface
 
 
     const
-      regallocstr : array[tregalloctype] of string[10]=('allocated','released','sync','resized');
+      regallocstr : array[tregalloctype] of string[10]=('allocated','released','sync','resized','used');
       tempallocstr : array[boolean] of string[10]=('released','allocated');
       stabtypestr : array[TStabType] of string[8]=(
         'stabs','stabn','stabd',
@@ -385,6 +387,8 @@ interface
         'class','interface','super','field','limit','line',
         { .ent/.end for MIPS and Alpha }
         'ent','end',
+        { supported by recent clang-based assemblers for data-in-code }
+        'data_region','end_data_region',
         { .thumb_func for ARM }
         'thumb_func'
       );
@@ -711,6 +715,7 @@ interface
           constructor dealloc(r : tregister;ainstr:tai);
           constructor sync(r : tregister);
           constructor resize(r : tregister);
+          constructor markused(r : tregister);
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
        end;
@@ -2421,6 +2426,15 @@ implementation
       end;
 
 
+    constructor tai_regalloc.markused(r : tregister);
+      begin
+        inherited create;
+        typ:=ait_regalloc;
+        ratype:=ra_markused;
+        reg:=r;
+      end;
+
+
     constructor tai_regalloc.ppuload(t:taitype;ppufile:tcompilerppufile);
       begin
         inherited ppuload(t,ppufile);
@@ -2553,6 +2567,9 @@ implementation
 {$ifdef ARM}
               and not(r.base=NR_R15)
 {$endif ARM}
+{$ifdef aarch64}
+              and not(r.refaddr in [addr_full,addr_gotpageoffset,addr_gotpage])
+{$endif aarch64}
               then
               internalerror(200502052);
             typ:=top_ref;

+ 4 - 1
compiler/aggas.pas

@@ -532,6 +532,8 @@ implementation
          system_powerpc64_darwin,
          system_x86_64_darwin,
          system_arm_darwin,
+         system_aarch64_darwin,
+         system_x86_64_iphonesim,
          system_powerpc_aix,
          system_powerpc64_aix:
            begin
@@ -567,7 +569,8 @@ implementation
                     AsmWriteln('__TEXT,__picsymbolstub4,symbol_stubs,none,16')
                   else
                     AsmWriteln('__TEXT,__symbol_stub4,symbol_stubs,none,12')
-                { darwin/x86-64 uses RIP-based GOT addressing, no symbol stubs }
+                { darwin/(x86-64/AArch64) uses PC-based GOT addressing, no
+                  explicit symbol stubs }
                 else
                   internalerror(2006031101);
               end;

+ 19 - 5
compiler/aoptobj.pas

@@ -361,7 +361,7 @@ Unit AoptObj;
 
     function JumpTargetOp(ai: taicpu): poper; inline;
       begin
-{$ifdef MIPS}
+{$if defined(MIPS)}
         { MIPS branches can have 1,2 or 3 operands, target label is the last one. }
         result:=ai.oper[ai.ops-1];
 {$else MIPS}
@@ -1179,9 +1179,9 @@ Unit AoptObj;
     function IsJumpToLabel(hp: taicpu): boolean;
       begin
         result:=(hp.opcode=aopt_uncondjmp) and
-{$ifdef arm}
+{$if defined(arm) or defined(aarch64)}
           (hp.condition=c_None) and
-{$endif arm}
+{$endif arm or aarch64}
           (JumpTargetOp(hp)^.typ = top_ref) and
           (JumpTargetOp(hp)^.ref^.symbol is TAsmLabel);
       end;
@@ -1260,6 +1260,14 @@ Unit AoptObj;
                     exit;
                   if not GetFinalDestination(taicpu(p1),succ(level)) then
                     exit;
+{$if defined(aarch64)}
+                  { can't have conditional branches to
+                    global labels on AArch64, because the
+                    offset may become too big }
+                  if not(taicpu(hp).condition in [C_None,C_AL,C_NV]) and
+                     (tasmlabel(JumpTargetOp(taicpu(p1))^.ref^.symbol).bind<>AB_LOCAL) then
+                    exit;
+{$endif aarch64}
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).decrefs;
                   JumpTargetOp(hp)^.ref^.symbol:=JumpTargetOp(taicpu(p1))^.ref^.symbol;
                   tasmlabel(JumpTargetOp(hp)^.ref^.symbol).increfs;
@@ -1398,9 +1406,15 @@ Unit AoptObj;
                                     FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp2) then
                                   begin
                                     if (taicpu(p).opcode=aopt_condjmp)
-  {$ifdef arm}
+  {$if defined(arm) or defined(aarch64)}
                                       and (taicpu(p).condition<>C_None)
-  {$endif arm}
+  {$endif arm or aarch64}
+  {$if defined(aarch64)}
+                                      { can't have conditional branches to
+                                        global labels on AArch64, because the
+                                        offset may become too big }
+                                      and (tasmlabel(JumpTargetOp(taicpu(hp1))^.ref^.symbol).bind=AB_LOCAL)
+  {$endif aarch64}
                                     then
                                       begin
                                         taicpu(p).condition:=inverse_cond(taicpu(p).condition);

+ 45 - 12
compiler/arm/aasmcpu.pas

@@ -908,6 +908,20 @@ implementation
               limit:=254;
         end;
 
+      function is_case_dispatch(hp: taicpu): boolean;
+        begin
+          result:=
+            ((taicpu(hp).opcode in [A_ADD,A_LDR]) and
+             not(GenerateThumbCode or GenerateThumb2Code) and
+             (taicpu(hp).oper[0]^.typ=top_reg) and
+             (taicpu(hp).oper[0]^.reg=NR_PC)) or
+             ((taicpu(hp).opcode=A_MOV) and (GenerateThumbCode) and
+              (taicpu(hp).oper[0]^.typ=top_reg) and
+              (taicpu(hp).oper[0]^.reg=NR_PC)) or
+             (taicpu(hp).opcode=A_TBH) or
+             (taicpu(hp).opcode=A_TBB);
+        end;
+
       var
         curinspos,
         penalty,
@@ -916,7 +930,8 @@ implementation
         currentsize,
         extradataoffset,
         curop : longint;
-        curtai : tai;
+        curtai,
+        inserttai : tai;
         ai_label : tai_label;
         curdatatai,hp,hp2 : tai;
         curdata : TAsmList;
@@ -1076,15 +1091,11 @@ implementation
                 case taicpu(hp).opcode of
                   A_MOV,
                   A_LDR,
-                  A_ADD:
+                  A_ADD,
+                  A_TBH,
+                  A_TBB:
                     { approximation if we hit a case jump table }
-                    if ((taicpu(hp).opcode in [A_ADD,A_LDR]) and not(GenerateThumbCode or GenerateThumb2Code) and
-                       (taicpu(hp).oper[0]^.typ=top_reg) and
-                      (taicpu(hp).oper[0]^.reg=NR_PC)) or
-                      ((taicpu(hp).opcode=A_MOV) and (GenerateThumbCode) and
-                       (taicpu(hp).oper[0]^.typ=top_reg) and
-                       (taicpu(hp).oper[0]^.reg=NR_PC))
-                       then
+                    if is_case_dispatch(taicpu(hp)) then
                       begin
                         penalty:=multiplier;
                         hp:=tai(hp.next);
@@ -1178,12 +1189,34 @@ implementation
                 else
                   limit:=1016;
 
+                { if this is an add/tbh/tbb-based jumptable, go back to the
+                  previous instruction, because inserting data between the
+                  dispatch instruction and the table would mess up the
+                  addresses }
+                inserttai:=curtai;
+                if is_case_dispatch(taicpu(inserttai)) and
+                   ((taicpu(inserttai).opcode=A_ADD) or
+                    (taicpu(inserttai).opcode=A_TBH) or
+                    (taicpu(inserttai).opcode=A_TBB)) then
+                  begin
+                    repeat
+                      inserttai:=tai(inserttai.previous);
+                    until inserttai.typ=ait_instruction;
+                    { if it's an add-based jump table, then also skip the
+                      pc-relative load }
+                    if taicpu(curtai).opcode=A_ADD then
+                      repeat
+                        inserttai:=tai(inserttai.previous);
+                      until inserttai.typ=ait_instruction;
+                  end
+                else
+
                 { on arm thumb, insert the data always after all labels etc. following an instruction so it
                   is prevent that a bxx yyy; bl xxx; yyyy: sequence gets separated ( we never insert on arm thumb after
                   bxx) and the distance of bxx gets too long }
                 if GenerateThumbCode then
-                  while assigned(tai(curtai.Next)) and (tai(curtai.Next).typ in SkipInstr+[ait_label]) do
-                    curtai:=tai(curtai.next);
+                  while assigned(tai(inserttai.Next)) and (tai(inserttai.Next).typ in SkipInstr+[ait_label]) do
+                    inserttai:=tai(inserttai.next);
 
                 doinsert:=false;
                 current_asmdata.getjumplabel(l);
@@ -1210,7 +1243,7 @@ implementation
                   is then equal curdata.last.previous) we could over see one
                   instruction }
                 hp:=tai(curdata.Last);
-                list.insertlistafter(curtai,curdata);
+                list.insertlistafter(inserttai,curdata);
                 curtai:=hp;
               end
             else

+ 1 - 2
compiler/arm/agarmgas.pas

@@ -114,8 +114,7 @@ unit agarmgas;
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
         else if GenerateThumbCode then
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' -mthumb -mthumb-interwork '+result
-        // EDSP instructions in RTL require armv5te at least to not generate error
-        else if current_settings.cputype >= cpu_armv5te then
+        else
           result:='-march='+cputype_to_gas_march[current_settings.cputype]+' '+result;
 
         if target_info.abi = abi_eabihf then

+ 5 - 3
compiler/arm/cgcpu.pas

@@ -104,7 +104,7 @@ unit cgcpu;
 
         procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src,dst: tregister;shuffle : pmmshuffle); override;
         { Transform unsupported methods into Internal errors }
-        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister); override;
+        procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister); override;
 
         { try to generate optimized 32 Bit multiplication, returns true if successful generated }
         function try_optimized_mul32_const_reg_reg(list: TAsmList; a: tcgint; src, dst: tregister) : boolean;
@@ -640,7 +640,9 @@ unit cgcpu;
         sym : TAsmSymbol;
       begin
         { check not really correct: should only be used for non-Thumb cpus }
-        if CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype] then
+        if (CPUARM_HAS_BLX_LABEL in cpu_capabilities[current_settings.cputype]) and
+          { WinCE GNU AS (not sure if this applies in general) does not support BLX imm }
+          (target_info.system<>system_arm_wince) then
           branchopcode:=A_BLX
         else
           branchopcode:=A_BL;
@@ -1704,7 +1706,7 @@ unit cgcpu;
       end;
 
 
-    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: TCGSize; src, dst: TRegister);
+    procedure tbasecgarm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: TCGSize; src, dst: TRegister);
       begin
         if reverse then
           begin

+ 6 - 0
compiler/arm/cpuinfo.pas

@@ -337,6 +337,9 @@ Type
       ct_xmc4502x768,
       ct_xmc4504x512,
 
+      { Allwinner }
+      ct_allwinner_a20,
+
       // generic Thumb2 target
       ct_thumb2bare
      );
@@ -702,6 +705,9 @@ Const
       (controllertypestr:'XMC4502X768';   controllerunitstr:'XMC4502'; flashbase:$08000000;	flashsize:$000C0000;	srambase:$20000000;	sramsize:$00010000),
       (controllertypestr:'XMC4504X512';   controllerunitstr:'XMC4504'; flashbase:$08000000;	flashsize:$00080000;	srambase:$20000000;	sramsize:$00010000),
 
+      { Allwinner }
+      (controllertypestr:'ALLWINNER_A20'; controllerunitstr:'ALLWINNER_A20';     flashbase:$00000000; flashsize:$00000000;  srambase:$40000000; sramsize:$80000000),
+
       { Bare bones }
       (controllertypestr:'THUMB2_BARE';	controllerunitstr:'THUMB2_BARE';	flashbase:$00000000;	flashsize:$00002000;	srambase:$20000000;	sramsize:$00000400)
     );

+ 4 - 3
compiler/arm/narmmem.pas

@@ -27,6 +27,7 @@ interface
 
     uses
       globtype,
+      symtype,
       cgbase,cpubase,nmem,ncgmem;
 
     type
@@ -36,7 +37,7 @@ interface
 
 
       tarmvecnode = class(tcgvecnode)
-        procedure update_reference_reg_mul(maybe_const_reg: tregister; l: aint);override;
+        procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);override;
       end;
 
 implementation
@@ -70,7 +71,7 @@ implementation
                              TARMVECNODE
 *****************************************************************************}
 
-     procedure tarmvecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
+     procedure tarmvecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
        var
          hreg: tregister;
          hl : longint;
@@ -79,7 +80,7 @@ implementation
             (GenerateThumbCode) or
             { simple constant? }
             (l=1) or ispowerof2(l,hl) or ispowerof2(l+1,hl) or ispowerof2(l-1,hl) then
-           inherited update_reference_reg_mul(maybe_const_reg,l)
+           inherited update_reference_reg_mul(maybe_const_reg,regsize,l)
          else if (location.reference.base<>NR_NO) then
            begin
              hreg:=cg.getaddressregister(current_asmdata.CurrAsmList);

+ 1 - 1
compiler/arm/raarmgas.pas

@@ -1286,7 +1286,7 @@ Unit raarmgas;
                   end;
               end;
           end;
-        maxlen:=max(length(hs),5);
+        maxlen:=min(length(hs),5);
         actopcode:=A_NONE;
         j2:=maxlen;
         hs2:=hs;

+ 2 - 1
compiler/assemble.pas

@@ -83,6 +83,7 @@ interface
         lastsectype : TAsmSectionType;
         procedure WriteSourceLine(hp: tailineinfo);
         procedure WriteTempalloc(hp: tai_tempalloc);
+        Function DoPipe:boolean;
       public
         {# Returns the complete path and executable name of the assembler
            program.
@@ -271,7 +272,7 @@ Implementation
                                  TExternalAssembler
 *****************************************************************************}
 
-    Function DoPipe:boolean;
+    Function TExternalAssembler.DoPipe:boolean;
       begin
         DoPipe:=(cs_asm_pipe in current_settings.globalswitches) and
                 (([cs_asm_extern,cs_asm_leave,cs_link_on_target] * current_settings.globalswitches) = []) and

+ 13 - 0
compiler/cgbase.pas

@@ -101,6 +101,12 @@ interface
          ,addr_dgroup      // the data segment group
          ,addr_seg         // used for getting the segment of an object, e.g. 'mov ax, SEG symbol'
          {$ENDIF}
+         {$IFDEF AARCH64}
+         ,addr_page
+         ,addr_pageoffset
+         ,addr_gotpage
+         ,addr_gotpageoffset
+         {$ENDIF AARCH64}
          );
 
 
@@ -329,6 +335,13 @@ interface
           OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
           OS_M64,OS_M128,OS_M256);
 
+       tcgsize2signed : array[tcgsize] of tcgsize = (OS_NO,
+          OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,OS_S8,OS_S16,OS_S32,OS_S64,OS_S128,
+          OS_F32,OS_F64,OS_F80,OS_C64,OS_F128,
+          OS_M8,OS_M16,OS_M32,OS_M64,OS_M128,OS_M256,OS_M8,OS_M16,OS_M32,
+          OS_M64,OS_M128,OS_M256);
+
+
        tcgloc2str : array[TCGLoc] of string[12] = (
             'LOC_INVALID',
             'LOC_VOID',

+ 2 - 2
compiler/cghlcpu.pas

@@ -65,14 +65,14 @@ uses
       procedure a_cmp_reg_reg_label(list: TAsmList; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel); override;
       procedure a_call_reg(list: TAsmList; reg: tregister); override;
       procedure a_call_name(list: TAsmList; const s: string; weak: boolean); override;
-      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); override;
+      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); override;
     end;
 
 implementation
 
    { thlbasecgcpu }
 
-    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure thlbasecgcpu.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
         internalerror(2012042801);
       end;

+ 19 - 8
compiler/cgobj.pas

@@ -247,7 +247,7 @@ unit cgobj;
           procedure a_loadaddr_ref_reg(list : TAsmList;const ref : treference;r : tregister);virtual; abstract;
 
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister); virtual;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister); virtual;
 
           { Multiplication with doubling result size.
             dstlo or dsthi may be NR_NO, in which case corresponding half of result is discarded. }
@@ -865,7 +865,14 @@ implementation
          ref : treference;
          tmpreg : tregister;
       begin
-         cgpara.check_simple_location;
+         if assigned(cgpara.location^.next) then
+           begin
+             tg.gethltemp(list,cgpara.def,cgpara.def.size,tt_persistent,ref);
+             a_load_reg_ref(list,size,size,r,ref);
+             a_load_ref_cgpara(list,size,ref,cgpara);
+             tg.ungettemp(list,ref);
+             exit;
+           end;
          paramanager.alloccgpara(list,cgpara);
          if cgpara.location^.shiftval<0 then
            begin
@@ -1333,8 +1340,10 @@ implementation
                       dec(tmpref.offset)
                     else
                       inc(tmpref.offset);
-                    a_load_ref_reg(list,OS_8,OS_16,tmpref,register);
-                    a_op_reg_reg(list,OP_OR,OS_16,tmpreg,register);
+                    tmpreg2:=makeregsize(list,register,OS_16);
+                    a_load_ref_reg(list,OS_8,OS_16,tmpref,tmpreg2);
+                    a_op_reg_reg(list,OP_OR,OS_16,tmpreg,tmpreg2);
+                    a_load_reg_reg(list,OS_16,tosize,tmpreg2,register);
                   end;
               OS_32,OS_S32:
                 if ref.alignment=2 then
@@ -1348,8 +1357,10 @@ implementation
                       dec(tmpref.offset,2)
                     else
                       inc(tmpref.offset,2);
-                    a_load_ref_reg(list,OS_16,OS_32,tmpref,register);
-                    a_op_reg_reg(list,OP_OR,OS_32,tmpreg,register);
+                    tmpreg2:=makeregsize(list,register,OS_32);
+                    a_load_ref_reg(list,OS_16,OS_32,tmpref,tmpreg2);
+                    a_op_reg_reg(list,OP_OR,OS_32,tmpreg,tmpreg2);
+                    a_load_reg_reg(list,OS_32,tosize,tmpreg2,register);
                   end
                 else
                   begin
@@ -1368,7 +1379,7 @@ implementation
                         a_load_ref_reg(list,OS_8,OS_32,tmpref,tmpreg2);
                         a_op_reg_reg(list,OP_OR,OS_32,tmpreg2,tmpreg);
                       end;
-                    a_load_reg_reg(list,OS_32,OS_32,tmpreg,register);
+                    a_load_reg_reg(list,OS_32,tosize,tmpreg,register);
                   end
               else
                 a_load_ref_reg(list,fromsize,tosize,tmpref,register);
@@ -2517,7 +2528,7 @@ implementation
       end;
 
 
-    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tcgsize; src, dst: TRegister);
+    procedure tcg.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tcgsize; src, dst: TRegister);
       begin
         internalerror(2014070601);
       end;

+ 6 - 0
compiler/cgutils.pas

@@ -63,6 +63,12 @@ unit cgutils;
          addressmode : taddressmode;
          shiftmode   : tshiftmode;
 {$endif arm}
+{$ifdef aarch64}
+         symboldata  : tlinkedlistitem;
+         shiftimm    : byte;
+         addressmode : taddressmode;
+         shiftmode   : tshiftmode;
+{$endif aarch64}
 {$ifdef avr}
          addressmode : taddressmode;
 {$endif avr}

+ 1 - 1
compiler/finput.pas

@@ -454,7 +454,7 @@ uses
         fileopen:=false;
         try
           f:=CFileStreamClass.Create(filename,fmOpenRead);
-          fileopen:=true;
+          fileopen:=CStreamError=0;
         except
         end;
       end;

+ 1 - 0
compiler/fpcdefs.inc

@@ -242,6 +242,7 @@
   {$define cpurox}
   {$define cputargethasfixedstack}
   {$define cpurefshaveindexreg}
+  {$define SUPPORT_GET_FRAME}
 {$endif aarch64}
 
 {$IFDEF MACOS}

+ 3 - 1
compiler/fppu.pas

@@ -23,6 +23,8 @@ unit fppu;
 
 {$i fpcdefs.inc}
 
+{ $define DEBUG_UNIT_CRC_CHANGES}
+
 { close ppufiles on system that are
   short on file handles like DOS system PM }
 {$ifdef GO32V2}
@@ -1477,7 +1479,7 @@ var
         { we can now derefence all pointers to the implementation parts }
         tstoredsymtable(globalsymtable).derefimpl;
         if assigned(localsymtable) then
-          tstoredsymtable(localsymtable).derefimpl;
+            tstoredsymtable(localsymtable).derefimpl;
 
          { read whole program optimisation-related information }
          wpoinfo:=tunitwpoinfo.ppuload(ppufile);

+ 2 - 2
compiler/globals.pas

@@ -445,8 +445,8 @@ interface
         fputype : fpu_hard;
   {$endif sparc}
   {$ifdef arm}
-        cputype : cpu_armv3;
-        optimizecputype : cpu_armv3;
+        cputype : cpu_armv4;
+        optimizecputype : cpu_armv4;
         fputype : fpu_fpa;
   {$endif arm}
   {$ifdef x86_64}

+ 3 - 3
compiler/hlcg2ll.pas

@@ -171,7 +171,7 @@ unit hlcg2ll;
           procedure a_loadaddr_ref_reg(list : TAsmList;fromsize, tosize : tdef;const ref : treference;r : tregister);override;
 
           { bit scan instructions }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); override;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override;
 
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); override;
@@ -586,9 +586,9 @@ implementation
       cg.a_loadaddr_ref_reg(list,ref,r);
     end;
 
-  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister);
+  procedure thlcg2ll.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister);
     begin
-      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(size),src,dst);
+      cg.a_bit_scan_reg_reg(list,reverse,def_cgsize(srcsize),def_cgsize(dstsize),src,dst);
     end;
 
   procedure thlcg2ll.a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister);

+ 19 - 7
compiler/hlcgobj.pas

@@ -301,7 +301,7 @@ unit hlcgobj;
          public
 
           { bit scan instructions (still need transformation to thlcgobj) }
-          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); virtual; abstract;
+          procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); virtual; abstract;
 
           { fpu move instructions }
           procedure a_loadfpu_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister); virtual; abstract;
@@ -536,9 +536,9 @@ unit hlcgobj;
          public
 
           procedure gen_load_para_value(list:TAsmList);virtual;
-         protected
           { helpers called by gen_load_para_value }
           procedure g_copyvalueparas(p:TObject;arg:pointer);virtual;
+         protected
           procedure gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation;const cgpara: tcgpara;locintsize: longint);virtual;
           procedure init_paras(p:TObject;arg:pointer);
          protected
@@ -4523,7 +4523,11 @@ implementation
          if (tparavarsym(p).varspez=vs_value) then
           begin
             include(current_procinfo.flags,pi_needs_implicit_finally);
-            location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).localloc,href,is_open_array(tparavarsym(p).vardef),sizeof(pint));
+            location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).localloc,href,
+              is_open_array(tparavarsym(p).vardef) or
+              ((target_info.system in systems_caller_copy_addr_value_para) and
+               paramanager.push_addr_param(vs_value,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)),
+              sizeof(pint));
             if is_open_array(tparavarsym(p).vardef) then
               begin
                 if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
@@ -4543,7 +4547,8 @@ implementation
           end;
        end;
       { open arrays can contain elements requiring init/final code, so the else has been removed here }
-      if (tparavarsym(p).varspez=vs_value) and
+      if not(target_info.system in systems_caller_copy_addr_value_para) and
+         (tparavarsym(p).varspez=vs_value) and
          (is_open_array(tparavarsym(p).vardef) or
           is_array_of_const(tparavarsym(p).vardef)) then
         begin
@@ -4581,7 +4586,11 @@ implementation
                  if not((tparavarsym(p).vardef.typ=variantdef) and
                    paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
                    begin
-                     location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,is_open_array(tparavarsym(p).vardef),sizeof(pint));
+                     location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,
+                       is_open_array(tparavarsym(p).vardef) or
+                       ((target_info.system in systems_caller_copy_addr_value_para) and
+                        paramanager.push_addr_param(vs_value,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)),
+                       sizeof(pint));
                      if is_open_array(tparavarsym(p).vardef) then
                        begin
                          if paramanager.push_high_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption) then
@@ -4682,8 +4691,11 @@ implementation
     begin
       list:=TAsmList(arg);
       if (tsym(p).typ=paravarsym) and
-         (tparavarsym(p).varspez=vs_value) and
-        (paramanager.push_addr_param(tparavarsym(p).varspez,tparavarsym(p).vardef,current_procinfo.procdef.proccalloption)) then
+         ((vo_has_local_copy in tparavarsym(p).varoptions) or
+          (not(target_info.system in systems_caller_copy_addr_value_para) and
+           (is_open_array(tparavarsym(p).vardef) or
+            is_array_of_const(tparavarsym(p).vardef)) and
+           (tparavarsym(p).varspez=vs_value))) then
         begin
           { we have no idea about the alignment at the caller side }
           location_get_data_ref(list,tparavarsym(p).vardef,tparavarsym(p).initialloc,href,true,1);

+ 9 - 8
compiler/i386/cpuinfo.pas

@@ -151,7 +151,8 @@ Const
 
 type
    tcpuflags =
-      (CPUX86_HAS_SSEUNIT,
+      (CPUX86_HAS_CMOV,
+       CPUX86_HAS_SSEUNIT,
        CPUX86_HAS_BMI1,
        CPUX86_HAS_BMI2,
        CPUX86_HAS_POPCNT,
@@ -167,13 +168,13 @@ type
      { cpu_none      } [],
      { cpu_386       } [],
      { cpu_Pentium   } [],
-     { cpu_Pentium2  } [],
-     { cpu_Pentium3  } [CPUX86_HAS_SSEUNIT],
-     { cpu_Pentium4  } [CPUX86_HAS_SSEUNIT],
-     { cpu_PentiumM  } [CPUX86_HAS_SSEUNIT],
-     { cpu_core_i    } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
-     { cpu_core_avx  } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
-     { cpu_core_avx2 } [CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
+     { cpu_Pentium2  } [CPUX86_HAS_CMOV],
+     { cpu_Pentium3  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_Pentium4  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_PentiumM  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT],
+     { cpu_core_i    } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT],
+     { cpu_core_avx  } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT],
+     { cpu_core_avx2 } [CPUX86_HAS_CMOV,CPUX86_HAS_SSEUNIT,CPUX86_HAS_POPCNT,CPUX86_HAS_AVXUNIT,CPUX86_HAS_BMI1,CPUX86_HAS_BMI2,CPUX86_HAS_LZCNT,CPUX86_HAS_MOVBE,CPUX86_HAS_FMA]
    );
 
 

+ 8 - 0
compiler/i386/i386att.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',

+ 8 - 0
compiler/i386/i386atts.inc

@@ -1016,5 +1016,13 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 8 - 0
compiler/i386/i386int.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'vfmadd132pd',
 'vfmadd213pd',
 'vfmadd231pd',

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1926;
+1942;

+ 8 - 0
compiler/i386/i386op.inc

@@ -956,6 +956,14 @@ A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRLVD,
 A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
 A_VFMADD132PD,
 A_VFMADD213PD,
 A_VFMADD231PD,

+ 9 - 1
compiler/i386/i386prop.inc

@@ -910,7 +910,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
@@ -956,6 +956,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Mop3, Ch_Rop2, Ch_Rop1)),

+ 112 - 0
compiler/i386/i386tab.inc

@@ -12796,6 +12796,118 @@
     code    : #241#242#243#249#1#69#61#80;
     flags   : if_avx2
   ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
   (
     opcode  : A_VFMADD132PD;
     ops     : 3;

+ 1 - 1
compiler/i386/popt386.pas

@@ -2076,7 +2076,7 @@ begin
                             end;
                         end;
                     end;
-                  if (current_settings.cputype>=cpu_Pentium2) then
+                  if CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype] then
                     begin
                        { check for
                               jCC   xxx

+ 8 - 0
compiler/i8086/i8086att.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'add4s',
 'brkem',
 'clr1',

+ 8 - 0
compiler/i8086/i8086atts.inc

@@ -1030,5 +1030,13 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 8 - 0
compiler/i8086/i8086int.inc

@@ -956,6 +956,14 @@
 'vpsravd',
 'vpsrlvd',
 'vpsrlvq',
+'vgatherdpd',
+'vgatherdps',
+'vgatherqpd',
+'vgatherqps',
+'vpgatherdd',
+'vpgatherdq',
+'vpgatherqd',
+'vpgatherqq',
 'add4s',
 'brkem',
 'clr1',

+ 1 - 1
compiler/i8086/i8086nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1954;
+1970;

+ 8 - 0
compiler/i8086/i8086op.inc

@@ -956,6 +956,14 @@ A_VPSLLVQ,
 A_VPSRAVD,
 A_VPSRLVD,
 A_VPSRLVQ,
+A_VGATHERDPD,
+A_VGATHERDPS,
+A_VGATHERQPD,
+A_VGATHERQPS,
+A_VPGATHERDD,
+A_VPGATHERDQ,
+A_VPGATHERQD,
+A_VPGATHERQQ,
 A_ADD4S,
 A_BRKEM,
 A_CLR1,

+ 9 - 1
compiler/i8086/i8086prop.inc

@@ -910,7 +910,7 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
-(Ch: (Ch_Wop2, Ch_Rop1, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
@@ -958,6 +958,14 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_Mop2, Ch_Rop1, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),

+ 112 - 0
compiler/i8086/i8086tab.inc

@@ -12796,6 +12796,118 @@
     code    : #241#242#243#249#1#69#61#80;
     flags   : if_avx2
   ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERDPS;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#146#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VGATHERQPS;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#147#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDD;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem32,ot_ymmreg,ot_none);
+    code    : #241#242#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERDQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_xmem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#144#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem32,ot_xmmreg,ot_none);
+    code    : #241#242#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQD;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_ymem32,ot_xmmreg,ot_none);
+    code    : #241#242#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_xmmreg,ot_xmem64,ot_xmmreg,ot_none);
+    code    : #241#242#243#249#1#145#62#72;
+    flags   : if_avx2
+  ),
+  (
+    opcode  : A_VPGATHERQQ;
+    ops     : 3;
+    optypes : (ot_ymmreg,ot_ymem64,ot_ymmreg,ot_none);
+    code    : #241#242#243#244#249#1#145#62#72;
+    flags   : if_avx2
+  ),
   (
     opcode  : A_ADD4S;
     ops     : 0;

+ 5 - 4
compiler/i8086/n8086mem.pas

@@ -27,6 +27,7 @@ interface
 
     uses
       globtype,
+      symtype,
       cgbase,cpuinfo,cpubase,
       node,nmem,ncgmem,nx86mem,ni86mem;
 
@@ -45,7 +46,7 @@ interface
        ti8086vecnode = class(tcgvecnode)
         protected
          function first_arraydef: tnode;override;
-         procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override;
+         procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);override;
        end;
 
 implementation
@@ -53,7 +54,7 @@ implementation
     uses
       systems,globals,constexp,
       cutils,verbose,
-      symbase,symconst,symdef,symtable,symtype,symsym,symx86,symcpu,
+      symbase,symconst,symdef,symtable,symsym,symx86,symcpu,
       parabase,paramgr,
       aasmtai,aasmdata,
       nld,ncon,nadd,ncal,ncnv,
@@ -212,13 +213,13 @@ implementation
       end;
 
 
-    procedure ti8086vecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
+    procedure ti8086vecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
       var
         saveseg: TRegister;
       begin
         saveseg:=location.reference.segment;
         location.reference.segment:=NR_NO;
-        inherited update_reference_reg_mul(maybe_const_reg,l);
+        inherited;
         location.reference.segment:=saveseg;
       end;
 

+ 2 - 2
compiler/jvm/hlcgcpu.pas

@@ -114,7 +114,7 @@ uses
       procedure gen_exit_code(list: TAsmList); override;
 
       { unimplemented/unnecessary routines }
-      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); override;
+      procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister); override;
       procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); override;
       procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle); override;
       procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
@@ -1884,7 +1884,7 @@ implementation
       { nothing }
     end;
 
-  procedure thlcgjvm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister);
+  procedure thlcgjvm.a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; srcsize, dstsize: tdef; src, dst: tregister);
     begin
       internalerror(2012090201);
     end;

+ 2 - 2
compiler/link.pas

@@ -620,8 +620,8 @@ Implementation
         FillChar(Info,sizeof(Info),0);
         if cs_link_on_target in current_settings.globalswitches then
           begin
-            Info.ResName:=outputexedir+ChangeFileExt(inputfilename,'_link.res');
-            Info.ScriptName:=outputexedir+ChangeFileExt(inputfilename,'_script.res');
+            Info.ResName:=ChangeFileExt(inputfilename,'_link.res');
+            Info.ScriptName:=ChangeFileExt(inputfilename,'_script.res');
           end
         else
           begin

+ 10 - 1
compiler/m68k/aasmcpu.pas

@@ -496,13 +496,22 @@ type
           // FPU opcodes
           A_FSXX, A_FSEQ, A_FSNE, A_FSLT, A_FSLE, A_FSGT, A_FSGE:
              result:=operand_write;
+          A_FABS,A_FSQRT,A_FNEG:
+             if ops = 1 then
+               begin
+                 if opnr = 0 then
+                   result:=operand_readwrite;
+               end
+             else
+               if opnr = 1 then
+                 result:=operand_write;
           A_FMOVE:
              if opnr=1 then
                result:=operand_write;
           A_FADD, A_FSUB, A_FMUL, A_FDIV:
              if opnr=1 then
                result:=operand_readwrite;
-          A_FCMP:
+          A_FCMP, A_FTST:
              begin end; { operand_read }
 
           else begin

+ 4 - 4
compiler/m68k/cgcpu.pas

@@ -1840,7 +1840,7 @@ unit cgcpu;
             if saved_fpu_registers[r] in rg[R_FPUREGISTER].used_in_proc then
               begin
                 hfreg:=newreg(R_FPUREGISTER,saved_fpu_registers[r],R_SUBWHOLE);
-                inc(fsize,10{sizeof(extended)});
+                inc(fsize,12{sizeof(extended)});
                 fpuregs:=fpuregs + [saved_fpu_registers[r]];
               end;
 
@@ -1873,7 +1873,7 @@ unit cgcpu;
               begin
                 { size is always longword aligned, while fsize is not }
                 inc(href.offset,size);
-                if fsize = 10{sizeof(extended)} then
+                if fsize = 12{sizeof(extended)} then
                   list.concat(taicpu.op_reg_ref(A_FMOVE,S_FX,hfreg,href))
                 else
                   list.concat(taicpu.op_regset_ref(A_FMOVEM,S_FX,[],[],fpuregs,href));
@@ -1931,7 +1931,7 @@ unit cgcpu;
           for r:=low(saved_address_registers) to high(saved_address_registers) do
             if saved_fpu_registers[r] in rg[R_FPUREGISTER].used_in_proc then
               begin
-                inc(fsize,10{sizeof(extended)});
+                inc(fsize,12{sizeof(extended)});
                 hfreg:=newreg(R_FPUREGISTER,saved_address_registers[r],R_SUBWHOLE);
                 { Allocate register so the optimizer does not remove the load }
                 a_reg_alloc(list,hfreg);
@@ -1961,7 +1961,7 @@ unit cgcpu;
           begin
             { size is always longword aligned, while fsize is not }
             inc(href.offset,size);
-            if fsize = 10{sizeof(extended)} then
+            if fsize = 12{sizeof(extended)} then
               list.concat(taicpu.op_ref_reg(A_FMOVE,S_FX,href,hfreg))
             else
               list.concat(taicpu.op_ref_regset(A_FMOVEM,S_FX,href,[],[],fpuregs));

+ 1 - 2
compiler/m68k/cpunode.pas

@@ -40,11 +40,10 @@ unit cpunode;
 //       nppcflw,
          n68kmem,
 //       nppcset,
-//       nppcinl,
+         n68kinl,
 //       nppcopt,
        { this not really a node }
 //       nppcobj,
-//       nppcmat,
          n68kmat,
          n68kcnv,
          { symtable }

+ 36 - 18
compiler/m68k/n68kadd.pas

@@ -117,6 +117,7 @@ implementation
     procedure t68kaddnode.second_addfloat;
       var
         op    : TAsmOp;
+        href  : TReference;
       begin
         pass_left_right;
 
@@ -141,17 +142,27 @@ implementation
         case current_settings.fputype of
           fpu_68881:
             begin
-              // put both operands in a register
-              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
+              { have left in the register, right can be a memory location }
               hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
 
-              // initialize the result
+              { initialize the result }
               location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
               location.register := cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
 
-              // emit the actual operation
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FMOVE,S_FX,left.location.register,location.register));
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_FX,right.location.register,location.register));
+              { emit the actual operation }
+              cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,OS_NO,OS_NO,left.location.register,location.register);
+              case right.location.loc of
+                LOC_FPUREGISTER,LOC_CFPUREGISTER:
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_FX,right.location.register,location.register));
+                LOC_REFERENCE,LOC_CREFERENCE:
+                    begin
+                      href:=right.location.reference;
+                      tcg68k(cg).fixref(current_asmdata.CurrAsmList,href);
+                      current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,tcgsize2opsize[right.location.size],href,location.register));
+                    end
+                else
+                  internalerror(2015021501);
+              end;
             end;
           else
             // softfpu should be handled in pass1, others are not yet supported...
@@ -164,6 +175,7 @@ implementation
       var
         tmpreg : tregister;
         ai: taicpu;
+        href  : TReference;
       begin
         pass_left_right;
         if (nf_swapped in flags) then
@@ -172,28 +184,34 @@ implementation
         case current_settings.fputype of
           fpu_68881:
             begin
-              location_reset(location,LOC_FLAGS,OS_NO);
-
-              { force fpureg as location, left right doesn't matter
-                as both will be in a fpureg }
+              { force left fpureg as register, right can be reference }
               hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
-              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
-              // emit compare
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMP,S_FX,right.location.register,left.location.register));
-
-              location.resflags:=getresflags(false);
+              { emit compare }
+              case right.location.loc of
+                LOC_FPUREGISTER,LOC_CFPUREGISTER:
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCMP,S_FX,right.location.register,left.location.register));
+                LOC_REFERENCE,LOC_CREFERENCE:
+                    begin
+                      href:=right.location.reference;
+                      tcg68k(cg).fixref(current_asmdata.CurrAsmList,href);
+                      current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_FCMP,tcgsize2opsize[right.location.size],href,left.location.register));
+                    end
+                else
+                  internalerror(2015021502);
+              end;
 
               // temporary(?) hack, move condition result back to the CPU from the FPU.
               // 6888x has its own FBcc branch instructions and FScc flags->reg instruction,
               // which we don't support yet in the rest of the cg. (KB)
               tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_8);
               ai:=taicpu.op_reg(A_FSxx,S_B,tmpreg);
-              ai.SetCondition(flags_to_cond(location.resflags));
+              ai.SetCondition(flags_to_cond(getresflags(false)));
               current_asmdata.CurrAsmList.concat(ai);
-              current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_TST,S_B,tmpreg));
-              location.resflags:=F_E;
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_NEG,S_B,tmpreg));
 
+              location_reset(location,LOC_REGISTER,OS_8);
+              location.register:=tmpreg;
             end;
           else
             // softfpu should be handled in pass1, others are not yet supported...

+ 212 - 0
compiler/m68k/n68kinl.pas

@@ -0,0 +1,212 @@
+{
+    Copyright (c) 2015 by the Free Pascal Development team
+
+    Generates Motorola 68k inline nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit n68kinl;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ninl,ncginl,cpubase;
+
+    type
+      t68kinlinenode = class(tcgInlineNode)
+        function first_abs_real: tnode; override;
+        function first_sqr_real: tnode; override;
+        function first_sqrt_real: tnode; override;
+        {function first_arctan_real: tnode; override;
+        function first_ln_real: tnode; override;
+        function first_cos_real: tnode; override;
+        function first_sin_real: tnode; override;}
+
+        procedure second_abs_real; override;
+        procedure second_sqr_real; override;
+        procedure second_sqrt_real; override;
+        {procedure second_arctan_real; override;
+        procedure second_ln_real; override;
+        procedure second_cos_real; override;
+        procedure second_sin_real; override;
+        procedure second_prefetch; override;
+        procedure second_abs_long; override;}
+      private
+        procedure second_do_operation(op: TAsmOp);
+      end;
+
+
+implementation
+
+    uses
+      globtype,verbose,globals,cutils,
+      cpuinfo,defutil,symdef,aasmdata,aasmcpu,aasmtai,
+      cgbase,cgutils,pass_1,pass_2,
+      ncgutil,cgobj,cgcpu,hlcgobj;
+
+{*****************************************************************************
+                              t68kinlinenode
+*****************************************************************************}
+
+    function t68kinlinenode.first_abs_real : tnode;
+      begin
+        if (cs_fp_emulation in current_settings.moduleswitches) then
+          result:=inherited first_abs_real
+        else
+          begin
+            case current_settings.fputype of
+              fpu_68881:
+                expectloc:=LOC_FPUREGISTER;
+              else
+                internalerror(2015022206);
+            end;
+            first_abs_real:=nil;
+          end;
+      end;
+
+    function t68kinlinenode.first_sqr_real : tnode;
+      begin
+        if (cs_fp_emulation in current_settings.moduleswitches) then
+          result:=inherited first_sqr_real
+        else
+          begin
+            case current_settings.fputype of
+              fpu_68881:
+                expectloc:=LOC_FPUREGISTER;
+              else
+                internalerror(2015022201);
+            end;
+            first_sqr_real:=nil;
+          end;
+      end;
+
+    function t68kinlinenode.first_sqrt_real : tnode;
+      begin
+        if (cs_fp_emulation in current_settings.moduleswitches) then
+          result:=inherited first_sqrt_real
+        else
+          begin
+            case current_settings.fputype of
+              fpu_68881:
+                expectloc:=LOC_FPUREGISTER;
+              else
+                internalerror(2015022203);
+            end;
+            first_sqrt_real:=nil;
+          end;
+      end;
+
+    procedure t68kinlinenode.second_abs_real;
+      begin
+        //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_abs_real called!')));
+        second_do_operation(A_FABS);
+      end;
+
+    procedure t68kinlinenode.second_sqr_real;
+      begin
+        secondpass(left);
+        case current_settings.fputype of
+          fpu_68881:
+            begin
+              //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_sqr_real called!')));
+              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              location_copy(location,left.location);
+              if left.location.loc=LOC_CFPUREGISTER then
+                begin
+                  //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_srq_real called!: left was cfpuregister!')));
+                  location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+                  location.loc := LOC_FPUREGISTER;
+                  cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmlist,OS_NO,OS_NO,left.location.register,location.register);
+                end;
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FMUL,S_FX,left.location.register,location.register));
+            end;
+        else
+          internalerror(2015022202);
+        end;
+      end;
+
+    procedure t68kinlinenode.second_sqrt_real;
+      begin
+        //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('second_sqrt_real called!')));
+        second_do_operation(A_FSQRT);
+      end;
+
+    procedure t68kinlinenode.second_do_operation(op: TAsmOp);
+      var
+        href: TReference;
+      begin
+        secondpass(left);
+        case current_settings.fputype of
+          fpu_68881:
+            begin
+              location_reset(location,LOC_FPUREGISTER,left.location.size);
+
+              case left.location.loc of
+                LOC_FPUREGISTER:
+                  begin
+                    location.register:=left.location.register;
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg(op,S_FX,location.register))
+                  end;
+                LOC_CFPUREGISTER:
+                  begin
+                    location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_FX,left.location.register,location.register));
+                  end;
+                LOC_REFERENCE,LOC_CREFERENCE:
+                  begin
+                    location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+                    href:=left.location.reference;
+                    tcg68k(cg).fixref(current_asmdata.CurrAsmList,href);
+                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,tcgsize2opsize[left.location.size],href,location.register));
+                  end;
+                else
+                  internalerror(2015022205);
+              end;
+            end;
+        else
+          internalerror(2015022204);
+        end;
+      end;
+
+      { ideas for second_abs_long (KB) }
+
+      { This is probably faster on 68000 than the generic implementation,
+        because shifting is slow on the original 68000, maybe also on the 68020?
+        Also needs to be tested on 040/060. This can also work on a CF.
+        input - d0, output - d2
+        move.l d0,d2
+        btst   #31,d2
+        sne    d1
+        extb.l d1 (or ext.w + ext.l on 68000)
+        eor.l  d1,d2
+        sub.l  d1,d2
+      }
+
+      { Solution using bitfield extraction, we don't support the necessary asm
+        construct for this yet, probably this is the fastest on 020, slower on
+        040/060 than the one above, doesn't work on '000 or CF.
+        input - d0, output - d2
+        move.l  d0,d2
+        bfexts  d0[0:1],d1
+        eor.l   d1,d2
+        sub.l   d1,d2
+      }
+begin
+  cinlinenode:=t68kinlinenode;
+end.

+ 43 - 1
compiler/m68k/n68kmat.pas

@@ -42,6 +42,10 @@ interface
         procedure emit_mod_reg_reg(signed: boolean;denum,num : tregister);override;
       end;
 
+      tm68kunaryminusnode = class(tcgunaryminusnode)
+        procedure second_float;override;
+      end;
+
       tm68kshlshrnode = class(tshlshrnode)
          procedure pass_generate_code;override;
          { everything will be handled in pass_2 }
@@ -172,6 +176,43 @@ implementation
     end;
 
 
+{*****************************************************************************
+                          TM68KUNARYMINUSNODE
+*****************************************************************************}
+
+    procedure tm68kunaryminusnode.second_float;
+      var
+        href: treference;
+      begin
+        secondpass(left);
+        location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
+        //current_asmdata.CurrAsmList.concat(tai_comment.create(strpnew('unaryminus second_float called!')));
+
+        case left.location.loc of
+          LOC_REFERENCE,
+          LOC_CREFERENCE :
+            begin
+              location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+              href:=left.location.reference;
+              tcg68k(cg).fixref(current_asmdata.CurrAsmList,href);
+              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_FNEG,tcgsize2opsize[left.location.size],href,location.register));
+            end;
+          LOC_FPUREGISTER:
+            begin
+              location.register:=left.location.register;
+              current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FNEG,S_FX,location.register));
+            end;
+          LOC_CFPUREGISTER:
+            begin
+               location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
+               current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FNEG,S_FX,left.location.register,location.register));
+            end;
+          else
+            internalerror(200306021);
+        end;
+      end;
+
+
 {*****************************************************************************
                              TM68KSHLRSHRNODE
 *****************************************************************************}
@@ -207,7 +248,7 @@ implementation
             hreg64lo:=left.location.register64.reglo;
 
             shiftval := tordconstnode(right).value.svalue;
-	    shiftval := shiftval and 63;
+            shiftval := shiftval and 63;
             if shiftval > 31 then
               begin
                 if nodetype = shln then
@@ -284,5 +325,6 @@ implementation
 begin
    cnotnode:=tm68knotnode;
    cmoddivnode:=tm68kmoddivnode;
+   cunaryminusnode:=tm68kunaryminusnode;
    cshlshrnode:=tm68kshlshrnode;
 end.

+ 3 - 2
compiler/m68k/n68kmem.pas

@@ -27,12 +27,13 @@ interface
 
     uses
       globtype,
+      symtype,
       cgbase,cpuinfo,cpubase,
       node,nmem,ncgmem;
 
     type
        t68kvecnode = class(tcgvecnode)
-          procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);override;
+          procedure update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint); override;
           //procedure pass_generate_code;override;
        end;
 
@@ -59,7 +60,7 @@ implementation
     { the live range of the LOC_CREGISTER will most likely overlap the   }
     { the live range of the target LOC_(C)REGISTER)                      }
     { The passed register may be a LOC_CREGISTER as well.                }
-    procedure t68kvecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
+    procedure t68kvecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
       var
         hreg: tregister;
         scaled: boolean;

+ 3 - 1
compiler/mips/aoptcpu.pas

@@ -310,7 +310,9 @@ unit aoptcpu;
 
   function TCpuAsmOptimizer.TryRemoveMovToRefIndex(var p: tai; next: taicpu): boolean;
     begin
-      result:=(next.oper[1]^.typ=top_ref) and
+      result:=(next.ops>1) and
+        (next.oper[1]^.typ=top_ref) and
+        (next.oper[1]^.ref^.refaddr<>addr_full) and
         (next.oper[1]^.ref^.base=taicpu(p).oper[0]^.reg) and
         (not RegModifiedBetween(taicpu(p).oper[1]^.reg,p,next)) and
         Assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(next.next)));

+ 1 - 1
compiler/msg/errord.msg

@@ -3528,7 +3528,7 @@ F*0*_Es werden nur Optionen aufgelistet, die f
 3*2Anasmelf_ELF32 (Linux) Datei mit Hilfe von Nasm
 3*2Anasmwin32_Win32 Objektdatei mit Hilfe von Nasm
 3*2Anasmwdosx_Win32/WDOSX Objektdatei mit Hilfe von Nasm
-3*2Anasmdarwin Macho32 Objektdatei mit Hilfe von Nasm (experimentell)
+3*2Anasmdarwin_Macho32 Objektdatei mit Hilfe von Nasm (experimentell)
 3*2Awasm_Obj Datei mit Hilfe von Wasm (Watcom)
 3*2Anasmobj_Obj Datei mit Hilfe von Nasm
 3*2Amasm_Obj Datei mit Hilfe von Masm (Microsoft)

+ 1 - 1
compiler/msg/errordu.msg

@@ -3528,7 +3528,7 @@ F*0*_Es werden nur Optionen aufgelistet, die für die voreingestellte oder ausge
 3*2Anasmelf_ELF32 (Linux) Datei mit Hilfe von Nasm
 3*2Anasmwin32_Win32 Objektdatei mit Hilfe von Nasm
 3*2Anasmwdosx_Win32/WDOSX Objektdatei mit Hilfe von Nasm
-3*2Anasmdarwin Macho32 Objektdatei mit Hilfe von Nasm (experimentell)
+3*2Anasmdarwin_Macho32 Objektdatei mit Hilfe von Nasm (experimentell)
 3*2Awasm_Obj Datei mit Hilfe von Wasm (Watcom)
 3*2Anasmobj_Obj Datei mit Hilfe von Nasm
 3*2Amasm_Obj Datei mit Hilfe von Masm (Microsoft)

+ 11 - 0
compiler/msg/errore.msg

@@ -1531,6 +1531,9 @@ parser_w_ptr_type_ignored=03338_W_Pointer type "$1" ignored
 parser_e_global_generic_references_static=03339_E_Global Generic template references static symtable
 % A generic declared in the interface section of a unit must not reference symbols that belong
 % solely to the implementation section of that unit.
+parser_u_already_compiled=03340_UL_Unit $1 has been already compiled meanwhile.
+% This tells you that the recursive reading of the uses clauses triggered already
+% a compilation of the current unit, so the current compilation can be aborded.
 %
 %
 %
@@ -3458,6 +3461,7 @@ new features, etc.):
 #    4 = x86_64
 #    6 = 680x0 targets
 #    8 = 8086 (16-bit) targets
+#    a = AArch64
 #    A = ARM
 #    e = in extended debug mode only
 #    F = help for the 'fpc' binary (independent of the target compiler)
@@ -3726,6 +3730,7 @@ F*2P<x>_Set target CPU (arm,avr,i386,jvm,m68k,mips,mipsel,powerpc,powerpc64,spar
 3*2Twince_Windows CE
 4*2Tdarwin_Darwin/Mac OS X
 4*2Tfreebsd_FreeBSD
+4*2Tiphonesim_ iPhoneSimulator
 4*2Tlinux_Linux
 4*2Tnetbsd_NetBSD
 4*2Topenbsd_OpenBSD
@@ -3736,6 +3741,7 @@ F*2P<x>_Set target CPU (arm,avr,i386,jvm,m68k,mips,mipsel,powerpc,powerpc64,spar
 6*2Tlinux_Linux
 6*2Tpalmos_PalmOS
 8*2Tmsdos_MS-DOS (and compatible)
+a*2Tdarwin_Darwin/iOS
 A*2Tandroid_Android
 A*2Tdarwin_Darwin/iPhoneOS/iOS
 A*2Tembedded_Embedded
@@ -3790,6 +3796,7 @@ A*2WA_Specify native type application (Windows)
 3*2Wb_Create a bundle instead of a library (Darwin)
 P*2Wb_Create a bundle instead of a library (Darwin)
 p*2Wb_Create a bundle instead of a library (Darwin)
+a*2Wb_Create a bundle instead of a library (Darwin)
 A*2Wb_Create a bundle instead of a library (Darwin)
 4*2Wb_Create a bundle instead of a library (Darwin)
 3*2WB_Create a relocatable image (Windows, Symbian)
@@ -3807,6 +3814,7 @@ P*2WC_Specify console type application (Classic Mac OS)
 A*2WD_Use DEFFILE to export functions of DLL or EXE (Windows)
 3*2We_Use external resources (Darwin)
 4*2We_Use external resources (Darwin)
+a*2We_Use external resources (Darwin)
 A*2We_Use external resources (Darwin)
 P*2We_Use external resources (Darwin)
 p*2We_Use external resources (Darwin)
@@ -3817,6 +3825,7 @@ A*2WG_Specify graphic type application (Windows)
 P*2WG_Specify graphic type application (Classic Mac OS)
 3*2Wi_Use internal resources (Darwin)
 4*2Wi_Use internal resources (Darwin)
+a*2Wi_Use internal resources (Darwin)
 A*2Wi_Use internal resources (Darwin)
 P*2Wi_Use internal resources (Darwin)
 p*2Wi_Use internal resources (Darwin)
@@ -3840,6 +3849,8 @@ A*2Wpxxxx_Specify the controller type; see fpc -i or fpc -iu for possible values
 m*2Wpxxxx_Specify the controller type; see fpc -i or fpc -iu for possible values
 V*2Wpxxxx_Specify the controller type; see fpc -i or fpc -iu for possible values
 3*2WP<x>_Minimum iOS deployment version: 3.0, 5.0.1, ... (iphonesim)
+4*2WP<x>_Minimum iOS deployment version: 8.0, 8.0.2, ... (iphonesim)
+a*2WP<x>_Minimum iOS deployment version: 7.0, 7.1.2, ... (Darwin)
 A*2WP<x>_Minimum iOS deployment version: 3.0, 5.0.1, ... (Darwin)
 3*2WR_Generate relocation code (Windows)
 4*2WR_Generate relocation code (Windows)

+ 3 - 2
compiler/msgidx.inc

@@ -441,6 +441,7 @@ const
   parser_e_default_value_val_const=03337;
   parser_w_ptr_type_ignored=03338;
   parser_e_global_generic_references_static=03339;
+  parser_u_already_compiled=03340;
   type_e_mismatch=04000;
   type_e_incompatible_types=04001;
   type_e_not_equal_types=04002;
@@ -1006,9 +1007,9 @@ const
   option_info=11024;
   option_help_pages=11025;
 
-  MsgTxtSize = 74953;
+  MsgTxtSize = 75323;
 
   MsgIdxMax : array[1..20] of longint=(
-    26,99,340,1000,96,57,126,29,202,64,
+    26,99,341,1000,96,57,126,29,202,64,
     58,20,1,1,1,1,1,1,1,1
   );

File diff suppressed because it is too large
+ 288 - 287
compiler/msgtxt.inc


+ 8 - 4
compiler/nadd.pas

@@ -2819,10 +2819,6 @@ implementation
 
         if try_make_mul32to64 then
           begin
-            { if the code generator can handle 32 to 64-bit muls, we're done here }
-            if not use_generic_mul32to64 then
-              exit;
-
             { this uses the same criteria for signedness as the 32 to 64-bit mul
               handling in the i386 code generator }
             if is_signed(left.resultdef) and is_signed(right.resultdef) then
@@ -3124,6 +3120,14 @@ implementation
                   internalerror(200103291);
                  expectloc:=LOC_FLAGS;
                end
+             else if (nodetype=muln) and
+                is_64bitint(resultdef) and
+                not use_generic_mul32to64 and
+                try_make_mul32to64 then
+               begin
+                 { if the code generator can handle 32 to 64-bit muls,
+                   we're done here }
+               end
 {$ifndef cpu64bitalu}
               { is there a 64 bit type ? }
              else if (torddef(ld).ordtype in [s64bit,u64bit,scurrency]) then

+ 2 - 4
compiler/nbas.pas

@@ -52,8 +52,7 @@ interface
           p_asm : TAsmList;
           currenttai : tai;
           { Used registers in assembler block }
-          used_regs_int,
-          used_regs_fpu : tcpuregisterset;
+          has_registerlist : boolean;
           constructor create(p : TAsmList);virtual;
           constructor create_get_position;
           destructor destroy;override;
@@ -642,8 +641,6 @@ implementation
         inherited create(asmn);
         p_asm:=p;
         currenttai:=nil;
-        used_regs_int:=[];
-        used_regs_fpu:=[];
       end;
 
 
@@ -751,6 +748,7 @@ implementation
           end
         else n.p_asm := nil;
         n.currenttai:=currenttai;
+        n.has_registerlist:=has_registerlist;
         result:=n;
       end;
 

File diff suppressed because it is too large
+ 652 - 420
compiler/ncal.pas


+ 5 - 2
compiler/ncgbas.pas

@@ -255,7 +255,9 @@ interface
            end;
 
          { Allocate registers used in the assembler block }
-         cg.alloccpuregisters(current_asmdata.CurrAsmList,R_INTREGISTER,used_regs_int);
+         { has_registerlist=true means that registers are specified and already allocated }
+         if (not has_registerlist) then
+           cg.allocallcpuregisters(current_asmdata.CurrAsmList);
 
          if (po_inline in current_procinfo.procdef.procoptions) then
            begin
@@ -344,7 +346,8 @@ interface
            end;
 
          { Release register used in the assembler block }
-         cg.dealloccpuregisters(current_asmdata.CurrAsmList,R_INTREGISTER,used_regs_int);
+         if (not has_registerlist) then
+           cg.deallocallcpuregisters(current_asmdata.CurrAsmList);
        end;
 
 

+ 2 - 2
compiler/ncgcal.pas

@@ -947,7 +947,7 @@ implementation
                  callref:=can_call_ref(href);
                  if not callref then
                    begin
-                     pvreg:=cg.getintregister(current_asmdata.CurrAsmList,proc_addr_size);
+                     pvreg:=hlcg.getaddressregister(current_asmdata.CurrAsmList,proc_addr_voidptrdef);
                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,proc_addr_size,proc_addr_size,href,pvreg);
                    end;
 
@@ -1026,7 +1026,7 @@ implementation
               secondpass(right);
               callref:=false;
 
-              pvreg:=cg.getintregister(current_asmdata.CurrAsmList,proc_addr_size);
+              pvreg:=hlcg.getaddressregister(current_asmdata.CurrAsmList,proc_addr_voidptrdef);
               { Only load OS_ADDR from the reference (when converting to hlcg:
                 watch out with procedure of object) }
               if po_is_block in procdefinition.procoptions then

+ 1 - 1
compiler/ncgcnv.pas

@@ -364,7 +364,7 @@ interface
             {$ifdef cpu_uses_separate_address_registers}
               if getregtype(left.location.register)<>R_ADDRESSREGISTER then
                 begin
-                  location.reference.base:=rg.getaddressregister(current_asmdata.CurrAsmList);
+                  location.reference.base:=cg.getaddressregister(current_asmdata.CurrAsmList);
                   cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,
                           left.location.register,location.reference.base);
                 end

+ 4 - 8
compiler/ncginl.pas

@@ -749,16 +749,12 @@ implementation
       secondpass(left);
 
       opsize:=tcgsize2unsigned[left.location.size];
-      if opsize < OS_32 then
-        opsize:=OS_32;
-
-      if (left.location.loc <> LOC_REGISTER) or
-         (left.location.size <> opsize) then
+      if not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
         hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
 
-      location_reset(location,LOC_REGISTER,opsize);
-      location.register := cg.getintregister(current_asmdata.CurrAsmList,opsize);
-      cg.a_bit_scan_reg_reg(current_asmdata.CurrAsmList,reverse,opsize,left.location.register,location.register);
+      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
+      location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);
+      cg.a_bit_scan_reg_reg(current_asmdata.CurrAsmList,reverse,opsize,location.size,left.location.register,location.register);
     end;
 
 

+ 11 - 3
compiler/ncgld.pas

@@ -768,9 +768,17 @@ implementation
                            (right.resultdef.typ=floatdef) and
                            (left.location.size<>right.location.size) then
                           begin
-                            hlcg.a_loadfpu_ref_ref(current_asmdata.CurrAsmList,
-                              right.resultdef,left.resultdef,
-                              right.location.reference,left.location.reference)
+                            { assume that all float types can be handed by the
+                              fpu if one can be handled by the fpu }
+                            if not use_vectorfpu(left.resultdef) or
+                               not use_vectorfpu(right.resultdef) then
+                              hlcg.a_loadfpu_ref_ref(current_asmdata.CurrAsmList,
+                                right.resultdef,left.resultdef,
+                                right.location.reference,left.location.reference)
+                            else
+                              hlcg.a_loadmm_ref_ref(current_asmdata.CurrAsmList,
+                                right.resultdef,left.resultdef,
+                                right.location.reference,left.location.reference,mms_movescalar)
                           end
                         else
                           begin

+ 48 - 26
compiler/ncgmem.pas

@@ -27,7 +27,8 @@ unit ncgmem;
 interface
 
     uses
-      globtype,cgbase,cpuinfo,cpubase,
+      globtype,cgbase,cgutils,cpuinfo,cpubase,
+      symtype,
       node,nmem;
 
     type
@@ -67,10 +68,12 @@ interface
            This routine should update location.reference correctly,
            so it points to the correct address.
          }
-         procedure update_reference_reg_mul(maybe_const_reg:tregister;l:aint);virtual;
-         procedure update_reference_reg_packed(maybe_const_reg:tregister;l:aint);virtual;
+         procedure update_reference_reg_mul(maybe_const_reg: tregister;regsize: tdef; l: aint);virtual;
+         procedure update_reference_reg_packed(maybe_const_reg: tregister; regsize: tdef; l: aint);virtual;
+         procedure update_reference_offset(var ref: treference; index, mulsize: aint); virtual;
          procedure second_wideansistring;virtual;
          procedure second_dynamicarray;virtual;
+         function valid_index_size(size: tcgsize): boolean;virtual;
        public
          procedure pass_generate_code;override;
        end;
@@ -81,11 +84,11 @@ implementation
     uses
       systems,
       cutils,cclasses,verbose,globals,constexp,
-      symconst,symbase,symtype,symdef,symsym,symcpu,symtable,defutil,paramgr,
+      symconst,symbase,symdef,symsym,symcpu,symtable,defutil,paramgr,
       aasmbase,aasmtai,aasmdata,
       procinfo,pass_2,parabase,
       pass_1,nld,ncon,nadd,ncnv,nutils,
-      cgutils,cgobj,hlcgobj,
+      cgobj,hlcgobj,
       tgobj,ncgutil,objcgutl,
       defcmp
       ;
@@ -345,7 +348,7 @@ implementation
                       {$ifdef cpu_uses_separate_address_registers}
                         if getregtype(left.location.register)<>R_ADDRESSREGISTER then
                           begin
-                            location.reference.base:=rg.getaddressregister(current_asmdata.CurrAsmList);
+                            location.reference.base:=cg.getaddressregister(current_asmdata.CurrAsmList);
                             hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,left.resultdef,left.resultdef,
                               left.location.register,location.reference.base);
                           end
@@ -519,8 +522,8 @@ implementation
              }
              asmsym:=current_asmdata.RefAsmSymbol(vs.mangledname);
              reference_reset_symbol(tmpref,asmsym,0,sizeof(pint));
-             location.reference.index:=cg.getaddressregister(current_asmdata.CurrAsmList);
-             cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,tmpref,location.reference.index);
+             location.reference.index:=hlcg.getintregister(current_asmdata.CurrAsmList,ptruinttype);
+             hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ptruinttype,ptruinttype,tmpref,location.reference.index);
              { always packrecords C -> natural alignment }
              location.reference.alignment:=vs.vardef.alignment;
            end
@@ -613,7 +616,7 @@ implementation
      { the live range of the LOC_CREGISTER will most likely overlap the   }
      { the live range of the target LOC_(C)REGISTER)                      }
      { The passed register may be a LOC_CREGISTER as well.                }
-     procedure tcgvecnode.update_reference_reg_mul(maybe_const_reg:tregister;l:aint);
+     procedure tcgvecnode.update_reference_reg_mul(maybe_const_reg: tregister; regsize: tdef; l: aint);
        var
          hreg: tregister;
        begin
@@ -643,7 +646,7 @@ implementation
 
 
      { see remarks for tcgvecnode.update_reference_reg_mul above }
-     procedure tcgvecnode.update_reference_reg_packed(maybe_const_reg:tregister;l:aint);
+     procedure tcgvecnode.update_reference_reg_packed(maybe_const_reg: tregister; regsize: tdef; l:aint);
        var
          sref: tsubsetreference;
          offsetreg, hreg: tregister;
@@ -661,7 +664,7 @@ implementation
 {$endif not cpu64bitalu}
              ) then
            begin
-             update_reference_reg_mul(maybe_const_reg,l div 8);
+             update_reference_reg_mul(maybe_const_reg,regsize,l div 8);
              exit;
            end;
          if (l > 8*sizeof(aint)) then
@@ -672,7 +675,7 @@ implementation
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_IMUL,OS_INT,l,hreg);
          { keep alignment for index }
          sref.ref.alignment := left.resultdef.alignment;
-         if not ispowerof2(sref.ref.alignment,temp) then
+         if not ispowerof2(packedbitsloadsize(l),temp) then
            internalerror(2006081201);
          alignpower:=temp;
          offsetreg := cg.getaddressregister(current_asmdata.CurrAsmList);
@@ -699,6 +702,12 @@ implementation
        end;
 
 
+     procedure tcgvecnode.update_reference_offset(var ref: treference; index, mulsize: aint);
+       begin
+         inc(ref.offset,index*mulsize);
+       end;
+
+
      procedure tcgvecnode.second_wideansistring;
        begin
        end;
@@ -708,6 +717,13 @@ implementation
        end;
 
 
+     function tcgvecnode.valid_index_size(size: tcgsize): boolean;
+       begin
+         result:=
+           tcgsize2signed[size]=tcgsize2signed[OS_ADDR];
+       end;
+
+
      procedure tcgvecnode.rangecheck_array;
        var
          hightree : tnode;
@@ -853,6 +869,7 @@ implementation
          paraloc2 : tcgpara;
          subsetref : tsubsetreference;
          temp : longint;
+         indexdef : tdef;
       begin
          paraloc1.init;
          paraloc2.init;
@@ -910,7 +927,7 @@ implementation
 
               { in ansistrings/widestrings S[1] is p<w>char(S)[0] }
               if not(cs_zerobasedstrings in current_settings.localswitches) then
-                dec(location.reference.offset,offsetdec);
+                update_reference_offset(location.reference,-1,offsetdec);
            end
          else if is_dynamic_array(left.resultdef) then
            begin
@@ -963,7 +980,7 @@ implementation
               or is_64bitint(resultdef)
 {$endif not cpu64bitalu}
               ) then
-           dec(location.reference.offset,bytemulsize*tarraydef(left.resultdef).lowrange);
+           update_reference_offset(location.reference,-tarraydef(left.resultdef).lowrange,bytemulsize);
 
          if right.nodetype=ordconstn then
            begin
@@ -984,10 +1001,10 @@ implementation
                    { only orddefs are bitpacked }
                    not is_ordinal(resultdef))) then
                 begin
-                  extraoffset:=bytemulsize*tordconstnode(right).value.svalue;
-                  inc(location.reference.offset,extraoffset);
-                  { adjust alignment after to this change }
-                  location.reference.alignment:=newalignment(location.reference.alignment,extraoffset);
+                  extraoffset:=tordconstnode(right).value.svalue;
+                  update_reference_offset(location.reference,extraoffset,bytemulsize);
+                  { adjust alignment after this change }
+                  location.reference.alignment:=newalignment(location.reference.alignment,extraoffset*bytemulsize);
                   { don't do this for floats etc.; needed to properly set the }
                   { size for bitpacked arrays (e.g. a bitpacked array of      }
                   { enums who are size 2 but fit in one byte -> in the array  }
@@ -1000,10 +1017,10 @@ implementation
                 begin
                   subsetref.ref := location.reference;
                   subsetref.ref.alignment := left.resultdef.alignment;
-                  if not ispowerof2(subsetref.ref.alignment,temp) then
+                  if not ispowerof2(packedbitsloadsize(resultdef.packedbitsize),temp) then
                     internalerror(2006081212);
                   alignpow:=temp;
-                  inc(subsetref.ref.offset,((mulsize * (tordconstnode(right).value.svalue-tarraydef(left.resultdef).lowrange)) shr (3+alignpow)) shl alignpow);
+                  update_reference_offset(subsetref.ref,(mulsize * (tordconstnode(right).value.svalue-tarraydef(left.resultdef).lowrange)) shr (3+alignpow),1 shl alignpow);
                   subsetref.bitindexreg := NR_NO;
                   subsetref.startbit := (mulsize * (tordconstnode(right).value.svalue-tarraydef(left.resultdef).lowrange)) and ((1 shl (3+alignpow))-1);
                   subsetref.bitlen := resultdef.packedbitsize;
@@ -1048,8 +1065,7 @@ implementation
                             replacenode(rightp^,taddnode(rightp^).left);
                           end;
                      end;
-                   inc(location.reference.offset,
-                       mulsize*extraoffset);
+                   update_reference_offset(location.reference,extraoffset,mulsize);
                 end;
               { calculate from left to right }
               if not(location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
@@ -1067,8 +1083,14 @@ implementation
               secondpass(right);
 
               { if mulsize = 1, we won't have to modify the index }
-              if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or (right.location.size<>OS_ADDR) then
-                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,ptruinttype,true);
+              if not(right.location.loc in [LOC_CREGISTER,LOC_REGISTER]) or
+                 not valid_index_size(right.location.size) then
+                begin
+                  hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,ptruinttype,true);
+                  indexdef:=ptruinttype
+                end
+              else
+                indexdef:=right.resultdef;
 
               if isjump then
                begin
@@ -1090,9 +1112,9 @@ implementation
               { insert the register and the multiplication factor in the
                 reference }
               if not is_packed_array(left.resultdef) then
-                update_reference_reg_mul(right.location.register,mulsize)
+                update_reference_reg_mul(right.location.register,indexdef,mulsize)
               else
-                update_reference_reg_packed(right.location.register,mulsize);
+                update_reference_reg_packed(right.location.register,indexdef,mulsize);
            end;
 
         location.size:=newsize;

Some files were not shown because too many files changed in this diff