Jelajahi Sumber

Rebased to svn revision 25050

git-svn-id: branches/mips_embedded@25051 -
ring 12 tahun lalu
induk
melakukan
0b17c6df4f
100 mengubah file dengan 2834 tambahan dan 1170 penghapusan
  1. 59 2
      .gitattributes
  2. 8 1
      Makefile
  3. 11 0
      Makefile.fpc
  4. 1 1
      compiler/COPYING.txt
  5. 5 0
      compiler/aasmdata.pas
  6. 58 2
      compiler/aasmtai.pas
  7. 2 2
      compiler/aoptbase.pas
  8. 73 52
      compiler/aoptobj.pas
  9. 5 1
      compiler/arm/aasmcpu.pas
  10. 112 5
      compiler/arm/aoptcpu.pas
  11. 11 7
      compiler/arm/cgcpu.pas
  12. 46 11
      compiler/arm/cpupara.pas
  13. 12 25
      compiler/arm/narmadd.pas
  14. 2 2
      compiler/arm/narmcnv.pas
  15. 1 1
      compiler/arm/narminl.pas
  16. 18 4
      compiler/arm/narmmat.pas
  17. 21 0
      compiler/arm/narmset.pas
  18. 45 13
      compiler/avr/cpupara.pas
  19. 9 1
      compiler/cfileutl.pas
  20. 6 0
      compiler/cgbase.pas
  21. 33 1
      compiler/cgobj.pas
  22. 8 0
      compiler/constexp.pas
  23. 3 3
      compiler/cresstr.pas
  24. 39 19
      compiler/defcmp.pas
  25. 117 14
      compiler/defutil.pas
  26. 5 0
      compiler/fpcdefs.inc
  27. 10 2
      compiler/globals.pas
  28. 6 0
      compiler/globtype.pas
  29. 157 62
      compiler/hlcg2ll.pas
  30. 356 162
      compiler/hlcgobj.pas
  31. 13 3
      compiler/i386/cgcpu.pas
  32. 8 4
      compiler/i386/cpuinfo.pas
  33. 43 11
      compiler/i386/cpupara.pas
  34. 7 1
      compiler/i386/i386att.inc
  35. 6 0
      compiler/i386/i386atts.inc
  36. 7 1
      compiler/i386/i386int.inc
  37. 1 1
      compiler/i386/i386nop.inc
  38. 7 1
      compiler/i386/i386op.inc
  39. 16 10
      compiler/i386/i386prop.inc
  40. 42 0
      compiler/i386/i386tab.inc
  41. 4 4
      compiler/i386/r386ari.inc
  42. 1 1
      compiler/i386/r386att.inc
  43. 1 1
      compiler/i386/r386con.inc
  44. 1 1
      compiler/i386/r386int.inc
  45. 4 4
      compiler/i386/r386iri.inc
  46. 1 1
      compiler/i386/r386nasm.inc
  47. 4 4
      compiler/i386/r386nri.inc
  48. 1 1
      compiler/i386/r386num.inc
  49. 1 1
      compiler/i386/r386op.inc
  50. 1 1
      compiler/i386/r386ot.inc
  51. 1 1
      compiler/i386/r386rni.inc
  52. 4 4
      compiler/i386/r386sri.inc
  53. 1 1
      compiler/i386/r386std.inc
  54. 275 59
      compiler/i8086/cgcpu.pas
  55. 13 4
      compiler/i8086/cpuinfo.pas
  56. 5 6
      compiler/i8086/cpunode.pas
  57. 72 20
      compiler/i8086/cpupara.pas
  58. 10 11
      compiler/i8086/n8086add.pas
  59. 136 0
      compiler/i8086/n8086cnv.pas
  60. 71 0
      compiler/i8086/n8086con.pas
  61. 132 0
      compiler/i8086/n8086mem.pas
  62. 4 4
      compiler/i8086/r8086ari.inc
  63. 1 1
      compiler/i8086/r8086att.inc
  64. 1 1
      compiler/i8086/r8086con.inc
  65. 1 1
      compiler/i8086/r8086int.inc
  66. 4 4
      compiler/i8086/r8086iri.inc
  67. 1 1
      compiler/i8086/r8086nasm.inc
  68. 4 4
      compiler/i8086/r8086nri.inc
  69. 1 1
      compiler/i8086/r8086num.inc
  70. 1 1
      compiler/i8086/r8086op.inc
  71. 1 1
      compiler/i8086/r8086ot.inc
  72. 1 1
      compiler/i8086/r8086rni.inc
  73. 4 4
      compiler/i8086/r8086sri.inc
  74. 1 1
      compiler/i8086/r8086std.inc
  75. 15 0
      compiler/jvm/cpupara.pas
  76. 38 2
      compiler/jvm/hlcgcpu.pas
  77. 3 3
      compiler/jvm/njvmcal.pas
  78. 2 2
      compiler/jvm/njvmld.pas
  79. 3 2
      compiler/jvm/njvmmem.pas
  80. 20 5
      compiler/m68k/cpupara.pas
  81. 2 2
      compiler/m68k/n68kadd.pas
  82. 17 13
      compiler/mips/aasmcpu.pas
  83. 179 254
      compiler/mips/cgcpu.pas
  84. 19 31
      compiler/mips/cpubase.pas
  85. 27 22
      compiler/mips/cpugas.pas
  86. 2 6
      compiler/mips/cpuinfo.pas
  87. 59 53
      compiler/mips/cpupara.pas
  88. 9 16
      compiler/mips/cpupi.pas
  89. 67 0
      compiler/mips/hlcgcpu.pas
  90. 15 21
      compiler/mips/ncpuadd.pas
  91. 5 3
      compiler/mips/ncpucnv.pas
  92. 0 7
      compiler/mips/ncpuld.pas
  93. 39 16
      compiler/mips/ncpumat.pas
  94. 2 9
      compiler/mips/ncpuset.pas
  95. 10 28
      compiler/mips/opcode.inc
  96. 30 59
      compiler/mips/racpugas.pas
  97. 58 0
      compiler/mips/rgcpu.pas
  98. 10 28
      compiler/mips/strinst.inc
  99. 25 6
      compiler/msg/errord.msg
  100. 25 7
      compiler/msg/errordu.msg

+ 59 - 2
.gitattributes

@@ -248,8 +248,11 @@ compiler/i8086/i8086prop.inc svneol=native#text/plain
 compiler/i8086/i8086tab.inc svneol=native#text/plain
 compiler/i8086/n8086add.pas svneol=native#text/plain
 compiler/i8086/n8086cal.pas svneol=native#text/plain
+compiler/i8086/n8086cnv.pas svneol=native#text/plain
+compiler/i8086/n8086con.pas svneol=native#text/plain
 compiler/i8086/n8086inl.pas svneol=native#text/plain
 compiler/i8086/n8086mat.pas svneol=native#text/plain
+compiler/i8086/n8086mem.pas svneol=native#text/plain
 compiler/i8086/r8086ari.inc svneol=native#text/plain
 compiler/i8086/r8086att.inc svneol=native#text/plain
 compiler/i8086/r8086con.inc svneol=native#text/plain
@@ -820,8 +823,12 @@ compiler/x86_64/x8664nop.inc svneol=native#text/plain
 compiler/x86_64/x8664op.inc svneol=native#text/plain
 compiler/x86_64/x8664pro.inc svneol=native#text/plain
 compiler/x86_64/x8664tab.inc svneol=native#text/plain
+/fpmake.pp svneol=native#text/plain
+/fpmake_add1.inc svneol=native#text/plain
+/fpmake_proc1.inc svneol=native#text/plain
 ide/Makefile svneol=native#text/plain
 ide/Makefile.fpc svneol=native#text/plain
+ide/Makefile.fpc.fpcmake svneol=native#text/plain
 ide/README.txt svneol=native#text/plain
 ide/TODO.txt svneol=native#text/plain
 ide/compiler/Makefile svneol=native#text/plain
@@ -856,6 +863,7 @@ ide/fpide.pas svneol=native#text/plain
 ide/fpini.pas svneol=native#text/plain
 ide/fpintf.pas svneol=native#text/plain
 ide/fpkeys.pas svneol=native#text/plain
+ide/fpmake.pp svneol=native#text/plain
 ide/fpmansi.inc svneol=native#text/plain
 ide/fpmcomp.inc svneol=native#text/plain
 ide/fpmdebug.inc svneol=native#text/plain
@@ -2184,12 +2192,14 @@ packages/fcl-db/tests/dbtestframework_gui.lpr svneol=native#text/plain
 packages/fcl-db/tests/inieditor.lfm svneol=native#text/plain
 packages/fcl-db/tests/inieditor.pas svneol=native#text/plain
 packages/fcl-db/tests/memdstoolsunit.pas svneol=native#text/plain
+packages/fcl-db/tests/reruntest.sh svneol=native#text/plain
 packages/fcl-db/tests/sdfdstoolsunit.pas svneol=native#text/plain
 packages/fcl-db/tests/sqldbtoolsunit.pas svneol=native#text/plain
 packages/fcl-db/tests/tcgensql.pas svneol=native#text/plain
 packages/fcl-db/tests/tcparser.pas svneol=native#text/plain
 packages/fcl-db/tests/tcsdfdata.pp svneol=native#text/plain
 packages/fcl-db/tests/tcsqlscanner.pas svneol=native#text/plain
+packages/fcl-db/tests/test-list.txt svneol=native#text/plain
 packages/fcl-db/tests/test.json svneol=native#text/plain
 packages/fcl-db/tests/testbasics.pas svneol=native#text/plain
 packages/fcl-db/tests/testbufdatasetstreams.pas svneol=native#text/plain
@@ -2199,8 +2209,10 @@ packages/fcl-db/tests/testdbexport.pas svneol=native#text/plain
 packages/fcl-db/tests/testdddiff.pp svneol=native#text/plain
 packages/fcl-db/tests/testfieldtypes.pas svneol=native#text/plain
 packages/fcl-db/tests/testjsondataset.pp svneol=native#text/plain
+packages/fcl-db/tests/testleaks.sh svneol=native#text/plain
 packages/fcl-db/tests/testspecifictbufdataset.pas svneol=native#text/plain
 packages/fcl-db/tests/testspecifictdbf.pas svneol=native#text/plain
+packages/fcl-db/tests/testsqldb.pas svneol=native#text/pascal
 packages/fcl-db/tests/testsqlfiles.lpi svneol=native#text/plain
 packages/fcl-db/tests/testsqlfiles.lpr svneol=native#text/plain
 packages/fcl-db/tests/testsqlscanner.lpi svneol=native#text/plain
@@ -7830,6 +7842,8 @@ rtl/i8086/i8086.inc svneol=native#text/plain
 rtl/i8086/int64p.inc svneol=native#text/plain
 rtl/i8086/makefile.cpu svneol=native#text/plain
 rtl/i8086/math.inc svneol=native#text/plain
+rtl/i8086/mathu.inc svneol=native#text/plain
+rtl/i8086/mathuh.inc svneol=native#text/plain
 rtl/i8086/set.inc svneol=native#text/plain
 rtl/i8086/setjump.inc svneol=native#text/plain
 rtl/i8086/setjumph.inc svneol=native#text/plain
@@ -8245,8 +8259,15 @@ rtl/morphos/video.pp svneol=native#text/plain
 rtl/morphos/videodata.inc svneol=native#text/plain
 rtl/msdos/Makefile svneol=native#text/plain
 rtl/msdos/Makefile.fpc svneol=native#text/plain
+rtl/msdos/classes.pp svneol=native#text/plain
+rtl/msdos/crt.pp svneol=native#text/plain
 rtl/msdos/dos.pp svneol=native#text/plain
-rtl/msdos/prt0.asm svneol=native#text/plain
+rtl/msdos/msmouse.pp svneol=native#text/plain
+rtl/msdos/ports.pp svneol=native#text/plain
+rtl/msdos/prt0m.asm svneol=native#text/plain
+rtl/msdos/prt0s.asm svneol=native#text/plain
+rtl/msdos/prt0stm.asm svneol=native#text/plain
+rtl/msdos/prt0t.asm svneol=native#text/plain
 rtl/msdos/registers.inc svneol=native#text/plain
 rtl/msdos/sysdir.inc svneol=native#text/plain
 rtl/msdos/sysfile.inc svneol=native#text/plain
@@ -8254,6 +8275,9 @@ rtl/msdos/sysheap.inc svneol=native#text/plain
 rtl/msdos/sysos.inc svneol=native#text/plain
 rtl/msdos/sysosh.inc svneol=native#text/plain
 rtl/msdos/system.pp svneol=native#text/plain
+rtl/msdos/sysutils.pp svneol=native#text/plain
+rtl/msdos/tthread.inc svneol=native#text/plain
+rtl/msdos/varutils.pp svneol=native#text/plain
 rtl/nativent/Makefile svneol=native#text/plain
 rtl/nativent/Makefile.fpc svneol=native#text/plain
 rtl/nativent/buildrtl.lpi svneol=native#text/plain
@@ -9346,6 +9370,10 @@ tests/tbf/tb0231.pp svneol=native#text/pascal
 tests/tbf/tb0232.pp svneol=native#text/pascal
 tests/tbf/tb0233.pp svneol=native#text/pascal
 tests/tbf/tb0234.pp svneol=native#text/pascal
+tests/tbf/tb0235.pp svneol=native#text/pascal
+tests/tbf/tb0236.pp svneol=native#text/pascal
+tests/tbf/tb0237.pp svneol=native#text/pascal
+tests/tbf/tb0238.pp svneol=native#text/pascal
 tests/tbf/ub0115.pp svneol=native#text/plain
 tests/tbf/ub0149.pp svneol=native#text/plain
 tests/tbf/ub0158a.pp svneol=native#text/plain
@@ -9942,6 +9970,7 @@ tests/tbs/tb0592.pp svneol=native#text/plain
 tests/tbs/tb0593.pp svneol=native#text/pascal
 tests/tbs/tb0594.pp svneol=native#text/plain
 tests/tbs/tb0595.pp svneol=native#text/plain
+tests/tbs/tb0596.pp svneol=native#text/pascal
 tests/tbs/tb205.pp svneol=native#text/plain
 tests/tbs/tbs0594.pp svneol=native#text/pascal
 tests/tbs/ub0060.pp svneol=native#text/plain
@@ -10235,10 +10264,13 @@ tests/test/cg/obj/win32/i386/tcext3.o -text
 tests/test/cg/obj/win32/i386/tcext4.o -text
 tests/test/cg/obj/win32/i386/tcext5.o -text
 tests/test/cg/obj/win32/i386/tcext6.o -text
+tests/test/cg/obj/win64/x86_64/cpptcl1.o -text
+tests/test/cg/obj/win64/x86_64/cpptcl2.o -text
 tests/test/cg/obj/win64/x86_64/ctest.o -text svneol=unset#unset
 tests/test/cg/obj/win64/x86_64/tcext3.o -text svneol=unset#unset
 tests/test/cg/obj/win64/x86_64/tcext4.o -text svneol=unset#unset
 tests/test/cg/obj/win64/x86_64/tcext5.o -text svneol=unset#unset
+tests/test/cg/obj/win64/x86_64/tcext6.o -text
 tests/test/cg/obj/wince/arm/ctest.o -text
 tests/test/cg/obj/wince/arm/tcext3.o -text
 tests/test/cg/obj/wince/arm/tcext4.o -text
@@ -11731,6 +11763,7 @@ tests/test/tstdhandle.pp svneol=native#text/plain
 tests/test/tstprocv.pp svneol=native#text/plain
 tests/test/tstring1.pp svneol=native#text/plain
 tests/test/tstring10.pp svneol=native#text/plain
+tests/test/tstring11.pp svneol=native#text/pascal
 tests/test/tstring2.pp svneol=native#text/plain
 tests/test/tstring3.pp svneol=native#text/plain
 tests/test/tstring4.pp svneol=native#text/plain
@@ -12323,6 +12356,8 @@ tests/webtbf/tw2414.pp svneol=native#text/plain
 tests/webtbf/tw24184.pp svneol=native#text/plain
 tests/webtbf/tw24428.pp svneol=native#text/plain
 tests/webtbf/tw24428a.pp svneol=native#text/plain
+tests/webtbf/tw24495.pp svneol=native#text/pascal
+tests/webtbf/tw24588.pp svneol=native#text/pascal
 tests/webtbf/tw2478.pp svneol=native#text/plain
 tests/webtbf/tw2562.pp svneol=native#text/plain
 tests/webtbf/tw2657.pp svneol=native#text/plain
@@ -13406,6 +13441,7 @@ tests/webtbs/tw23962.pp svneol=native#text/plain
 tests/webtbs/tw23963.pp svneol=native#text/plain
 tests/webtbs/tw2397.pp svneol=native#text/plain
 tests/webtbs/tw24007.pp svneol=native#text/plain
+tests/webtbs/tw24071.pp svneol=native#text/pascal
 tests/webtbs/tw2409.pp svneol=native#text/plain
 tests/webtbs/tw24131.pp svneol=native#text/plain
 tests/webtbs/tw24197.pp svneol=native#text/plain
@@ -13418,6 +13454,7 @@ tests/webtbs/tw2438.pp svneol=native#text/plain
 tests/webtbs/tw2442.pp svneol=native#text/plain
 tests/webtbs/tw2452.pp svneol=native#text/plain
 tests/webtbs/tw2454.pp svneol=native#text/plain
+tests/webtbs/tw24705.pp svneol=native#text/pascal
 tests/webtbs/tw2473.pp svneol=native#text/plain
 tests/webtbs/tw2480.pp svneol=native#text/plain
 tests/webtbs/tw2481.pp svneol=native#text/plain
@@ -13441,6 +13478,7 @@ tests/webtbs/tw2620.pp svneol=native#text/plain
 tests/webtbs/tw2626.pp svneol=native#text/plain
 tests/webtbs/tw2627.pp svneol=native#text/plain
 tests/webtbs/tw2631.pp svneol=native#text/plain
+tests/webtbs/tw26408.pp svneol=native#text/pascal
 tests/webtbs/tw2643.pp svneol=native#text/plain
 tests/webtbs/tw2645.pp svneol=native#text/plain
 tests/webtbs/tw2647.pp svneol=native#text/plain
@@ -14234,6 +14272,7 @@ tests/webtbs/uw9113a.pp svneol=native#text/plain
 tests/webtbs/uw9113b.pp svneol=native#text/plain
 utils/Makefile svneol=native#text/plain
 utils/Makefile.fpc svneol=native#text/plain
+utils/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/README.txt svneol=native#text/plain
 utils/bin2obj.pp svneol=native#text/plain
 utils/creumap.pp svneol=native#text/plain
@@ -14241,6 +14280,7 @@ utils/data2inc.exm -text
 utils/data2inc.pp svneol=native#text/plain
 utils/debugsvr/Makefile svneol=native#text/plain
 utils/debugsvr/Makefile.fpc svneol=native#text/plain
+utils/debugsvr/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/debugsvr/README.txt svneol=native#text/plain
 utils/debugsvr/console/Makefile svneol=native#text/plain
 utils/debugsvr/console/Makefile.fpc svneol=native#text/plain
@@ -14270,11 +14310,13 @@ utils/debugsvr/testdebug.pp svneol=native#text/plain
 utils/delp.pp svneol=native#text/plain
 utils/dxegen/Makefile svneol=native#text/plain
 utils/dxegen/Makefile.fpc svneol=native#text/plain
+utils/dxegen/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/dxegen/coff.pp svneol=native#text/plain
 utils/dxegen/dxegen.pp svneol=native#text/plain
 utils/dxegen/fpmake.pp svneol=native#text/plain
 utils/fpcm/Makefile svneol=native#text/plain
 utils/fpcm/Makefile.fpc svneol=native#text/plain
+utils/fpcm/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/fpcm/Makefile.fpmake.bs.template svneol=native#text/plain
 utils/fpcm/Makefile.fpmake.template svneol=native#text/plain
 utils/fpcm/convert_all_fpmake.sh svneol=native#text/plain
@@ -14296,6 +14338,7 @@ utils/fpcm/readme.txt svneol=native#text/plain
 utils/fpcm/revision.inc svneol=native#text/plain
 utils/fpcmkcfg/Makefile svneol=native#text/plain
 utils/fpcmkcfg/Makefile.fpc svneol=native#text/plain
+utils/fpcmkcfg/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/fpcmkcfg/default.cft svneol=native#text/plain
 utils/fpcmkcfg/default.inc svneol=native#text/plain
 utils/fpcmkcfg/fpc.cft svneol=native#text/plain
@@ -14310,11 +14353,11 @@ utils/fpcmkcfg/fppkg.cfg svneol=native#text/plain
 utils/fpcmkcfg/fppkg.inc svneol=native#text/plain
 utils/fpcres/Makefile svneol=native#text/plain
 utils/fpcres/Makefile.fpc svneol=native#text/plain
+utils/fpcres/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/fpcres/closablefilestream.pas svneol=native#text/plain
 utils/fpcres/fpcjres.pas svneol=native#text/plain
 utils/fpcres/fpcres.pas svneol=native#text/plain
 utils/fpcres/fpmake.pp svneol=native#text/plain
-utils/fpcres/jarparamparser.pas svneol=native#text/plain
 utils/fpcres/jarsourcehandler.pas svneol=native#text/plain
 utils/fpcres/msghandler.pas svneol=native#text/plain
 utils/fpcres/paramparser.pas svneol=native#text/plain
@@ -14322,6 +14365,7 @@ utils/fpcres/sourcehandler.pas svneol=native#text/plain
 utils/fpcres/target.pas svneol=native#text/plain
 utils/fpcreslipo/Makefile svneol=native#text/plain
 utils/fpcreslipo/Makefile.fpc svneol=native#text/plain
+utils/fpcreslipo/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/fpcreslipo/fpcreslipo.pp svneol=native#text/plain
 utils/fpcreslipo/fpmake.pp svneol=native#text/plain
 utils/fpcreslipo/msghandler.pp svneol=native#text/plain
@@ -14330,6 +14374,7 @@ utils/fpcreslipo/sourcehandler.pp svneol=native#text/plain
 utils/fpdoc/COPYING.txt svneol=native#text/plain
 utils/fpdoc/Makefile svneol=native#text/plain
 utils/fpdoc/Makefile.fpc svneol=native#text/plain
+utils/fpdoc/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/fpdoc/README.txt svneol=native#text/plain
 utils/fpdoc/css.inc svneol=native#text/plain
 utils/fpdoc/dglobals.pp svneol=native#text/plain
@@ -14420,6 +14465,7 @@ utils/fpmake_add.inc svneol=native#text/plain
 utils/fpmake_proc.inc svneol=native#text/plain
 utils/fpmc/Makefile svneol=native#text/plain
 utils/fpmc/Makefile.fpc svneol=native#text/plain
+utils/fpmc/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/fpmc/README.txt svneol=native#text/plain
 utils/fpmc/dumpfile.pp svneol=native#text/plain
 utils/fpmc/fpmake.pp svneol=native#text/plain
@@ -14479,6 +14525,7 @@ utils/fppkg/lnet/sys/osunits.inc svneol=native#text/plain
 utils/fppkg/pkglnet.pp svneol=native#text/plain
 utils/fprcp/Makefile svneol=native#text/plain
 utils/fprcp/Makefile.fpc svneol=native#text/plain
+utils/fprcp/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/fprcp/Readme.txt svneol=native#text/plain
 utils/fprcp/comments.pp svneol=native#text/plain
 utils/fprcp/demo.h -text
@@ -14492,6 +14539,7 @@ utils/fprcp/use_demo.bat -text
 utils/grab_vcsa.pp -text
 utils/h2pas/Makefile svneol=native#text/plain
 utils/h2pas/Makefile.fpc svneol=native#text/plain
+utils/h2pas/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/h2pas/README.txt svneol=native#text/plain
 utils/h2pas/converu.pas svneol=native#text/plain
 utils/h2pas/fpmake.pp svneol=native#text/plain
@@ -14508,11 +14556,13 @@ utils/h2pas/yylex.cod -text
 utils/h2pas/yyparse.cod -text
 utils/importtl/Makefile svneol=native#text/plain
 utils/importtl/Makefile.fpc svneol=native#text/plain
+utils/importtl/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/importtl/fpmake.pp svneol=native#text/plain
 utils/importtl/importtl.lpi svneol=native#text/plain
 utils/importtl/importtl.pas svneol=native#text/plain
 utils/instantfpc/Makefile svneol=native#text/plain
 utils/instantfpc/Makefile.fpc svneol=native#text/plain
+utils/instantfpc/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/instantfpc/README.txt svneol=native#text/plain
 utils/instantfpc/examples/envvars.pas svneol=native#text/plain
 utils/instantfpc/examples/exitcode.pas svneol=native#text/plain
@@ -14553,6 +14603,7 @@ utils/javapp/src/fpc/tools/javapp/TrapData.java svneol=native#text/plain
 utils/javapp/src/fpc/tools/javapp/TypeSignature.java svneol=native#text/plain
 utils/mksymbian/Makefile svneol=native#text/plain
 utils/mksymbian/Makefile.fpc svneol=native#text/plain
+utils/mksymbian/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/mksymbian/cfgfile.pas svneol=native#text/plain
 utils/mksymbian/cmdline.pas svneol=native#text/plain
 utils/mksymbian/compiler.pas svneol=native#text/plain
@@ -14563,11 +14614,13 @@ utils/mksymbian/projectparser.pas svneol=native#text/plain
 utils/mksymbian/sdkutil.pas svneol=native#text/plain
 utils/pas2fpm/Makefile svneol=native#text/plain
 utils/pas2fpm/Makefile.fpc svneol=native#text/plain
+utils/pas2fpm/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/pas2fpm/fpmake.pp svneol=native#text/plain
 utils/pas2fpm/pas2fpm.lpi svneol=native#text/plain
 utils/pas2fpm/pas2fpm.pp svneol=native#text/plain
 utils/pas2jni/Makefile svneol=native#text/plain
 utils/pas2jni/Makefile.fpc svneol=native#text/plain
+utils/pas2jni/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/pas2jni/def.pas svneol=native#text/plain
 utils/pas2jni/fpmake.pp svneol=native#text/plain
 utils/pas2jni/pas2jni.pas svneol=native#text/plain
@@ -14576,6 +14629,7 @@ utils/pas2jni/readme.txt svneol=native#text/plain
 utils/pas2jni/writer.pas svneol=native#text/plain
 utils/pas2ut/Makefile svneol=native#text/plain
 utils/pas2ut/Makefile.fpc svneol=native#text/plain
+utils/pas2ut/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/pas2ut/fpmake.pp svneol=native#text/plain
 utils/pas2ut/pas2ut.lpi svneol=native#text/plain
 utils/pas2ut/pas2ut.pp svneol=native#text/plain
@@ -14586,6 +14640,7 @@ utils/ptopu.pp svneol=native#text/plain
 utils/rmcvsdir.pp svneol=native#text/plain
 utils/rmwait/Makefile svneol=native#text/plain
 utils/rmwait/Makefile.fpc svneol=native#text/plain
+utils/rmwait/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/rmwait/fpmake.pp svneol=native#text/plain
 utils/rmwait/rmwait.pas svneol=native#text/plain
 utils/rstconv.pp svneol=native#text/plain
@@ -14664,6 +14719,7 @@ utils/svn2cvs/vers.pp svneol=native#text/plain
 utils/tply/COPYING.txt svneol=native#text/plain
 utils/tply/Makefile svneol=native#text/plain
 utils/tply/Makefile.fpc svneol=native#text/plain
+utils/tply/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/tply/README.txt svneol=native#text/plain
 utils/tply/fpmake.pp svneol=native#text/plain
 utils/tply/lexbase.pas svneol=native#text/plain
@@ -14693,6 +14749,7 @@ utils/tply/yylex.cod svneol=native#text/plain
 utils/tply/yyparse.cod svneol=native#text/plain
 utils/unicode/Makefile svneol=native#text/plain
 utils/unicode/Makefile.fpc svneol=native#text/plain
+utils/unicode/Makefile.fpc.fpcmake svneol=native#text/plain
 utils/unicode/cldrhelper.pas svneol=native#text/pascal
 utils/unicode/cldrparser.lpi svneol=native#text/plain
 utils/unicode/cldrparser.lpr svneol=native#text/pascal

+ 8 - 1
Makefile

@@ -1,5 +1,5 @@
 #
-# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013-04-25 rev 24324]
+# Don't edit, this file is generated by FPCMake Version 2.0.0 [2013/05/28]
 #
 default: help
 MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-haiku i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian i386-nativent i386-iphonesim i386-android m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded powerpc-wii powerpc-aix sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-netbsd x86_64-solaris x86_64-openbsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-darwin arm-wince arm-gba arm-nds arm-embedded arm-symbian arm-android powerpc64-linux powerpc64-darwin powerpc64-embedded powerpc64-aix avr-embedded armeb-linux armeb-embedded mips-linux mipsel-linux jvm-java jvm-android i8086-msdos
@@ -436,7 +436,11 @@ export DIST_DESTDIR:=$(BASEDIR)
 endif
 BASEPACKDIR=$(BASEDIR)/basepack
 ifndef FPCMAKENEW
+ifdef CROSSCOMPILE
 FPCMAKENEW=$(BASEDIR)/utils/fpcm/fpcmake$(SRCEXEEXT)
+else
+FPCMAKENEW=$(BASEDIR)/utils/fpcm/bin/$(SOURCESUFFIX)/fpcmake$(SRCEXEEXT)
+endif
 endif
 CLEANOPTS=FPC=$(PPNEW)
 BUILDOPTS=FPC=$(PPNEW) FPCFPMAKE=$(FPCFPMAKENEW) RELEASE=1
@@ -2750,6 +2754,9 @@ endif
 buildbase: base.$(BUILDSTAMP)
 base.$(BUILDSTAMP):
 	$(MAKE) compiler_cycle RELEASE=1
+ifdef CROSSCOMPILE
+	$(MAKE) -C utils/fpcm bootstrap $(BUILDOPTS)
+endif
 	$(MAKE) rtl_clean $(CLEANOPTS)
 	$(MAKE) rtl_$(ALLTARGET) $(BUILDOPTS)
 	$(ECHOREDIR) Build > base.$(BUILDSTAMP)

+ 11 - 0
Makefile.fpc

@@ -169,7 +169,13 @@ BASEPACKDIR=$(BASEDIR)/basepack
 
 # Always use newly created fpcmake
 ifndef FPCMAKENEW
+ifdef CROSSCOMPILE
+# Use bootstrapped fpcmake when cross-compiling
 FPCMAKENEW=$(BASEDIR)/utils/fpcm/fpcmake$(SRCEXEEXT)
+else
+# Use normal fpcmake
+FPCMAKENEW=$(BASEDIR)/utils/fpcm/bin/$(SOURCESUFFIX)/fpcmake$(SRCEXEEXT)
+endif
 endif
 
 # Build/install options
@@ -316,6 +322,11 @@ buildbase: base.$(BUILDSTAMP)
 base.$(BUILDSTAMP):
 # create new compiler
         $(MAKE) compiler_cycle RELEASE=1
+ifdef CROSSCOMPILE
+# Buld a new native fpcmake when cross-compiling.
+# Fresh native compiler and RTL are ready at this stage.
+        $(MAKE) -C utils/fpcm bootstrap $(BUILDOPTS)
+endif
 # clean
         $(MAKE) rtl_clean $(CLEANOPTS)
 # build everything

+ 1 - 1
compiler/COPYING.txt

@@ -2,7 +2,7 @@
 		       Version 2, June 1991
 
  Copyright (C) 1989, 1991 Free Software Foundation, Inc.
-                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  Everyone is permitted to copy and distribute verbatim copies
  of this license document, but changing it is not allowed.
 

+ 5 - 0
compiler/aasmdata.pas

@@ -122,6 +122,7 @@ interface
     type
       TAsmList = class(tlinkedlist)
          constructor create;
+         constructor create_without_marker;
          function  empty : boolean;
          function  getlasttaifilepos : pfileposinfo;
       end;
@@ -288,6 +289,10 @@ implementation
         insert(tai_marker.create(mark_BlockStart));
       end;
 
+    constructor TAsmList.create_without_marker;
+      begin
+        inherited create;
+      end;
 
     function TAsmList.empty : boolean;
       begin

+ 58 - 2
compiler/aasmtai.pas

@@ -138,7 +138,9 @@ interface
           { for use by dwarf debugger information }
           aitconst_16bit_unaligned,
           aitconst_32bit_unaligned,
-          aitconst_64bit_unaligned
+          aitconst_64bit_unaligned,
+          { i8086 far pointer; emits: 'DW symbol, SEG symbol' }
+          aitconst_farptr
         );
 
     const
@@ -572,6 +574,8 @@ interface
           constructor Create_rel_sym(_typ:taiconst_type;_sym,_endsym:tasmsymbol);
           constructor Create_rva_sym(_sym:tasmsymbol);
           constructor Createname(const name:string;ofs:aint);
+          constructor Create_nil_codeptr;
+          constructor Create_nil_dataptr;
           constructor ppuload(t:taitype;ppufile:tcompilerppufile);override;
           procedure ppuwrite(ppufile:tcompilerppufile);override;
           procedure derefimpl;override;
@@ -1628,7 +1632,24 @@ implementation
       begin
          inherited Create;
          typ:=ait_const;
+{$ifdef i8086}
+         if assigned(_sym) and (_sym.typ=AT_DATA) then
+           begin
+             if current_settings.x86memorymodel in x86_far_data_models then
+               consttype:=aitconst_farptr
+             else
+               consttype:=aitconst_ptr;
+           end
+         else
+           begin
+             if current_settings.x86memorymodel in x86_far_code_models then
+               consttype:=aitconst_farptr
+             else
+               consttype:=aitconst_ptr;
+           end;
+{$else i8086}
          consttype:=aitconst_ptr;
+{$endif i8086}
          { sym is allowed to be nil, this is used to write nil pointers }
          sym:=_sym;
          endsym:=nil;
@@ -1664,6 +1685,40 @@ implementation
       end;
 
 
+    constructor tai_const.Create_nil_codeptr;
+      begin
+        inherited Create;
+        typ:=ait_const;
+{$ifdef i8086}
+        if current_settings.x86memorymodel in x86_far_code_models then
+          consttype:=aitconst_farptr
+        else
+{$endif i8086}
+          consttype:=aitconst_ptr;
+        sym:=nil;
+        endsym:=nil;
+        symofs:=0;
+        value:=0;
+      end;
+
+
+    constructor tai_const.Create_nil_dataptr;
+      begin
+        inherited Create;
+        typ:=ait_const;
+{$ifdef i8086}
+        if current_settings.x86memorymodel in x86_far_data_models then
+          consttype:=aitconst_farptr
+        else
+{$endif i8086}
+          consttype:=aitconst_ptr;
+        sym:=nil;
+        endsym:=nil;
+        symofs:=0;
+        value:=0;
+      end;
+
+
     constructor tai_const.ppuload(t:taitype;ppufile:tcompilerppufile);
       begin
         inherited ppuload(t,ppufile);
@@ -1693,7 +1748,8 @@ implementation
       begin
         getcopy:=inherited getcopy;
         { we need to increase the reference number }
-        sym.increfs;
+        if assigned(sym) then
+          sym.increfs;
         if assigned(endsym) then
           endsym.increfs;
       end;

+ 2 - 2
compiler/aoptbase.pas

@@ -171,11 +171,11 @@ unit aoptbase;
       Current := tai(Current.Next);
       While Assigned(Current) And
             ((Current.typ In SkipInstr) or
-{$ifdef SPARC}
+{$if defined(SPARC) or defined(MIPS)}
              ((Current.typ=ait_instruction) and
               (taicpu(Current).opcode=A_NOP)
              ) or
-{$endif SPARC}
+{$endif SPARC or MIPS}
              ((Current.typ = ait_label) And
               labelCanBeSkipped(Tai_Label(Current)))) Do
         Current := tai(Current.Next);

+ 73 - 52
compiler/aoptobj.pas

@@ -344,6 +344,18 @@ Unit AoptObj;
       verbose,
       procinfo;
 
+
+    function JumpTargetOp(ai: taicpu): poper; inline;
+      begin
+{$ifdef MIPS}
+        { MIPS branches can have 1,2 or 3 operands, target label is the last one. }
+        result:=ai.oper[ai.ops-1];
+{$else MIPS}
+        result:=ai.oper[0];
+{$endif MIPS}
+      end;
+
+
       { ************************************************************************* }
       { ******************************** TUsedRegs ****************************** }
       { ************************************************************************* }
@@ -1120,6 +1132,17 @@ Unit AoptObj;
       end;
 {$pop}
 
+    function IsJumpToLabel(hp: taicpu): boolean;
+      begin
+        result:=(hp.opcode=aopt_uncondjmp) and
+{$ifdef arm}
+          (hp.condition=c_None) and
+{$endif arm}
+          (JumpTargetOp(hp)^.typ = top_ref) and
+          (JumpTargetOp(hp)^.ref^.symbol is TAsmLabel);
+      end;
+
+
     function TAOptObj.GetFinalDestination(hp: taicpu; level: longint): boolean;
       {traces sucessive jumps to their final destination and sets it, e.g.
        je l1                je l3
@@ -1140,7 +1163,7 @@ Unit AoptObj;
         GetfinalDestination := false;
         if level > 20 then
           exit;
-        p1 := getlabelwithsym(tasmlabel(hp.oper[0]^.ref^.symbol));
+        p1 := getlabelwithsym(tasmlabel(JumpTargetOp(hp)^.ref^.symbol));
         if assigned(p1) then
           begin
             SkipLabels(p1,p1);
@@ -1148,14 +1171,12 @@ Unit AoptObj;
                (taicpu(p1).is_jmp) then
               if { the next instruction after the label where the jump hp arrives}
                  { is unconditional or of the same type as hp, so continue       }
-                 (((taicpu(p1).opcode = aopt_uncondjmp) and
-{$ifdef arm}
-                   (taicpu(p1).condition = C_None) and
-{$endif arm}
-                   (taicpu(p1).oper[0]^.typ = top_ref) and
-                   (assigned(taicpu(p1).oper[0]^.ref^.symbol)) and
-                   (taicpu(p1).oper[0]^.ref^.symbol is TAsmLabel)) or
-                  conditions_equal(taicpu(p1).condition,hp.condition)) or
+                 IsJumpToLabel(taicpu(p1))
+{$ifndef MIPS}
+{ for MIPS, it isn't enough to check the condition; first operands must be same, too. }
+                 or
+                 conditions_equal(taicpu(p1).condition,hp.condition) or
+
                  { the next instruction after the label where the jump hp arrives
                    is the opposite of hp (so this one is never taken), but after
                    that one there is a branch that will be taken, so perform a
@@ -1165,26 +1186,23 @@ Unit AoptObj;
                   SkipLabels(p1,p2) and
                   (p2.typ = ait_instruction) and
                   (taicpu(p2).is_jmp) and
-                  (((taicpu(p2).opcode = aopt_uncondjmp) and
-{$ifdef arm}
-                    (taicpu(p1).condition = C_None) and
-{$endif arm}
-                    (taicpu(p2).oper[0]^.typ = top_ref) and
-                    (assigned(taicpu(p2).oper[0]^.ref^.symbol)) and
-                    (taicpu(p2).oper[0]^.ref^.symbol is TAsmLabel)) or
+                   (IsJumpToLabel(taicpu(p2)) or
                    (conditions_equal(taicpu(p2).condition,hp.condition))) and
-                  SkipLabels(p1,p1)) then
+                  SkipLabels(p1,p1))
+{$endif MIPS}
+                 then
                 begin
                   { quick check for loops of the form "l5: ; jmp l5 }
-                  if (tasmlabel(taicpu(p1).oper[0]^.ref^.symbol).labelnr =
-                       tasmlabel(hp.oper[0]^.ref^.symbol).labelnr) then
+                  if (tasmlabel(JumpTargetOp(taicpu(p1))^.ref^.symbol).labelnr =
+                       tasmlabel(JumpTargetOp(hp)^.ref^.symbol).labelnr) then
                     exit;
                   if not GetFinalDestination(taicpu(p1),succ(level)) then
                     exit;
-                  tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
-                  hp.oper[0]^.ref^.symbol:=taicpu(p1).oper[0]^.ref^.symbol;
-                  tasmlabel(hp.oper[0]^.ref^.symbol).increfs;
+                  tasmlabel(JumpTargetOp(hp)^.ref^.symbol).decrefs;
+                  JumpTargetOp(hp)^.ref^.symbol:=JumpTargetOp(taicpu(p1))^.ref^.symbol;
+                  tasmlabel(JumpTargetOp(hp)^.ref^.symbol).increfs;
                 end
+{$ifndef MIPS}
               else
                 if conditions_equal(taicpu(p1).condition,inverse_cond(hp.condition)) then
                   if not FindAnyLabel(p1,l) then
@@ -1195,8 +1213,8 @@ Unit AoptObj;
       {$endif finaldestdebug}
                       current_asmdata.getjumplabel(l);
                       insertllitem(p1,p1.next,tai_label.Create(l));
-                      tasmlabel(taicpu(hp).oper[0]^.ref^.symbol).decrefs;
-                      hp.oper[0]^.ref^.symbol := l;
+                      tasmlabel(JumpTargetOp(hp)^.ref^.symbol).decrefs;
+                      JumpTargetOp(hp)^.ref^.symbol := l;
                       l.increfs;
       {               this won't work, since the new label isn't in the labeltable }
       {               so it will fail the rangecheck. Labeltable should become a   }
@@ -1210,11 +1228,12 @@ Unit AoptObj;
                         strpnew('next label reused'))));
       {$endif finaldestdebug}
                       l.increfs;
-                      tasmlabel(hp.oper[0]^.ref^.symbol).decrefs;
-                      hp.oper[0]^.ref^.symbol := l;
+                      tasmlabel(JumpTargetOp(hp)^.ref^.symbol).decrefs;
+                      JumpTargetOp(hp)^.ref^.symbol := l;
                       if not GetFinalDestination(hp,succ(level)) then
                         exit;
                     end;
+{$endif not MIPS}
           end;
         GetFinalDestination := true;
       end;
@@ -1255,13 +1274,7 @@ Unit AoptObj;
                       { the following if-block removes all code between a jmp and the next label,
                         because it can never be executed
                       }
-                      if (taicpu(p).opcode = aopt_uncondjmp) and
-{$ifdef arm}
-                         (taicpu(p).condition = C_None) and
-{$endif arm}
-                         (taicpu(p).oper[0]^.typ = top_ref) and
-                         (assigned(taicpu(p).oper[0]^.ref^.symbol)) and
-                         (taicpu(p).oper[0]^.ref^.symbol is TAsmLabel) then
+                      if IsJumpToLabel(taicpu(p)) then
                         begin
                           hp2:=p;
                           while GetNextInstruction(hp2, hp1) and
@@ -1270,10 +1283,9 @@ Unit AoptObj;
                               begin
                                 if (hp1.typ = ait_instruction) and
                                    taicpu(hp1).is_jmp and
-                                   (taicpu(hp1).oper[0]^.typ = top_ref) and
-                                   assigned(taicpu(hp1).oper[0]^.ref^.symbol) and
-                                   (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) then
-                                   TAsmLabel(taicpu(hp1).oper[0]^.ref^.symbol).decrefs;
+                                   (JumpTargetOp(taicpu(hp1))^.typ = top_ref) and
+                                   (JumpTargetOp(taicpu(hp1))^.ref^.symbol is TAsmLabel) then
+                                   TAsmLabel(JumpTargetOp(taicpu(hp1))^.ref^.symbol).decrefs;
                                 { don't kill start/end of assembler block,
                                   no-line-info-start/end etc }
                                 if hp1.typ<>ait_marker then
@@ -1289,13 +1301,18 @@ Unit AoptObj;
                       { remove jumps to a label coming right after them }
                       if GetNextInstruction(p, hp1) then
                         begin
-                          if FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp1) and
+                          if FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp1) and
         { TODO: FIXME removing the first instruction fails}
                               (p<>blockstart) then
                             begin
+                              tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol).decrefs;
+{$if defined(SPARC) or defined(MIPS)}
+                              hp2:=tai(p.next);
+                              asml.remove(hp2);
+                              hp2.free;
+{$endif SPARC or MIPS}
                               hp2:=tai(hp1.next);
                               asml.remove(p);
-                              tasmlabel(taicpu(p).oper[0]^.ref^.symbol).decrefs;
                               p.free;
                               p:=hp2;
                               continue;
@@ -1305,15 +1322,9 @@ Unit AoptObj;
                               if hp1.typ = ait_label then
                                 SkipLabels(hp1,hp1);
                               if (tai(hp1).typ=ait_instruction) and
-                                  (taicpu(hp1).opcode=aopt_uncondjmp) and
-{$ifdef arm}
-                                  (taicpu(hp1).condition=C_None) and
-{$endif arm}
-                                  (taicpu(hp1).oper[0]^.typ = top_ref) and
-                                  (assigned(taicpu(hp1).oper[0]^.ref^.symbol)) and
-                                  (taicpu(hp1).oper[0]^.ref^.symbol is TAsmLabel) and
+                                  IsJumpToLabel(taicpu(hp1)) and
                                   GetNextInstruction(hp1, hp2) and
-                                  FindLabel(tasmlabel(taicpu(p).oper[0]^.ref^.symbol), hp2) then
+                                  FindLabel(tasmlabel(JumpTargetOp(taicpu(p))^.ref^.symbol), hp2) then
                                 begin
                                   if (taicpu(p).opcode=aopt_condjmp)
 {$ifdef arm}
@@ -1323,17 +1334,27 @@ Unit AoptObj;
                                     begin
                                       taicpu(p).condition:=inverse_cond(taicpu(p).condition);
                                       tai_label(hp2).labsym.decrefs;
-                                      taicpu(p).oper[0]^.ref^.symbol:=taicpu(hp1).oper[0]^.ref^.symbol;
+                                      JumpTargetOp(taicpu(p))^.ref^.symbol:=JumpTargetOp(taicpu(hp1))^.ref^.symbol;
                                       { when freeing hp1, the reference count
                                         isn't decreased, so don't increase
 
                                        taicpu(p).oper[0]^.ref^.symbol.increfs;
                                       }
-{$ifdef SPARC}
+{$if defined(SPARC) or defined(MIPS)}
+                                      { Remove delay slot. Initially is is placed immediately after
+                                        branch, but RA can insert regallocs in between. }
                                       hp2:=tai(hp1.next);
-                                      asml.remove(hp2);
-                                      hp2.free;
-{$endif SPARC}
+                                      while assigned(hp2) and (hp2.typ in SkipInstr) do
+                                        hp2:=tai(hp2.next);
+                                      if assigned(hp2) and (hp2.typ=ait_instruction) and
+                                         (taicpu(hp2).opcode=A_NOP) then
+                                        begin
+                                          asml.remove(hp2);
+                                          hp2.free;
+                                        end
+                                      else
+                                        InternalError(2013070301);
+{$endif SPARC or MIPS}
                                       asml.remove(hp1);
                                       hp1.free;
                                       GetFinalDestination(taicpu(p),0);

+ 5 - 1
compiler/arm/aasmcpu.pas

@@ -1092,7 +1092,6 @@ implementation
                     curtai:=tai(curtai.next);
 
                 doinsert:=false;
-                hp:=tai(curtai.next);
                 current_asmdata.getjumplabel(l);
 
                 { align thumb in thumb .text section to 4 bytes }
@@ -1112,6 +1111,11 @@ implementation
                     hp2:=tai(hp2.next);
                   end;
 
+                { continue with the last inserted label because we use later
+                  on SimpleGetNextInstruction, so if we used curtai.next (which
+                  is then equal curdata.last.previous) we could over see one
+                  instruction }
+                hp:=tai(curdata.Last);
                 list.insertlistafter(curtai,curdata);
                 curtai:=hp;
               end

+ 112 - 5
compiler/arm/aoptcpu.pas

@@ -57,6 +57,7 @@ Type
   private
    function SkipEntryExitMarker(current: tai; var next: tai): boolean;
   protected
+    function LookForPreindexedPattern(p: taicpu): boolean;
     function LookForPostindexedPattern(p: taicpu): boolean;
   End;
 
@@ -405,6 +406,60 @@ Implementation
         end;
     end;
 
+  {
+    optimize
+      add/sub reg1,reg1,regY/const
+      ...
+      ldr/str regX,[reg1]
+
+      into
+
+      ldr/str regX,[reg1, regY/const]!
+  }
+  function TCpuAsmOptimizer.LookForPreindexedPattern(p: taicpu): boolean;
+    var
+      hp1: tai;
+    begin
+      if (p.ops=3) and
+        MatchOperand(p.oper[0]^, p.oper[1]^.reg) and
+        GetNextInstructionUsingReg(p, hp1, p.oper[0]^.reg) and
+        (not RegModifiedBetween(p.oper[0]^.reg, p, hp1)) and
+        MatchInstruction(hp1, [A_LDR,A_STR], [C_None], [PF_None,PF_B,PF_H,PF_SH,PF_SB]) and
+        (taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
+        (taicpu(hp1).oper[1]^.ref^.base=p.oper[0]^.reg) and
+        (taicpu(hp1).oper[0]^.reg<>p.oper[0]^.reg) and
+        (taicpu(hp1).oper[1]^.ref^.offset=0) and
+        (taicpu(hp1).oper[1]^.ref^.index=NR_NO) and
+        (((p.oper[2]^.typ=top_reg) and
+          (not RegModifiedBetween(p.oper[2]^.reg, p, hp1))) or
+         ((p.oper[2]^.typ=top_const) and
+          ((abs(p.oper[2]^.val) < 256) or
+           ((abs(p.oper[2]^.val) < 4096) and
+            (taicpu(hp1).oppostfix in [PF_None,PF_B]))))) then
+        begin
+          taicpu(hp1).oper[1]^.ref^.addressmode:=AM_PREINDEXED;
+
+          if p.oper[2]^.typ=top_reg then
+            begin
+              taicpu(hp1).oper[1]^.ref^.index:=p.oper[2]^.reg;
+              if p.opcode=A_ADD then
+                taicpu(hp1).oper[1]^.ref^.signindex:=1
+              else
+                taicpu(hp1).oper[1]^.ref^.signindex:=-1;
+            end
+          else
+            begin
+              if p.opcode=A_ADD then
+                taicpu(hp1).oper[1]^.ref^.offset:=p.oper[2]^.val
+              else
+                taicpu(hp1).oper[1]^.ref^.offset:=-p.oper[2]^.val;
+            end;
+
+          result:=true;
+        end
+      else
+        result:=false;
+    end;
 
   {
     optimize
@@ -443,7 +498,8 @@ Implementation
         not(RegModifiedBetween(taicpu(hp1).oper[0]^.reg,p,hp1)) and
         { don't apply the optimization if the (new) index register is loaded }
         (p.oper[0]^.reg<>taicpu(hp1).oper[2]^.reg) and
-        not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) then
+        not(RegModifiedBetween(taicpu(hp1).oper[2]^.reg,p,hp1)) and
+        not(current_settings.cputype in cpu_thumb) then
         begin
           DebugMsg('Peephole Str/LdrAdd/Sub2Str/Ldr Postindex done', p);
           p.oper[1]^.ref^.addressmode:=AM_POSTINDEXED;
@@ -669,6 +725,30 @@ Implementation
                           end;
                       end;
 
+                    {
+                      Change
+
+                        ldrb dst1, [REF]
+                        and  dst2, dst1, #255
+
+                      into
+
+                        ldrb dst2, [ref]
+                    }
+                    if (taicpu(p).oppostfix=PF_B) and
+                       GetNextInstructionUsingReg(p, hp1, taicpu(p).oper[0]^.reg) and
+                       MatchInstruction(hp1, A_AND, [taicpu(p).condition], [PF_NONE]) and
+                       (taicpu(hp1).oper[1]^.reg = taicpu(p).oper[0]^.reg) and
+                       (taicpu(hp1).oper[2]^.typ = top_const) and
+                       (taicpu(hp1).oper[2]^.val = $FF) and
+                       not(RegUsedBetween(taicpu(hp1).oper[0]^.reg, p, hp1)) and
+                       RegEndOfLife(taicpu(p).oper[0]^.reg, taicpu(hp1)) then
+                       begin
+                         DebugMsg('Peephole LdrbAnd2Ldrb done', p);
+                         taicpu(p).oper[0]^.reg := taicpu(hp1).oper[0]^.reg;
+                         asml.remove(hp1);
+                         hp1.free;
+                       end;
                     LookForPostindexedPattern(taicpu(p));
                     { Remove superfluous mov after ldr
                       changes
@@ -1149,18 +1229,35 @@ Implementation
                        {Only LDR, LDRB, STR, STRB can handle scaled register indexing}
                        MatchInstruction(hp1, [A_LDR, A_STR], [taicpu(p).condition],
                                              [PF_None, PF_B]) and
-                       (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
-                       (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg) and
+                       (
+                         {If this is address by offset, one of the two registers can be used}
+                         ((taicpu(hp1).oper[1]^.ref^.addressmode=AM_OFFSET) and
+                           (
+                             (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) xor
+                             (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg)
+                           )
+                         ) or
+                         {For post and preindexed only the index register can be used}
+                         ((taicpu(hp1).oper[1]^.ref^.addressmode in [AM_POSTINDEXED, AM_PREINDEXED]) and
+                           (
+                             (taicpu(hp1).oper[1]^.ref^.index = taicpu(p).oper[0]^.reg) and
+                             (taicpu(hp1).oper[1]^.ref^.base <> taicpu(p).oper[0]^.reg)
+                           )
+                         )
+                       ) and
                        { Only fold if there isn't another shifterop already. }
                        (taicpu(hp1).oper[1]^.ref^.shiftmode = SM_None) and
                        not(RegModifiedBetween(taicpu(p).oper[1]^.reg,p,hp1)) and
                        (assigned(FindRegDealloc(taicpu(p).oper[0]^.reg,tai(hp1.Next))) or
                          regLoadedWithNewValue(taicpu(p).oper[0]^.reg, hp1)) then
                        begin
-                         DebugMsg('Peephole FoldShiftLdrStr done', hp1);
+                         { If the register we want to do the shift for resides in base, we need to swap that}
+                         if (taicpu(hp1).oper[1]^.ref^.base = taicpu(p).oper[0]^.reg) then
+                           taicpu(hp1).oper[1]^.ref^.base := taicpu(hp1).oper[1]^.ref^.index;
                          taicpu(hp1).oper[1]^.ref^.index := taicpu(p).oper[1]^.reg;
                          taicpu(hp1).oper[1]^.ref^.shiftmode := taicpu(p).oper[2]^.shifterop^.shiftmode;
                          taicpu(hp1).oper[1]^.ref^.shiftimm := taicpu(p).oper[2]^.shifterop^.shiftimm;
+                         DebugMsg('Peephole FoldShiftLdrStr done', hp1);
                          asml.remove(p);
                          p.free;
                          p:=hp1;
@@ -1425,6 +1522,16 @@ Implementation
                         if (taicpu(p).ops=3) then
                           RemoveSuperfluousMove(p, hp1, 'DataMov2Data');
                       end;
+
+                    if MatchInstruction(p, [A_ADD,A_SUB], [C_None], [PF_None]) and
+                      LookForPreindexedPattern(taicpu(p)) then
+                      begin
+                        GetNextInstruction(p,hp1);
+                        DebugMsg('Peephole Add/Sub to Preindexed done', p);
+                        asml.remove(p);
+                        p.free;
+                        p:=hp1;
+                      end;
                   end;
 {$ifdef dummy}                  
                 A_MVN:
@@ -2039,7 +2146,7 @@ Implementation
     begin
       result:=true;
 
-      list:=TAsmList.Create;
+      list:=TAsmList.create_without_marker;
       p:=BlockStart;
       while p<>BlockEnd Do
         begin

+ 11 - 7
compiler/arm/cgcpu.pas

@@ -3942,7 +3942,7 @@ unit cgcpu;
               it saves us a register }
 {$ifdef DUMMY}
             else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a,l1) and not(cgsetflags or setflags) then
-              a_op_const_reg_reg(list,OP_SHL,size,l1,src,dst)
+              a_op_const_reg_reg(list,OP_SHL,size,l1,dst,dst)
             { for example : b=a*5 -> b=a*4+a with add instruction and shl }
             else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a-1,l1) and not(cgsetflags or setflags) then
               begin
@@ -3951,7 +3951,7 @@ unit cgcpu;
                 shifterop_reset(so);
                 so.shiftmode:=SM_LSL;
                 so.shiftimm:=l1;
-                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,src,src,so));
+                list.concat(taicpu.op_reg_reg_reg_shifterop(A_ADD,dst,dst,dst,so));
               end
             { for example : b=a*7 -> b=a*8-a with rsb instruction and shl }
             else if (op in [OP_MUL,OP_IMUL]) and ispowerof2(a+1,l1) and not(cgsetflags or setflags) then
@@ -3961,9 +3961,9 @@ unit cgcpu;
                 shifterop_reset(so);
                 so.shiftmode:=SM_LSL;
                 so.shiftimm:=l1;
-                list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,src,src,so));
+                list.concat(taicpu.op_reg_reg_reg_shifterop(A_RSB,dst,dst,dst,so));
               end
-            else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,src,dst) then
+            else if (op in [OP_MUL,OP_IMUL]) and not(cgsetflags or setflags) and try_optimized_mul32_const_reg_reg(list,a,dst,dst) then
               begin
                 { nothing to do on success }
               end
@@ -3979,20 +3979,24 @@ unit cgcpu;
               broader range of shifterconstants.}
 {$ifdef DUMMY}
             else if (op = OP_AND) and is_shifter_const(not(dword(a)),shift) then
-              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,not(dword(a))))
+              list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,not(dword(a))))
             else if (op = OP_AND) and split_into_shifter_const(not(dword(a)), imm1, imm2) then
               begin
-                list.concat(taicpu.op_reg_reg_const(A_BIC,dst,src,imm1));
+                list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm1));
                 list.concat(taicpu.op_reg_reg_const(A_BIC,dst,dst,imm2));
               end
             else if (op in [OP_ADD, OP_SUB, OP_OR]) and
                     not(cgsetflags or setflags) and
                     split_into_shifter_const(a, imm1, imm2) then
               begin
-                list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,src,imm1));
+                list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm1));
                 list.concat(taicpu.op_reg_reg_const(op_reg_reg_opcg2asmop[op],dst,dst,imm2));
               end
 {$endif DUMMY}
+            else if (op in [OP_SHL, OP_SHR, OP_SAR, OP_ROR]) then
+              begin
+                list.concat(taicpu.op_reg_reg_const(op_reg_opcg2asmop[op],dst,dst,a));
+              end
             else
               begin
                 tmpreg:=getintregister(list,size);

+ 46 - 11
compiler/arm/cpupara.pas

@@ -81,20 +81,25 @@ unit cpupara;
     procedure tarmparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
-        def : tdef;
+        psym : tparavarsym;
+        pdef : tdef;
       begin
         if nr<1 then
           internalerror(2002070801);
-        def:=tparavarsym(pd.paras[nr-1]).vardef;
+        psym:=tparavarsym(pd.paras[nr-1]);
+        pdef:=psym.vardef;
+        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
+          pdef:=getpointerdef(pdef);
         cgpara.reset;
-        cgpara.size:=def_cgsize(def);
+        cgpara.size:=def_cgsize(pdef);
         cgpara.intsize:=tcgsize2size[cgpara.size];
         cgpara.alignment:=std_param_align;
-        cgpara.def:=def;
+        cgpara.def:=pdef;
         paraloc:=cgpara.add_location;
         with paraloc^ do
           begin
-            size:=OS_INT;
+            def:=pdef;
+            size:=def_cgsize(pdef);
             { the four first parameters are passed into registers }
             if nr<=4 then
               begin
@@ -362,6 +367,7 @@ unit cpupara;
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_R0;
                 paraloc^.size:=OS_ADDR;
+                paraloc^.def:=voidpointertype;
                 break;
               end;
 
@@ -413,16 +419,28 @@ unit cpupara;
                  if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
                    case paracgsize of
                      OS_F32:
-                       paraloc^.size:=OS_32;
+                       begin
+                         paraloc^.size:=OS_32;
+                         paraloc^.def:=u32inttype;
+                       end;
                      OS_F64:
-                       paraloc^.size:=OS_32;
+                       begin
+                         paraloc^.size:=OS_32;
+                         paraloc^.def:=u32inttype;
+                       end;
                      else
                        internalerror(2005082901);
                    end
                  else if (paracgsize in [OS_NO,OS_64,OS_S64]) then
-                   paraloc^.size := OS_32
+                   begin
+                     paraloc^.size:=OS_32;
+                     paraloc^.def:=u32inttype;
+                   end
                  else
-                   paraloc^.size:=paracgsize;
+                   begin
+                     paraloc^.size:=paracgsize;
+                     paraloc^.def:=get_paraloc_def(paradef,paralen,firstparaloc);
+                   end;
                  case loc of
                     LOC_REGISTER:
                       begin
@@ -449,6 +467,7 @@ unit cpupara;
                             { LOC_REFERENCE always contains everything that's left }
                             paraloc^.loc:=LOC_REFERENCE;
                             paraloc^.size:=int_cgsize(paralen);
+                            paraloc^.def:=getarraydef(u8inttype,paralen);
                             if (side=callerside) then
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                             paraloc^.reference.offset:=stack_offset;
@@ -522,6 +541,7 @@ unit cpupara;
                             { LOC_REFERENCE always contains everything that's left }
                             paraloc^.loc:=LOC_REFERENCE;
                             paraloc^.size:=int_cgsize(paralen);
+                            paraloc^.def:=getarraydef(u8inttype,paralen);
                             if (side=callerside) then
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                             paraloc^.reference.offset:=stack_offset;
@@ -534,6 +554,7 @@ unit cpupara;
                         if push_addr_param(hp.varspez,paradef,p.proccalloption) then
                           begin
                             paraloc^.size:=OS_ADDR;
+                            paraloc^.def:=getpointerdef(paradef);
                             assignintreg
                           end
                         else
@@ -545,6 +566,7 @@ unit cpupara;
                               stack_offset:=align(stack_offset,8);
 
                              paraloc^.size:=paracgsize;
+                             paraloc^.def:=paradef;
                              paraloc^.loc:=LOC_REFERENCE;
                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
                              paraloc^.reference.offset:=stack_offset;
@@ -621,6 +643,7 @@ unit cpupara;
                     internalerror(2012032501);
                 end;
                 paraloc^.size:=retcgsize;
+                paraloc^.def:=result.def;
               end
             else if (p.proccalloption in [pocall_softfloat]) or
                (cs_fp_emulation in current_settings.moduleswitches) or
@@ -636,6 +659,7 @@ unit cpupara;
                       else
                         paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
                       paraloc^.size:=OS_32;
+                      paraloc^.def:=u32inttype;
                       paraloc:=result.add_location;
                       paraloc^.loc:=LOC_REGISTER;
                       if target_info.endian = endian_big then
@@ -643,6 +667,7 @@ unit cpupara;
                       else
                         paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
                       paraloc^.size:=OS_32;
+                      paraloc^.def:=u32inttype;
                     end;
                   OS_32,
                   OS_F32:
@@ -650,6 +675,7 @@ unit cpupara;
                       paraloc^.loc:=LOC_REGISTER;
                       paraloc^.register:=NR_FUNCTION_RETURN_REG;
                       paraloc^.size:=OS_32;
+                      paraloc^.def:=u32inttype;
                     end;
                   else
                     internalerror(2005082603);
@@ -660,6 +686,7 @@ unit cpupara;
                 paraloc^.loc:=LOC_FPUREGISTER;
                 paraloc^.register:=NR_FPU_RESULT_REG;
                 paraloc^.size:=retcgsize;
+                paraloc^.def:=result.def;
               end;
           end
           { Return in register }
@@ -673,6 +700,7 @@ unit cpupara;
                 else
                   paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
                 paraloc^.size:=OS_32;
+                paraloc^.def:=u32inttype;
                 paraloc:=result.add_location;
                 paraloc^.loc:=LOC_REGISTER;
                 if target_info.endian = endian_big then
@@ -680,15 +708,22 @@ unit cpupara;
                 else
                   paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
                 paraloc^.size:=OS_32;
+                paraloc^.def:=u32inttype;
               end
             else
               begin
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_FUNCTION_RETURN_REG;
                 if (result.intsize<>3) then
-                  paraloc^.size:=retcgsize
+                  begin
+                    paraloc^.size:=retcgsize;
+                    paraloc^.def:=result.def;
+                  end
                 else
-                  paraloc^.size:=OS_32;
+                  begin
+                    paraloc^.size:=OS_32;
+                    paraloc^.def:=u32inttype;
+                  end;
               end;
           end;
       end;

+ 12 - 25
compiler/arm/narmadd.pas

@@ -139,13 +139,10 @@ interface
               { force fpureg as location, left right doesn't matter
                 as both will be in a fpureg }
               location_force_fpureg(current_asmdata.CurrAsmList,left.location,true);
-              location_force_fpureg(current_asmdata.CurrAsmList,right.location,(left.location.loc<>LOC_CFPUREGISTER));
+              location_force_fpureg(current_asmdata.CurrAsmList,right.location,true);
 
               location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
-              if left.location.loc<>LOC_CFPUREGISTER then
-                location.register:=left.location.register
-              else
-                location.register:=right.location.register;
+              location.register:=cg.getfpuregister(current_asmdata.CurrAsmList,location.size);
 
               case nodetype of
                 addn :
@@ -170,16 +167,11 @@ interface
             begin
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
-              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
-              if left.location.loc<>LOC_CMMREGISTER then
-                location.register:=left.location.register
-              else if right.location.loc<>LOC_CMMREGISTER then
-                location.register:=right.location.register
-              else
-                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+              location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
 
               singleprec:=tfloatdef(left.resultdef).floattype=s32real;
               case nodetype of
@@ -214,16 +206,11 @@ interface
             begin
               { force mmreg as location, left right doesn't matter
                 as both will be in a fpureg }
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
-              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
-              if left.location.loc<>LOC_CMMREGISTER then
-                location.register:=left.location.register
-              else if right.location.loc<>LOC_CMMREGISTER then
-                location.register:=right.location.register
-              else
-                location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
+              location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
 
               case nodetype of
                 addn :
@@ -284,8 +271,8 @@ interface
           fpu_vfpv3,
           fpu_vfpv3_d16:
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
-              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
               if (tfloatdef(left.resultdef).floattype=s32real) then
                 if nodetype in [equaln,unequaln] then
@@ -303,8 +290,8 @@ interface
             end;
           fpu_fpv4_s16:
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
-              location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
 
               if nodetype in [equaln,unequaln] then
                 op:=A_VCMP

+ 2 - 2
compiler/arm/narmcnv.pas

@@ -246,7 +246,7 @@ implementation
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
               if (left.location.size<>OS_F32) then
                 internalerror(2009112703);
               if left.location.size<>location.size then
@@ -260,7 +260,7 @@ implementation
             begin
               location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
               signed:=left.location.size=OS_S32;
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
               if (left.location.size<>OS_F32) then
                 internalerror(2009112703);
               if left.location.size<>location.size then

+ 1 - 1
compiler/arm/narminl.pas

@@ -88,7 +88,7 @@ implementation
           fpu_vfpv3_d16,
           fpu_fpv4_s16:
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location_copy(location,left.location);
               if left.location.loc=LOC_CMMREGISTER then
                 begin

+ 18 - 4
compiler/arm/narmmat.pas

@@ -58,7 +58,7 @@ implementation
       symtype,symconst,symtable,
       cgbase,cgobj,hlcgobj,cgutils,
       pass_2,procinfo,
-      ncon,ncnv,ncal,
+      ncon,ncnv,ncal,ninl,
       cpubase,cpuinfo,
       ncgutil,
       nadd,pass_1,symdef;
@@ -99,6 +99,17 @@ implementation
               end;
             left:=nil;
           end
+        else if (nodetype=modn) and
+          (is_signed(left.resultdef)) and
+          (right.nodetype=ordconstn) and
+          (tordconstnode(right).value=2) then
+          begin
+            // result:=(0-(left and 1)) and (1+(sarlongint(left,31) shl 1))
+            result:=caddnode.create(andn,caddnode.create(subn,cordconstnode.create(0,sinttype,false),caddnode.create(andn,left,cordconstnode.create(1,sinttype,false))),
+                                         caddnode.create(addn,cordconstnode.create(1,sinttype,false),
+                                                              cshlshrnode.create(shln,cinlinenode.create(in_sar_x_y,false,ccallparanode.create(cordconstnode.create(31,sinttype,false),ccallparanode.Create(left.getcopy,nil))),cordconstnode.create(1,sinttype,false))));
+            left:=nil;
+          end
         else
           result:=inherited first_moddivint;
       end;
@@ -135,7 +146,10 @@ implementation
                  begin
                     helper1:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
                     helper2:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
-                    cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,helper1);
+                    if power = 1 then
+                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,OS_INT,OS_INT,numerator,helper1)
+                    else
+                      cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,31,numerator,helper1);
                     if current_settings.cputype in cpu_thumb then
                       begin
                         cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,OS_INT,32-power,helper1);
@@ -390,7 +404,7 @@ implementation
           fpu_vfpv3,
           fpu_vfpv3_d16:
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;
               if (left.location.loc=LOC_CMMREGISTER) then
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
@@ -403,7 +417,7 @@ implementation
             end;
           fpu_fpv4_s16:
             begin
-              location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,true);
+              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
               location:=left.location;
               if (left.location.loc=LOC_CMMREGISTER) then
                 location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

+ 21 - 0
compiler/arm/narmset.pas

@@ -141,6 +141,7 @@ implementation
     procedure tarmcasenode.genjumptable(hp : pcaselabel;min_,max_ : aint);
       var
         last : TConstExprInt;
+        basereg,
         indexreg : tregister;
         href : treference;
         tablelabel: TAsmLabel;
@@ -208,6 +209,26 @@ implementation
             last:=min_;
             genitem_thumb2(current_asmdata.CurrAsmList,hp);
           end
+        else if current_settings.cputype in cpu_thumb then
+          begin
+            cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SUB,OS_ADDR,min_+1,indexreg,indexreg);
+            current_asmdata.getaddrlabel(tablelabel);
+
+            cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHL,OS_ADDR,2,indexreg);
+
+            basereg:=cg.getintregister(current_asmdata.CurrAsmList, OS_ADDR);
+            reference_reset_symbol(href,tablelabel,0,4);
+            cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList, href, basereg);
+
+            cg.a_op_reg_reg(current_asmdata.CurrAsmList, OP_ADD, OS_ADDr, indexreg, basereg);
+
+            current_asmdata.CurrAsmList.Concat(taicpu.op_reg(A_BX, basereg));
+
+            cg.a_label(current_asmdata.CurrAsmList,tablelabel);
+            { generate jump table }
+            last:=min_;
+            genitem(current_asmdata.CurrAsmList,hp);
+          end
         else
           begin
             { adjust index }

+ 45 - 13
compiler/avr/cpupara.pas

@@ -71,20 +71,25 @@ unit cpupara;
     procedure tavrparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
-        def : tdef;
+        psym: tparavarsym;
+        pdef: tdef;
       begin
         if nr<1 then
           internalerror(2002070801);
-        def:=tparavarsym(pd.paras[nr-1]).vardef;
+        psym:=tparavarsym(pd.paras[nr-1]);
+        pdef:=psym.vardef;
+        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
+          pdef:=getpointerdef(pdef);
         cgpara.reset;
-        cgpara.size:=def_cgsize(def);
+        cgpara.size:=def_cgsize(pdef);
         cgpara.intsize:=tcgsize2size[cgpara.size];
         cgpara.alignment:=std_param_align;
-        cgpara.def:=def;
+        cgpara.def:=pdef;
         paraloc:=cgpara.add_location;
         with paraloc^ do
           begin
-            size:=OS_INT;
+            size:=def_cgsize(pdef);
+            def:=pdef;
             { the four first parameters are passed into registers }
             if nr<=9 then
               begin
@@ -225,6 +230,7 @@ unit cpupara;
         paracgsize   : tcgsize;
         paralen : longint;
         i : integer;
+        firstparaloc: boolean;
 
       procedure assignintreg;
         begin
@@ -272,6 +278,7 @@ unit cpupara;
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_R25;
                 paraloc^.size:=OS_ADDR;
+                paraloc^.def:=voidpointertype;
                 break;
               end;
 
@@ -315,6 +322,7 @@ unit cpupara;
              if paralen=0 then
                internalerror(200410311);
 {$endif EXTDEBUG}
+             firstparaloc:=true;
              while paralen>0 do
                begin
                  paraloc:=hp.paraloc[side].add_location;
@@ -322,16 +330,28 @@ unit cpupara;
                  if (loc=LOC_REGISTER) and (paracgsize in [OS_F32,OS_F64,OS_F80]) then
                    case paracgsize of
                      OS_F32:
-                       paraloc^.size:=OS_32;
+                       begin
+                         paraloc^.size:=OS_32;
+                         paraloc^.def:=u32inttype;
+                       end;
                      OS_F64:
-                       paraloc^.size:=OS_32;
+                       begin
+                         paraloc^.size:=OS_32;
+                         paraloc^.def:=u32inttype;
+                       end;
                      else
                        internalerror(2005082901);
                    end
                  else if paracgsize<>OS_S8 then
-                   paraloc^.size := OS_8
+                   begin
+                     paraloc^.size:=OS_8;
+                     paraloc^.def:=u8inttype
+                   end
                  else
-                   paraloc^.size:=paracgsize;
+                   begin
+                     paraloc^.size:=paracgsize;
+                     paraloc^.def:=paradef;
+                   end;
                  case loc of
                     LOC_REGISTER:
                       begin
@@ -346,6 +366,8 @@ unit cpupara;
                             { LOC_REFERENCE covers always the overleft }
                             paraloc^.loc:=LOC_REFERENCE;
                             paraloc^.size:=int_cgsize(paralen);
+                            paraloc^.def:=get_paraloc_def(paradef,paralen,firstparaloc);
+
                             if (side=callerside) then
                               paraloc^.reference.index:=NR_STACK_POINTER_REG;
                             paraloc^.reference.offset:=stack_offset;
@@ -356,12 +378,14 @@ unit cpupara;
                     LOC_REFERENCE:
                       begin
                         paraloc^.size:=OS_ADDR;
-                        if push_addr_param(hp.varspez,paradef,p.proccalloption) or
-                          is_open_array(paradef) or
-                          is_array_of_const(paradef) then
-                          assignintreg
+                        if push_addr_param(hp.varspez,paradef,p.proccalloption) then
+                          begin
+                            paraloc^.def:=getpointerdef(paradef);
+                            assignintreg
+                          end
                         else
                           begin
+                             paraloc^.def:=hp.vardef;
                              paraloc^.loc:=LOC_REFERENCE;
                              paraloc^.reference.index:=NR_STACK_POINTER_REG;
                              paraloc^.reference.offset:=stack_offset;
@@ -380,6 +404,7 @@ unit cpupara;
                        end;
                    end;
                  dec(paralen,tcgsize2size[paraloc^.size]);
+                 firstparaloc:=false;
                end;
           end;
         curintreg:=nextintreg;
@@ -426,10 +451,12 @@ unit cpupara;
                       paraloc^.loc:=LOC_REGISTER;
                       paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
                       paraloc^.size:=OS_32;
+                      paraloc^.def:=u32inttype;
                       paraloc:=result.add_location;
                       paraloc^.loc:=LOC_REGISTER;
                       paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
                       paraloc^.size:=OS_32;
+                      paraloc^.def:=u32inttype;
                     end;
                   OS_32,
                   OS_F32:
@@ -437,6 +464,7 @@ unit cpupara;
                       paraloc^.loc:=LOC_REGISTER;
                       paraloc^.register:=NR_FUNCTION_RETURN_REG;
                       paraloc^.size:=OS_32;
+                      paraloc^.def:=u32inttype;
                     end;
                   else
                     internalerror(2005082603);
@@ -447,6 +475,7 @@ unit cpupara;
                 paraloc^.loc:=LOC_FPUREGISTER;
                 paraloc^.register:=NR_FPU_RESULT_REG;
                 paraloc^.size:=retcgsize;
+                paraloc^.def:=result.def;
               end;
           end
           { Return in register }
@@ -457,16 +486,19 @@ unit cpupara;
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG;
                 paraloc^.size:=OS_32;
+                paraloc^.def:=u32inttype;
                 paraloc:=result.add_location;
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG;
                 paraloc^.size:=OS_32;
+                paraloc^.def:=u32inttype;
               end
             else
               begin
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_FUNCTION_RETURN_REG;
                 paraloc^.size:=OS_32;
+                paraloc^.def:=u32inttype;
               end;
           end;
       end;

+ 9 - 1
compiler/cfileutl.pas

@@ -142,7 +142,7 @@ interface
 
 {$IF DEFINED(MORPHOS) OR DEFINED(AMIGA)}
 { * PATHCONV is implemented in the Amiga/MorphOS system unit * }
-{$WARNING TODO Amiga: implement PathConv() in System unit, which works with AnsiString}
+{$NOTE TODO Amiga: implement PathConv() in System unit, which works with AnsiString}
 function Unix2AmigaPath(path: ShortString): ShortString; external name 'PATHCONV';
 {$ELSE}
 function Unix2AmigaPath(path: String): String;{$IFDEF USEINLINE}inline;{$ENDIF}
@@ -1203,6 +1203,14 @@ end;
        StartPos, EndPos, L: LongInt;
      begin
        Result:=False;
+
+       if (path_absolute(f)) then
+         begin
+           Result:=FileExistsNonCase('',f, allowcache, foundfile);
+           if Result then
+             Exit;
+         end;
+
        StartPos := 1;
        L := Length(Path);
        repeat

+ 6 - 0
compiler/cgbase.pas

@@ -97,6 +97,12 @@ interface
          ,addr_lo8
          ,addr_hi8
          {$ENDIF}
+         {$IFDEF i8086}
+         ,addr_dgroup      // the data segment group
+         ,addr_far         // used for emitting 'call/jmp far label' instructions
+         ,addr_far_ref     // used for emitting 'call far [reference]' instructions
+         ,addr_seg         // used for getting the segment of an object, e.g. 'mov ax, SEG symbol'
+         {$ENDIF}
          );
 
 

+ 33 - 1
compiler/cgobj.pas

@@ -52,8 +52,10 @@ unit cgobj;
           by Free Pascal. For 32-bit processors, the base class
           should be @link(tcg64f32) and not @var(tcg).
        }
+
+       { tcg }
+
        tcg = class
-       public
           { how many times is this current code executed }
           executionweight : longint;
           alignment : talignment;
@@ -271,6 +273,9 @@ unit cgobj;
           procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual;
           procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); virtual;
           procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tcgsize;reg: tregister;const ref: treference; shuffle : pmmshuffle); virtual;
+          procedure a_opmm_loc_reg_reg(list: TAsmList;Op : TOpCG;size : tcgsize;const loc : tlocation;src,dst : tregister;shuffle : pmmshuffle); virtual;
+          procedure a_opmm_reg_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;src1,src2,dst: tregister;shuffle : pmmshuffle); virtual;
+          procedure a_opmm_ref_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const ref: treference; src,dst: tregister;shuffle : pmmshuffle); virtual;
 
           procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tcgsize; intreg, mmreg: tregister; shuffle: pmmshuffle); virtual;
           procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tcgsize; mmreg, intreg: tregister; shuffle : pmmshuffle); virtual;
@@ -2061,6 +2066,33 @@ implementation
       end;
 
 
+    procedure tcg.a_opmm_loc_reg_reg(list: TAsmList; Op: TOpCG; size : tcgsize;const loc: tlocation; src,dst: tregister;shuffle : pmmshuffle);
+      begin
+        case loc.loc of
+          LOC_CMMREGISTER,LOC_MMREGISTER:
+            a_opmm_reg_reg_reg(list,op,size,loc.register,src,dst,shuffle);
+          LOC_CREFERENCE,LOC_REFERENCE:
+            a_opmm_ref_reg_reg(list,op,size,loc.reference,src,dst,shuffle);
+          else
+            internalerror(200312232);
+        end;
+      end;
+
+
+    procedure tcg.a_opmm_reg_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;
+      src1,src2,dst : tregister;shuffle : pmmshuffle);
+      begin
+        internalerror(2013061102);
+      end;
+
+
+    procedure tcg.a_opmm_ref_reg_reg(list : TAsmList;Op : TOpCG;size : tcgsize;
+      const ref : treference;src,dst : tregister;shuffle : pmmshuffle);
+      begin
+        internalerror(2013061101);
+      end;
+
+
     procedure tcg.g_concatcopy_unaligned(list : TAsmList;const source,dest : treference;len : tcgint);
       begin
         g_concatcopy(list,source,dest,len);

+ 8 - 0
compiler/constexp.pas

@@ -189,6 +189,10 @@ try_qword:
   result.overflow:=true;
 end;
 
+{ workaround for 2.6.x bug }
+{$ifdef VER2_6}
+    {$push} {$Q-}
+{$endif VER2_6}
 function sub_from(const a:Tconstexprint;b:qword):Tconstexprint;
 
 const abs_low_int64=qword(9223372036854775808);   {abs(low(int64)) -> overflow error}
@@ -231,6 +235,10 @@ try_qword:
 ov:
   result.overflow:=true;
 end;
+{ workaround for 2.6.x bug }
+{$ifdef VER2_6}
+    {$pop}
+{$endif VER2_6}
 
 operator + (const a,b:Tconstexprint):Tconstexprint;
 

+ 3 - 3
compiler/cresstr.pas

@@ -139,10 +139,10 @@ uses
         R : TResourceStringItem;
       begin
         { Put resourcestrings in a new objectfile. Putting it in multiple files
-	  makes the linking too dependent on the linker script requiring a SORT(*) for
-	  the data sections }
+          makes the linking too dependent on the linker script requiring a SORT(*) for
+          the data sections }
         maybe_new_object_file(current_asmdata.asmlists[al_const]);
-        new_section(current_asmdata.asmlists[al_const],sec_data,make_mangledname('RESSTRTABLE',current_module.localsymtable,''),sizeof(pint));
+        new_section(current_asmdata.asmlists[al_const],sec_rodata_norel,make_mangledname('RESSTRTABLE',current_module.localsymtable,''),sizeof(pint));
 
         maybe_new_object_file(current_asmdata.asmlists[al_resourcestrings]);
         new_section(current_asmdata.asmlists[al_resourcestrings],sec_data,make_mangledname('RESSTR',current_module.localsymtable,'1_START'),sizeof(pint));

+ 39 - 19
compiler/defcmp.pas

@@ -245,8 +245,13 @@ implementation
 
              { if only one def is a undefined def then they are not considered as
                equal}
-             if (def_from.typ=undefineddef) or
-                (def_to.typ=undefineddef) then
+             if (
+                   (def_from.typ=undefineddef) or
+                   assigned(tstoreddef(def_from).genconstraintdata)
+                 ) or (
+                   (def_to.typ=undefineddef) or
+                   assigned(tstoreddef(def_to).genconstraintdata)
+                 ) then
               begin
                 doconv:=tc_not_possible;
                 compare_defs_ext:=te_incompatible;
@@ -255,9 +260,15 @@ implementation
            end
          else
            begin
-             { undefined defs are considered equal }
-             if (def_from.typ=undefineddef) or
-                (def_to.typ=undefineddef) then
+             { undefined defs or defs with generic constraints are
+               considered equal to everything }
+             if (
+                   (def_from.typ=undefineddef) or
+                   assigned(tstoreddef(def_from).genconstraintdata)
+                 ) or (
+                   (def_to.typ=undefineddef) or
+                   assigned(tstoreddef(def_to).genconstraintdata)
+                 ) then
               begin
                 doconv:=tc_equal;
                 compare_defs_ext:=te_exact;
@@ -271,21 +282,27 @@ implementation
              (df_specialization in def_to.defoptions) and
              (tstoreddef(def_from).genericdef=tstoreddef(def_to).genericdef) then
            begin
-             if tstoreddef(def_from).genericparas.count<>tstoreddef(def_to).genericparas.count then
-               internalerror(2012091301);
+             if assigned(tstoreddef(def_from).genericparas) xor
+                 assigned(tstoreddef(def_to).genericparas) then
+               internalerror(2013030901);
              diff:=false;
-             for i:=0 to tstoreddef(def_from).genericparas.count-1 do
+             if assigned(tstoreddef(def_from).genericparas) then
                begin
-                 if tstoreddef(def_from).genericparas.nameofindex(i)<>tstoreddef(def_to).genericparas.nameofindex(i) then
-                   internalerror(2012091302);
-                 symfrom:=ttypesym(tstoreddef(def_from).genericparas[i]);
-                 symto:=ttypesym(tstoreddef(def_to).genericparas[i]);
-                 if not (symfrom.typ=typesym) or not (symto.typ=typesym) then
-                   internalerror(2012121401);
-                 if not equal_defs(ttypesym(symfrom).typedef,ttypesym(symto).typedef) then
-                   diff:=true;
-                 if diff then
-                   break;
+                 if tstoreddef(def_from).genericparas.count<>tstoreddef(def_to).genericparas.count then
+                   internalerror(2012091301);
+                 for i:=0 to tstoreddef(def_from).genericparas.count-1 do
+                   begin
+                     if tstoreddef(def_from).genericparas.nameofindex(i)<>tstoreddef(def_to).genericparas.nameofindex(i) then
+                       internalerror(2012091302);
+                     symfrom:=ttypesym(tstoreddef(def_from).genericparas[i]);
+                     symto:=ttypesym(tstoreddef(def_to).genericparas[i]);
+                     if not (symfrom.typ=typesym) or not (symto.typ=typesym) then
+                       internalerror(2012121401);
+                     if not equal_defs(ttypesym(symfrom).typedef,ttypesym(symto).typedef) then
+                       diff:=true;
+                     if diff then
+                       break;
+                   end;
                end;
              if not diff then
                begin
@@ -1265,7 +1282,10 @@ implementation
                      { check for far pointers }
                      if (tpointerdef(def_from).x86pointertyp<>tpointerdef(def_to).x86pointertyp) then
                        begin
-                         eq:=te_incompatible;
+                         if fromtreetype=niln then
+                           eq:=te_equal
+                         else
+                           eq:=te_incompatible;
                        end
                      else
 {$endif x86}

+ 117 - 14
compiler/defutil.pas

@@ -110,6 +110,9 @@ interface
     {# Returns whether def is reference counted }
     function is_managed_type(def: tdef) : boolean;{$ifdef USEINLINE}inline;{$endif}
 
+    { # Returns whether def is needs to load RTTI for reference counting }
+    function is_rtti_managed_type(def: tdef) : boolean;
+
 {    function is_in_limit_value(val_from:TConstExprInt;def_from,def_to : tdef) : boolean;}
 
 {*****************************************************************************
@@ -293,6 +296,13 @@ interface
        to note that the value returned can be @var(OS_NO) }
     function def_cgsize(def: tdef): tcgsize;
 
+    { #Return an orddef (integer) correspondig to a tcgsize }
+    function cgsize_orddef(size: tcgsize): torddef;
+
+    {# Same as def_cgsize, except that it will interpret certain arrays as
+       vectors and return OS_M* sizes for them }
+    function def_cgmmsize(def: tdef): tcgsize;
+
     {# returns true, if the type passed is can be used with windows automation }
     function is_automatable(p : tdef) : boolean;
 
@@ -318,6 +328,14 @@ interface
     { returns true of def is a methodpointer }
     function is_methodpointer(def : tdef) : boolean;
 
+{$ifdef i8086}
+    {# Returns true if p is a far pointer def }
+    function is_farpointer(p : tdef) : boolean;
+
+    {# Returns true if p is a huge pointer def }
+    function is_hugepointer(p : tdef) : boolean;
+{$endif i8086}
+
 implementation
 
     uses
@@ -616,6 +634,19 @@ implementation
       end;
 
 
+    function is_rtti_managed_type(def: tdef): boolean;
+      begin
+        result:=def.needs_inittable and not (
+          is_interfacecom_or_dispinterface(def) or
+          (def.typ=variantdef) or
+          (
+            (def.typ=stringdef) and
+            (tstringdef(def).stringtype in [st_ansistring,st_widestring,st_unicodestring])
+          )
+        );
+      end;
+
+
     { true, if p points to an open array def }
     function is_open_string(p : tdef) : boolean;
       begin
@@ -1170,23 +1201,27 @@ implementation
                 result:=tcgsize(ord(result)+(ord(OS_S8)-ord(OS_8)));
             end;
           classrefdef,
-          pointerdef:
-            result := OS_ADDR;
-          procvardef:
+          pointerdef,
+          formaldef:
             begin
-              if not tprocvardef(def).is_addressonly then
-                {$if sizeof(pint) = 2}
-                  result:=OS_32
-                {$elseif sizeof(pint) = 4}
-                  result:=OS_64
-                {$elseif sizeof(pint) = 8}
-                  result:=OS_128
-                {$else}
-                  internalerror(200707141)
-                {$endif}
+{$ifdef x86}
+              if (def.typ=pointerdef) and
+                 (tpointerdef(def).x86pointertyp in [x86pt_far,x86pt_huge]) then
+                begin
+                  {$if defined(i8086)}
+                    result := OS_32;
+                  {$elseif defined(i386)}
+                    internalerror(2013052201);  { there's no OS_48 }
+                  {$elseif defined(x86_64)}
+                    internalerror(2013052202);  { there's no OS_80 }
+                  {$endif}
+                end
               else
-                result:=OS_ADDR;
+{$endif x86}
+                result := OS_ADDR;
             end;
+          procvardef:
+            result:=int_cgsize(def.size);
           stringdef :
             begin
               if is_ansistring(def) or is_wide_or_unicode_string(def) then
@@ -1228,6 +1263,60 @@ implementation
         end;
       end;
 
+    function cgsize_orddef(size: tcgsize): torddef;
+      begin
+        case size of
+          OS_8:
+            result:=torddef(u8inttype);
+          OS_S8:
+            result:=torddef(s8inttype);
+          OS_16:
+            result:=torddef(u16inttype);
+          OS_S16:
+            result:=torddef(s16inttype);
+          OS_32:
+            result:=torddef(u32inttype);
+          OS_S32:
+            result:=torddef(s32inttype);
+          OS_64:
+            result:=torddef(u64inttype);
+          OS_S64:
+            result:=torddef(s64inttype);
+          else
+            internalerror(2012050401);
+        end;
+      end;
+
+    function def_cgmmsize(def: tdef): tcgsize;
+      begin
+        case def.typ of
+          arraydef:
+            begin
+              if tarraydef(def).elementdef.typ in [orddef,floatdef] then
+                begin
+                  { this is not correct, OS_MX normally mean that the vector
+                    contains elements of size X. However, vectors themselves
+                    can also have different sizes (e.g. a vector of 2 singles on
+                    SSE) and the total size is currently more important }
+                  case def.size of
+                    1: result:=OS_M8;
+                    2: result:=OS_M16;
+                    4: result:=OS_M32;
+                    8: result:=OS_M64;
+                    16: result:=OS_M128;
+                    32: result:=OS_M256;
+                    else
+                      internalerror(2013060103);
+                  end;
+                end
+              else
+                result:=def_cgsize(def);
+            end
+          else
+            result:=def_cgsize(def);
+        end;
+      end;
+
     { In Windows 95 era, ordinals were restricted to [u8bit,s32bit,s16bit,bool16bit]
       As of today, both signed and unsigned types from 8 to 64 bits are supported. }
     function is_automatable(p : tdef) : boolean;
@@ -1348,4 +1437,18 @@ implementation
         result:=(def.typ=procvardef) and (po_methodpointer in tprocvardef(def).procoptions);
       end;
 
+{$ifdef i8086}
+    { true if p is a far pointer def }
+    function is_farpointer(p : tdef) : boolean;
+      begin
+        result:=(p.typ=pointerdef) and (tpointerdef(p).x86pointertyp=x86pt_far);
+      end;
+
+    { true if p is a huge pointer def }
+    function is_hugepointer(p : tdef) : boolean;
+      begin
+        result:=(p.typ=pointerdef) and (tpointerdef(p).x86pointertyp=x86pt_huge);
+      end;
+{$endif i8086}
+
 end.

+ 5 - 0
compiler/fpcdefs.inc

@@ -58,6 +58,7 @@
   { TODO: add another define in order to disable the div helper for 16-bit divs? }
   {$define cpuneedsdiv32helper}
   {$define VOLATILE_ES}
+  {$define SUPPORT_GET_FRAME}
 {$endif i8086}
 
 {$ifdef i386}
@@ -72,6 +73,7 @@
   {$define fewintregisters}
   {$define cpurox}
   {$define SUPPORT_SAFECALL}
+  {$define SUPPORT_GET_FRAME}
 {$endif i386}
 
 {$ifdef x86_64}
@@ -86,6 +88,7 @@
   {$define cpurox}
   {$define cpurefshaveindexreg}
   {$define SUPPORT_SAFECALL}
+  {$define SUPPORT_GET_FRAME}
 {$endif x86_64}
 
 {$ifdef ia64}
@@ -146,6 +149,7 @@
   {$define cputargethasfixedstack}
   {$define cpurefshaveindexreg}
   {$define SUPPORT_SAFECALL}
+  {$define SUPPORT_GET_FRAME}
   { default to armel }
   {$if not(defined(CPUARM)) and not(defined(CPUARMEB)) and not(defined(FPC_OARM)) and not(defined(FPC_ARMEB)) and not(defined(FPC_ARMHF))}
     {$define FPC_ARMEL}
@@ -221,6 +225,7 @@
   {$define cpu32bitaddr}
   {$define cpuhighleveltarget}
   {$define symansistr}
+  {$define SUPPORT_GET_FRAME}
 {$endif}
 
 {$ifdef aarch64}

+ 10 - 2
compiler/globals.pas

@@ -154,6 +154,8 @@ interface
 
          disabledircache : boolean;
 
+         x86memorymodel  : tx86memorymodel;
+
         { CPU targets with microcontroller support can add a controller specific unit }
 {$if defined(ARM) or defined(AVR)}
         controllertype   : tcontrollertype;
@@ -446,8 +448,8 @@ interface
         fputype : fpu_none;
   {$endif avr}
   {$ifdef mips}
-        cputype : cpu_mips32;
-        optimizecputype : cpu_mips32;
+        cputype : cpu_mips2;
+        optimizecputype : cpu_mips2;
         fputype : fpu_mips2;
   {$endif mips}
   {$ifdef jvm}
@@ -477,6 +479,7 @@ interface
         minfpconstprec : s32real;
 
         disabledircache : false;
+        x86memorymodel : mm_small;
 {$if defined(ARM) or defined(AVR)}
         controllertype : ct_none;
 {$endif defined(ARM) or defined(AVR)}
@@ -902,6 +905,11 @@ implementation
         {$undef GETENVOK}
       {$else}
         GetEnvPchar:=StrPNew(GetEnvironmentVariable(envname));
+        if (length(GetEnvPChar)=0) then 
+          begin
+            FreeEnvPChar(GetEnvPChar);
+            GetEnvPChar:=nil;
+          end;
       {$endif}
       end;
 

+ 6 - 0
compiler/globtype.pas

@@ -103,7 +103,11 @@ interface
          pointer(-1) will result in a pointer with the value
          $fffffffffffffff on a 32bit machine if the compiler uses
          int64 constants internally (JM) }
+{$ifdef i8086}
+       TConstPtrUInt = LongWord;  { 32-bit for far pointers support }
+{$else i8086}
        TConstPtrUInt = AWord;
+{$endif i8086}
 
        { Use a variant record to be sure that the array if aligned correctly }
        tdoublerec=record
@@ -665,6 +669,8 @@ interface
         state : tmsgstate;
       end;
 
+    type
+      tx86memorymodel = (mm_tiny,mm_small,mm_medium,mm_compact,mm_large,mm_huge);
 
   { hide Sysutils.ExecuteProcess in units using this one after SysUtils}
   const

+ 157 - 62
compiler/hlcg2ll.pas

@@ -67,8 +67,8 @@ unit hlcg2ll;
           {# Gets a register suitable to do integer operations on.}
           function getaddressregister(list:TAsmList;size:tdef):Tregister;override;
           function getfpuregister(list:TAsmList;size:tdef):Tregister;override;
-//        we don't have high level defs yet that translate into all mm cgsizes
-//          function getmmregister(list:TAsmList;size:tdef):Tregister;override;
+          { warning: only works correctly for fpu types currently }
+          function getmmregister(list:TAsmList;size:tdef):Tregister;override;
           function getflagregister(list:TAsmList;size:tdef):Tregister;override;
           {Does the generic cg need SIMD registers, like getmmxregister? Or should
            the cpu specific child cg object have such a method?}
@@ -188,14 +188,10 @@ unit hlcg2ll;
           procedure a_loadfpu_ref_cgpara(list : TAsmList;fromsize : tdef;const ref : treference;const cgpara : TCGPara);override;
 
           { vector register move instructions }
-//        we don't have high level defs yet that translate into all mm cgsizes
-{
           procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); override;
           procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
           procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); override;
-}
-          procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);override;
-{
+          procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);override;
           procedure a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation;shuffle : pmmshuffle);override;
           procedure a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister;const cgpara : TCGPara;shuffle : pmmshuffle); override;
           procedure a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference;const cgpara : TCGPara;shuffle : pmmshuffle); override;
@@ -204,10 +200,8 @@ unit hlcg2ll;
           procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); override;
           procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tdef;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); override;
           procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tdef;reg: tregister;const ref: treference; shuffle : pmmshuffle); override;
-}
-//        we don't have high level defs yet that translate into all mm cgsizes
-//          procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
-//          procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); override;
+          procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+          procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); override;
 
           { basic arithmetic operations }
           { note: for operators which require only one argument (not, neg), use }
@@ -322,7 +316,7 @@ unit hlcg2ll;
           procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);override;
           procedure location_force_fpureg(list:TAsmList;var l: tlocation;size: tdef;maybeconst:boolean);override;
           procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);override;
-//          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override;
+          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override;
 //          procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);override;
 
           procedure maketojumpbool(list:TAsmList; p : tnode);override;
@@ -337,7 +331,10 @@ unit hlcg2ll;
           procedure gen_load_cgpara_loc(list: TAsmList; vardef: tdef; const para: TCGPara; var destloc: tlocation; reusepara: boolean); override;
 
          protected
-          procedure initialize_regvars(p: TObject; arg: pointer); override;
+          { returns the equivalent MM size for a vector register that contains
+            a record, because in that case "size" will contain a cgsize
+            representing an integer size}
+          function getintmmcgsize(reg: tregister; size: tcgsize): tcgsize; virtual;
        end;
 
 
@@ -385,6 +382,12 @@ implementation
     begin
       result:=cg.getfpuregister(list,def_cgsize(size));
     end;
+
+  function thlcg2ll.getmmregister(list: TAsmList; size: tdef): Tregister;
+    begin
+      result:=cg.getmmregister(list,def_cgsize(size));
+    end;
+
 (*
   function thlcg2ll.getmmregister(list: TAsmList; size: tdef): Tregister;
     begin
@@ -659,93 +662,136 @@ implementation
       cg.a_loadfpu_ref_cgpara(list,def_cgsize(fromsize),ref,cgpara);
     end;
 
-  procedure thlcg2ll.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle);
+  procedure thlcg2ll.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);
     var
       tmpreg: tregister;
+      tocgsize: tcgsize;
     begin
+      if def_cgmmsize(fromsize)<>loc.size then
+        internalerror(2012071226);
+      tocgsize:=getintmmcgsize(reg,def_cgmmsize(tosize));
       case loc.loc of
         LOC_SUBSETREG,LOC_CSUBSETREG,
         LOC_SUBSETREF,LOC_CSUBSETREF:
           begin
             tmpreg:=cg.getintregister(list,loc.size);
-            a_load_loc_reg(list,tcgsize2orddef(fromsize),tcgsize2orddef(fromsize),loc,tmpreg);
-            cg.a_loadmm_intreg_reg(list,loc.size,tosize,tmpreg,reg,shuffle);
+            a_load_loc_reg(list,fromsize,fromsize,loc,tmpreg);
+            { integer register -> no def_cgmmsize but plain }
+            cg.a_loadmm_intreg_reg(list,def_cgsize(fromsize),tocgsize,tmpreg,reg,shuffle);
           end
         else
-          cg.a_loadmm_loc_reg(list,tosize,loc,reg,shuffle);
+          cg.a_loadmm_loc_reg(list,tocgsize,loc,reg,shuffle);
       end;
     end;
 
-(*
   procedure thlcg2ll.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
+      tocgsize: tcgsize;
     begin
-      cg.a_loadmm_reg_reg(list,def_cgsize(fromsize),def_cgsize(tosize),reg1,reg2,shuffle);
+      fromcgsize:=getintmmcgsize(reg1,def_cgmmsize(fromsize));
+      tocgsize:=getintmmcgsize(reg2,def_cgmmsize(tosize));
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      cg.a_loadmm_reg_reg(list,fromcgsize,tocgsize,reg1,reg2,shuffle);
     end;
 
   procedure thlcg2ll.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+    var
+      tocgsize: tcgsize;
     begin
-      cg.a_loadmm_ref_reg(list,def_cgsize(fromsize),def_cgsize(tosize),ref,reg,shuffle);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      tocgsize:=getintmmcgsize(reg,def_cgmmsize(tosize));
+      cg.a_loadmm_ref_reg(list,def_cgmmsize(fromsize),tocgsize,ref,reg,shuffle);
     end;
 
   procedure thlcg2ll.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
     begin
-      cg.a_loadmm_reg_ref(list,def_cgsize(fromsize),def_cgsize(tosize),reg,ref,shuffle);
-    end;
-
-  procedure thlcg2ll.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle);
-    begin
-{$ifdef extdebug}
-      if def_cgsize(fromsize)<>loc.size then
-        internalerror(2010112103);
-{$endif}
-      cg.a_loadmm_loc_reg(list,def_cgsize(tosize),loc,reg,shuffle);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      fromcgsize:=getintmmcgsize(reg,def_cgmmsize(fromsize));
+      cg.a_loadmm_reg_ref(list,fromcgsize,def_cgmmsize(tosize),reg,ref,shuffle);
     end;
 
   procedure thlcg2ll.a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
     begin
-{$ifdef extdebug}
-      if def_cgsize(tosize)<>loc.size then
-        internalerror(2010112104);
-{$endif}
-      cg.a_loadmm_reg_loc(list,def_cgsize(fromsize),reg,loc,shuffle);
+      { sanity check }
+      if def_cgmmsize(tosize)<>loc.size then
+        internalerror(2012071216);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      fromcgsize:=getintmmcgsize(reg,def_cgmmsize(fromsize));
+      cg.a_loadmm_reg_loc(list,fromcgsize,reg,loc,shuffle);
     end;
 
   procedure thlcg2ll.a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister; const cgpara: TCGPara; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
     begin
-      cg.a_loadmm_reg_cgpara(list,def_cgsize(fromsize),reg,cgpara,shuffle);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      fromcgsize:=getintmmcgsize(reg,def_cgmmsize(fromsize));
+      cg.a_loadmm_reg_cgpara(list,fromcgsize,reg,cgpara,shuffle);
     end;
 
   procedure thlcg2ll.a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference; const cgpara: TCGPara; shuffle: pmmshuffle);
     begin
-      cg.a_loadmm_ref_cgpara(list,def_cgsize(fromsize),ref,cgpara,shuffle);
+      cg.a_loadmm_ref_cgpara(list,def_cgmmsize(fromsize),ref,cgpara,shuffle);
     end;
 
   procedure thlcg2ll.a_loadmm_loc_cgpara(list: TAsmList; fromsize: tdef; const loc: tlocation; const cgpara: TCGPara; shuffle: pmmshuffle);
     begin
-{$ifdef extdebug}
-      if def_cgsize(fromsize)<>loc.size then
-        internalerror(2010112105);
-{$endif}
+      { sanity check }
+      if def_cgmmsize(fromsize)<>loc.size then
+        internalerror(2012071220);
       cg.a_loadmm_loc_cgpara(list,loc,cgpara,shuffle);
     end;
 
+  procedure thlcg2ll.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tdef; src, dst: tregister; shuffle: pmmshuffle);
+    begin
+      cg.a_opmm_reg_reg(list,op,def_cgmmsize(size),src,dst,shuffle);
+    end;
+
+  procedure thlcg2ll.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+    begin
+      cg.a_opmm_ref_reg(list,op,def_cgmmsize(size),ref,reg,shuffle);
+    end;
+
   procedure thlcg2ll.a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size: tdef; const loc: tlocation; reg: tregister; shuffle: pmmshuffle);
     begin
-      cg.a_opmm_loc_reg(list,op,def_cgsize(size),loc,reg,shuffle);
+      cg.a_opmm_loc_reg(list,op,def_cgmmsize(size),loc,reg,shuffle);
+    end;
+
+  procedure thlcg2ll.a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+    begin
+      cg.a_opmm_reg_ref(list,op,def_cgmmsize(size),reg,ref,shuffle);
     end;
-*)
 
-(*
   procedure thlcg2ll.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle);
+    var
+      tocgsize: tcgsize;
     begin
-      cg.a_loadmm_intreg_reg(list,def_cgsize(fromsize),def_cgsize(tosize),intreg,mmreg,shuffle);
+      { records may be stored in mmregisters, but def_cgmmsize will return an
+        integer size for them... }
+      tocgsize:=getintmmcgsize(mmreg,def_cgmmsize(tosize));
+      cg.a_loadmm_intreg_reg(list,def_cgsize(fromsize),tocgsize,intreg,mmreg,shuffle);
     end;
 
   procedure thlcg2ll.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tdef; mmreg, intreg: tregister; shuffle: pmmshuffle);
+    var
+      fromcgsize: tcgsize;
     begin
-      cg.a_loadmm_reg_intreg(list,def_cgsize(fromsize),def_cgsize(tosize),mmreg,intreg,shuffle);
+      { records may be stored in mmregisters, but def_cgsize will return an
+        integer size for them... }
+      fromcgsize:=getintmmcgsize(mmreg,def_cgmmsize(fromsize));
+      cg.a_loadmm_reg_intreg(list,fromcgsize,def_cgsize(tosize),mmreg,intreg,shuffle);
     end;
-*)
+
   procedure thlcg2ll.a_op_const_reg(list: TAsmList; Op: TOpCG; size: tdef; a: tcgint; reg: TRegister);
     begin
       cg.a_op_const_reg(list,op,def_cgsize(size),a,reg);
@@ -1222,12 +1268,62 @@ implementation
           inherited;
       end;
     end;
-(*
+
   procedure thlcg2ll.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean);
+    var
+      reg : tregister;
+      href : treference;
+      newsize : tdef;
     begin
-       ncgutil.location_force_mmregscalar(list,l,maybeconst);
+      if (l.loc<>LOC_MMREGISTER)  and
+         ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then
+        begin
+          { if it's in an fpu register, store to memory first }
+          if (l.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
+            begin
+              tg.GetTemp(list,tcgsize2size[l.size],tcgsize2size[l.size],tt_normal,href);
+              cg.a_loadfpu_reg_ref(list,l.size,l.size,l.register,href);
+              location_reset_ref(l,LOC_REFERENCE,l.size,0);
+              l.reference:=href;
+            end;
+{$ifndef cpu64bitalu}
+          if (l.loc in [LOC_REGISTER,LOC_CREGISTER]) and
+             (l.size in [OS_64,OS_S64]) then
+            begin
+              reg:=cg.getmmregister(list,OS_F64);
+              cg64.a_loadmm_intreg64_reg(list,OS_F64,l.register64,reg);
+              l.size:=OS_F64;
+              size:=s64floattype;
+            end
+          else
+{$endif not cpu64bitalu}
+            begin
+               { on ARM, CFP values may be located in integer registers,
+                 and its second_int_to_real() also uses this routine to
+                 force integer (memory) values in an mmregister }
+               if (l.size in [OS_32,OS_S32]) then
+                 begin
+                   size:=cgsize_orddef(l.size);
+                   newsize:=s32floattype;
+                 end
+               else if (l.size in [OS_64,OS_S64]) then
+                 begin
+                   size:=cgsize_orddef(l.size);
+                   newsize:=s64floattype;
+                 end
+               else
+                 newsize:=size;
+               reg:=getmmregister(list,newsize);
+               a_loadmm_loc_reg(list,size,newsize,l,reg,mms_movescalar);
+               l.size:=def_cgsize(newsize);
+             end;
+          location_freetemp(list,l);
+          location_reset(l,LOC_MMREGISTER,l.size);
+          l.register:=reg;
+        end;
     end;
 
+(*
   procedure thlcg2ll.location_force_mmreg(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean);
     begin
       ncgutil.location_force_mmreg(list,l,maybeconst);
@@ -1282,7 +1378,7 @@ implementation
             LOC_CMMREGISTER:
               begin
                 tmploc:=l;
-                location_force_mmregscalar(list,tmploc,false);
+                location_force_mmregscalar(list,tmploc,size,false);
                 cg.a_loadmm_reg_cgpara(list,tmploc.size,tmploc.register,cgpara,mms_movescalar);
               end;
             { Some targets pass floats in normal registers }
@@ -1424,20 +1520,19 @@ implementation
       ncgutil.gen_load_cgpara_loc(list, vardef, para, destloc, reusepara);
     end;
 
-  procedure thlcg2ll.initialize_regvars(p: TObject; arg: pointer);
+  function thlcg2ll.getintmmcgsize(reg: tregister; size: tcgsize): tcgsize;
     begin
-      if (tsym(p).typ=staticvarsym) and
-         { not yet handled via tlhcgobj... }
-         (tstaticvarsym(p).initialloc.loc=LOC_CMMREGISTER) then
+      result:=size;
+      if getregtype(reg)=R_MMREGISTER then
         begin
-          { clear the whole register }
-          cg.a_opmm_reg_reg(TAsmList(arg),OP_XOR,reg_cgsize(tstaticvarsym(p).initialloc.register),
-            tstaticvarsym(p).initialloc.register,
-            tstaticvarsym(p).initialloc.register,
-            nil);
-        end
-      else
-        inherited initialize_regvars(p, arg);
+          case size of
+            OS_32:
+              result:=OS_F32;
+            OS_64:
+              result:=OS_F64;
+          end;
+        end;
     end;
 
+
 end.

+ 356 - 162
compiler/hlcgobj.pas

@@ -69,8 +69,8 @@ unit hlcgobj;
           {# Gets a register suitable to do integer operations on.}
           function getaddressregister(list:TAsmList;size:tdef):Tregister;virtual;
           function getfpuregister(list:TAsmList;size:tdef):Tregister;virtual;
-//        we don't have high level defs yet that translate into all mm cgsizes
-//          function getmmregister(list:TAsmList;size:tdef):Tregister;virtual;
+          { warning: only works correctly for fpu types currently }
+          function getmmregister(list:TAsmList;size:tdef):Tregister;virtual;
           function getflagregister(list:TAsmList;size:tdef):Tregister;virtual;
           function getregisterfordef(list: TAsmList;size:tdef):Tregister;virtual;
           {Does the generic cg need SIMD registers, like getmmxregister? Or should
@@ -96,8 +96,6 @@ unit hlcgobj;
              by the compiler for any purpose other than parameter passing/function
              result loading, this is the register type used }
           function def2regtyp(def: tdef): tregistertype; virtual;
-          { # Returns orddef corresponding to size }
-          class function tcgsize2orddef(size: tcgsize): torddef; static;
 
           {# Emit a label to the instruction stream. }
           procedure a_label(list : TAsmList;l : tasmlabel); inline;
@@ -292,27 +290,26 @@ unit hlcgobj;
           procedure a_loadfpu_ref_cgpara(list : TAsmList;fromsize : tdef;const ref : treference;const cgpara : TCGPara);virtual;
 
           { vector register move instructions }
-//        we don't have high level defs yet that translate into all mm cgsizes
-{
-          procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); virtual;
-          procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual;
-          procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); virtual;
-}
+          procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef;reg1, reg2: tregister;shuffle : pmmshuffle); virtual; abstract;
+          procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual; abstract;
+          procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef;reg: tregister; const ref: treference;shuffle : pmmshuffle); virtual; abstract;
+          procedure a_loadmm_ref_ref(list: TAsmList; fromsize, tosize: tdef; const fromref, toref: treference; shuffle: pmmshuffle); virtual;
           { required for subsetreg/ref; still tcgsize rather than tdef because of reason mentioned above }
-          procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister;shuffle : pmmshuffle);virtual; abstract;
-{
+          procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle : pmmshuffle);virtual;
           procedure a_loadmm_reg_loc(list: TAsmList; fromsize, tosize: tdef; const reg: tregister; const loc: tlocation;shuffle : pmmshuffle);virtual;
           procedure a_loadmm_reg_cgpara(list: TAsmList; fromsize: tdef; reg: tregister;const cgpara : TCGPara;shuffle : pmmshuffle); virtual;
           procedure a_loadmm_ref_cgpara(list: TAsmList; fromsize: tdef; const ref: treference;const cgpara : TCGPara;shuffle : pmmshuffle); virtual;
           procedure a_loadmm_loc_cgpara(list: TAsmList; fromsize: tdef; const loc: tlocation; const cgpara : TCGPara;shuffle : pmmshuffle); virtual;
-          procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tdef;src,dst: tregister;shuffle : pmmshuffle); virtual;
+          procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size : tdef;src,dst: tregister;shuffle : pmmshuffle); virtual; abstract;
           procedure a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size : tdef;const ref: treference; reg: tregister;shuffle : pmmshuffle); virtual;
           procedure a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size : tdef;const loc: tlocation; reg: tregister;shuffle : pmmshuffle); virtual;
           procedure a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size : tdef;reg: tregister;const ref: treference; shuffle : pmmshuffle); virtual;
-}
-//        we don't have high level defs yet that translate into all mm cgsizes
-//          procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); virtual;
-//          procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); virtual;
+          { requires a temp that is interpreted in two different ways, and we
+            don't have a way (yet) to tag a treference with tdef information so
+            targets like LLVM can insert the necessary bitcast
+          }
+          procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize : tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); virtual; abstract;
+          procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize : tdef; mmreg, intreg: tregister; shuffle : pmmshuffle); virtual; abstract;
 
           { basic arithmetic operations }
           { note: for operators which require only one argument (not, neg), use }
@@ -473,7 +470,7 @@ unit hlcgobj;
           procedure location_force_reg(list:TAsmList;var l:tlocation;src_size,dst_size:tdef;maybeconst:boolean);virtual;
           procedure location_force_fpureg(list:TAsmList;var l: tlocation;size: tdef;maybeconst:boolean);virtual;
           procedure location_force_mem(list:TAsmList;var l:tlocation;size:tdef);virtual;
-//          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;abstract;
+          procedure location_force_mmregscalar(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;
 //          procedure location_force_mmreg(list:TAsmList;var l: tlocation;size:tdef;maybeconst:boolean);virtual;abstract;
 
           { Retrieve the location of the data pointed to in location l, when the location is
@@ -607,6 +604,12 @@ implementation
     begin
       result:=cg.getfpuregister(list,def_cgsize(size));
     end;
+
+  function thlcgobj.getmmregister(list: TAsmList; size: tdef): Tregister;
+    begin
+      result:=cg.getmmregister(list,def_cgsize(size));
+    end;
+
 (*
   function thlcgobj.getmmregister(list: TAsmList; size: tdef): Tregister;
     begin
@@ -627,10 +630,8 @@ implementation
             result:=getaddressregister(list,size);
           R_FPUREGISTER:
             result:=getfpuregister(list,size);
-(*
           R_MMREGISTER:
             result:=getmmregister(list,size);
-*)
           else
             internalerror(2010122901);
         end;
@@ -713,30 +714,6 @@ implementation
         end;
     end;
 
-  class function thlcgobj.tcgsize2orddef(size: tcgsize): torddef;
-    begin
-      case size of
-        OS_8:
-          result:=torddef(u8inttype);
-        OS_S8:
-          result:=torddef(s8inttype);
-        OS_16:
-          result:=torddef(u16inttype);
-        OS_S16:
-          result:=torddef(s16inttype);
-        OS_32:
-          result:=torddef(u32inttype);
-        OS_S32:
-          result:=torddef(s32inttype);
-        OS_64:
-          result:=torddef(u64inttype);
-        OS_S64:
-          result:=torddef(s64inttype);
-        else
-          internalerror(2012050401);
-      end;
-    end;
-
   procedure thlcgobj.a_label(list: TAsmList; l: tasmlabel); inline;
     begin
       cg.a_label(list,l);
@@ -760,26 +737,31 @@ implementation
   procedure thlcgobj.a_load_reg_cgpara(list: TAsmList; size: tdef; r: tregister; const cgpara: TCGPara);
     var
       ref: treference;
+      tmpreg : tregister;
     begin
       cgpara.check_simple_location;
       paramanager.alloccgpara(list,cgpara);
+      if cgpara.location^.shiftval<0 then
+        begin
+          tmpreg:=getintregister(list,cgpara.location^.def);
+          a_op_const_reg_reg(list,OP_SHL,cgpara.location^.def,-cgpara.location^.shiftval,r,tmpreg);
+          r:=tmpreg;
+        end;
       case cgpara.location^.loc of
          LOC_REGISTER,LOC_CREGISTER:
-           a_load_reg_reg(list,size,cgpara.def,r,cgpara.location^.register);
+           a_load_reg_reg(list,size,cgpara.location^.def,r,cgpara.location^.register);
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
               reference_reset_base(ref,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-              a_load_reg_ref(list,size,cgpara.def,r,ref);
+              a_load_reg_ref(list,size,cgpara.location^.def,r,ref);
            end;
-(*
          LOC_MMREGISTER,LOC_CMMREGISTER:
-           a_loadmm_intreg_reg(list,size,cgpara.def,r,cgpara.location^.register,mms_movescalar);
-*)
+           a_loadmm_intreg_reg(list,size,cgpara.location^.def,r,cgpara.location^.register,mms_movescalar);
          LOC_FPUREGISTER,LOC_CFPUREGISTER:
            begin
              tg.gethltemp(list,size,size.size,tt_normal,ref);
-             a_load_reg_ref(list,size,cgpara.def,r,ref);
-             a_loadfpu_ref_cgpara(list,cgpara.def,ref,cgpara);
+             a_load_reg_ref(list,size,cgpara.location^.def,r,ref);
+             a_loadfpu_ref_cgpara(list,cgpara.location^.def,ref,cgpara);
              tg.ungettemp(list,ref);
            end
          else
@@ -793,13 +775,15 @@ implementation
     begin
        cgpara.check_simple_location;
        paramanager.alloccgpara(list,cgpara);
+      if cgpara.location^.shiftval<0 then
+        a:=a shl -cgpara.location^.shiftval;
        case cgpara.location^.loc of
           LOC_REGISTER,LOC_CREGISTER:
-            a_load_const_reg(list,cgpara.def,a,cgpara.location^.register);
+            a_load_const_reg(list,cgpara.location^.def,a,cgpara.location^.register);
           LOC_REFERENCE,LOC_CREFERENCE:
             begin
                reference_reset_base(ref,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-               a_load_const_ref(list,cgpara.def,a,ref);
+               a_load_const_ref(list,cgpara.location^.def,a,ref);
             end
           else
             internalerror(2010120416);
@@ -808,39 +792,161 @@ implementation
 
   procedure thlcgobj.a_load_ref_cgpara(list: TAsmList; size: tdef; const r: treference; const cgpara: TCGPara);
     var
-      ref: treference;
-    begin
-      cgpara.check_simple_location;
-      paramanager.alloccgpara(list,cgpara);
-      case cgpara.location^.loc of
-         LOC_REGISTER,LOC_CREGISTER:
-           a_load_ref_reg(list,size,cgpara.def,r,cgpara.location^.register);
-         LOC_REFERENCE,LOC_CREFERENCE:
-           begin
-              reference_reset_base(ref,cgpara.location^.reference.index,cgpara.location^.reference.offset,cgpara.alignment);
-              a_load_ref_ref(list,size,cgpara.def,r,ref);
-           end
-(*
-         LOC_MMREGISTER,LOC_CMMREGISTER:
-           begin
-              case location^.size of
-                OS_F32,
-                OS_F64,
-                OS_F128:
-                  a_loadmm_ref_reg(list,cgpara.def,cgpara.def,r,location^.register,mms_movescalar);
-                OS_M8..OS_M128,
-                OS_MS8..OS_MS128:
-                  a_loadmm_ref_reg(list,cgpara.def,cgpara.def,r,location^.register,nil);
-                else
-                  internalerror(2010120417);
+      tmpref, ref: treference;
+      tmpreg: tregister;
+      location: pcgparalocation;
+      orgsizeleft,
+      sizeleft: tcgint;
+      reghasvalue: boolean;
+    begin
+      location:=cgpara.location;
+      tmpref:=r;
+      sizeleft:=cgpara.intsize;
+      while assigned(location) do
+        begin
+          paramanager.allocparaloc(list,location);
+          case location^.loc of
+            LOC_REGISTER,LOC_CREGISTER:
+              begin
+                 { Parameter locations are often allocated in multiples of
+                   entire registers. If a parameter only occupies a part of
+                   such a register (e.g. a 16 bit int on a 32 bit
+                   architecture), the size of this parameter can only be
+                   determined by looking at the "size" parameter of this
+                   method -> if the size parameter is <= sizeof(aint), then
+                   we check that there is only one parameter location and
+                   then use this "size" to load the value into the parameter
+                   location }
+                 if (def_cgsize(size)<>OS_NO) and
+                    (size.size<=sizeof(aint)) then
+                   begin
+                     cgpara.check_simple_location;
+                     a_load_ref_reg(list,size,location^.def,tmpref,location^.register);
+                     if location^.shiftval<0 then
+                       a_op_const_reg(list,OP_SHL,location^.def,-location^.shiftval,location^.register);
+                   end
+                 { there's a lot more data left, and the current paraloc's
+                   register is entirely filled with part of that data }
+                 else if (sizeleft>sizeof(aint)) then
+                   begin
+                     a_load_ref_reg(list,location^.def,location^.def,tmpref,location^.register);
+                   end
+                 { we're at the end of the data, and it can be loaded into
+                   the current location's register with a single regular
+                   load }
+                 else if (sizeleft in [1,2{$ifndef cpu16bitalu},4{$endif}{$ifdef cpu64bitalu},8{$endif}]) then
+                   begin
+                     { don't use cgsize_orddef(int_cgsize(sizeleft)) as fromdef,
+                       because that may be larger than location^.register in
+                       case of padding at the end of a record }
+                     a_load_ref_reg(list,location^.def,location^.def,tmpref,location^.register);
+                     if location^.shiftval<0 then
+                       a_op_const_reg(list,OP_SHL,location^.def,-location^.shiftval,location^.register);
+                   end
+                 { we're at the end of the data, and we need multiple loads
+                   to get it in the register because it's an irregular size }
+                 else
+                   begin
+                     { should be the last part }
+                     if assigned(location^.next) then
+                       internalerror(2010052907);
+                     { load the value piecewise to get it into the register }
+                     orgsizeleft:=sizeleft;
+                     reghasvalue:=false;
+{$ifdef cpu64bitalu}
+                     if sizeleft>=4 then
+                       begin
+                         a_load_ref_reg(list,u32inttype,location^.def,tmpref,location^.register);
+                         dec(sizeleft,4);
+                         if target_info.endian=endian_big then
+                           a_op_const_reg(list,OP_SHL,location^.def,sizeleft*8,location^.register);
+                         inc(tmpref.offset,4);
+                         reghasvalue:=true;
+                       end;
+{$endif cpu64bitalu}
+                     if sizeleft>=2 then
+                       begin
+                         tmpreg:=getintregister(list,location^.def);
+                         a_load_ref_reg(list,u16inttype,location^.def,tmpref,tmpreg);
+                         dec(sizeleft,2);
+                         if reghasvalue then
+                           begin
+                             if target_info.endian=endian_big then
+                               a_op_const_reg(list,OP_SHL,location^.def,sizeleft*8,tmpreg)
+                             else
+                               a_op_const_reg(list,OP_SHL,location^.def,(orgsizeleft-(sizeleft+2))*8,tmpreg);
+                             a_op_reg_reg(list,OP_OR,location^.def,tmpreg,location^.register);
+                           end
+                         else
+                           begin
+                             if target_info.endian=endian_big then
+                               a_op_const_reg_reg(list,OP_SHL,location^.def,sizeleft*8,tmpreg,location^.register)
+                             else
+                               a_load_reg_reg(list,location^.def,location^.def,tmpreg,location^.register);
+                           end;
+                         inc(tmpref.offset,2);
+                         reghasvalue:=true;
+                       end;
+                     if sizeleft=1 then
+                       begin
+                         tmpreg:=getintregister(list,location^.def);
+                         a_load_ref_reg(list,u8inttype,location^.def,tmpref,tmpreg);
+                         dec(sizeleft,1);
+                         if reghasvalue then
+                           begin
+                             if target_info.endian=endian_little then
+                               a_op_const_reg(list,OP_SHL,location^.def,(orgsizeleft-(sizeleft+1))*8,tmpreg);
+                             a_op_reg_reg(list,OP_OR,location^.def,tmpreg,location^.register)
+                           end
+                         else
+                           a_load_reg_reg(list,location^.def,location^.def,tmpreg,location^.register);
+                         inc(tmpref.offset);
+                       end;
+                     if location^.shiftval<0 then
+                       a_op_const_reg(list,OP_SHL,location^.def,-location^.shiftval,location^.register);
+                     { the loop will already adjust the offset and sizeleft }
+                     dec(tmpref.offset,orgsizeleft);
+                     sizeleft:=orgsizeleft;
+                   end;
               end;
-           end
-*)
-         else
-           internalerror(2010120418);
-      end;
+            LOC_REFERENCE,LOC_CREFERENCE:
+              begin
+                 if assigned(location^.next) then
+                   internalerror(2010052906);
+                 reference_reset_base(ref,location^.reference.index,location^.reference.offset,newalignment(cgpara.alignment,cgpara.intsize-sizeleft));
+                 if (def_cgsize(size)<>OS_NO) and
+                    (size.size=sizeleft) and
+                    (sizeleft<=sizeof(aint)) then
+                   a_load_ref_ref(list,size,location^.def,tmpref,ref)
+                 else
+                   { use concatcopy, because the parameter can be larger than }
+                   { what the OS_* constants can handle                       }
+                   g_concatcopy(list,location^.def,tmpref,ref);
+              end;
+            LOC_MMREGISTER,LOC_CMMREGISTER:
+              begin
+                 case location^.size of
+                   OS_F32,
+                   OS_F64,
+                   OS_F128:
+                     a_loadmm_ref_reg(list,location^.def,location^.def,tmpref,location^.register,mms_movescalar);
+                   OS_M8..OS_M128,
+                   OS_MS8..OS_MS128:
+                     a_loadmm_ref_reg(list,location^.def,location^.def,tmpref,location^.register,nil);
+                   else
+                     internalerror(2010053101);
+                 end;
+              end
+            else
+              internalerror(2010053111);
+          end;
+          inc(tmpref.offset,tcgsize2size[location^.size]);
+          dec(sizeleft,tcgsize2size[location^.size]);
+          location:=location^.next;
+        end;
     end;
 
+
   procedure thlcgobj.a_load_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation; const cgpara: TCGPara);
     begin
       case l.loc of
@@ -865,12 +971,12 @@ implementation
        if cgpara.location^.loc in [LOC_CREGISTER,LOC_REGISTER] then
          begin
            paramanager.allocparaloc(list,cgpara.location);
-           a_loadaddr_ref_reg(list,fromsize,cgpara.def,r,cgpara.location^.register)
+           a_loadaddr_ref_reg(list,fromsize,cgpara.location^.def,r,cgpara.location^.register)
          end
        else
          begin
            hr:=getaddressregister(list,cgpara.def);
-           a_loadaddr_ref_reg(list,fromsize,cgpara.def,r,hr);
+           a_loadaddr_ref_reg(list,fromsize,cgpara.location^.def,r,hr);
            a_load_reg_cgpara(list,cgpara.def,hr,cgpara);
          end;
     end;
@@ -942,10 +1048,8 @@ implementation
           a_load_reg_subsetreg(list,fromsize,tosize,reg,loc.sreg);
         LOC_SUBSETREF,LOC_CSUBSETREF:
           a_load_reg_subsetref(list,fromsize,tosize,reg,loc.sref);
-        { we don't have enough type information to handle these here
         LOC_MMREGISTER,LOC_CMMREGISTER:
-          a_loadmm_intreg_reg(list,fromsize,loc.size,reg,loc.register,mms_movescalar);
-        }
+          a_loadmm_intreg_reg(list,fromsize,tosize,reg,loc.register,mms_movescalar);
         else
           internalerror(2010120402);
       end;
@@ -1051,7 +1155,7 @@ implementation
       subsetsizereg: tregister;
       stopbit: byte;
     begin
-      subsetregdef:=tcgsize2orddef(sreg.subsetregsize);
+      subsetregdef:=cgsize_orddef(sreg.subsetregsize);
       tmpreg:=getintregister(list,subsetregdef);
       if is_signed(subsetsize) then
         begin
@@ -1092,8 +1196,8 @@ implementation
     begin
       if (fromsreg.bitlen>=tosreg.bitlen) then
         begin
-          fromsubsetregdef:=tcgsize2orddef(fromsreg.subsetregsize);
-          tosubsetregdef:=tcgsize2orddef(tosreg.subsetregsize);
+          fromsubsetregdef:=cgsize_orddef(fromsreg.subsetregsize);
+          tosubsetregdef:=cgsize_orddef(tosreg.subsetregsize);
           if (fromsreg.startbit<=tosreg.startbit) then
             begin
               { tosreg may be larger -> use its size to perform the shift }
@@ -1152,7 +1256,7 @@ implementation
       bitmask: aword;
       stopbit: byte;
     begin
-       subsetregdef:=tcgsize2orddef(sreg.subsetregsize);
+       subsetregdef:=cgsize_orddef(sreg.subsetregsize);
        stopbit:=sreg.startbit+sreg.bitlen;
        // on x86(64), 1 shl 32(64) = 1 instead of 0
        if (stopbit<>AIntBits) then
@@ -1652,7 +1756,7 @@ implementation
 
       if (intloadsize>sizeof(aint)) then
         intloadsize:=sizeof(aint);
-      loadsize:=tcgsize2orddef(int_cgsize(intloadsize));
+      loadsize:=cgsize_orddef(int_cgsize(intloadsize));
 
       if (sref.bitlen>sizeof(aint)*8) then
         internalerror(2006081312);
@@ -1751,7 +1855,7 @@ implementation
           a_op_reg_reg(list,OP_NEG,osuinttype,tmpreg,tmpreg);
 
           { load next "loadbitsize" bits of the array }
-          a_load_ref_reg(list,tcgsize2orddef(int_cgsize(loadbitsize div 8)),osuinttype,tmpref,extra_value_reg);
+          a_load_ref_reg(list,cgsize_orddef(int_cgsize(loadbitsize div 8)),osuinttype,tmpref,extra_value_reg);
 
           a_op_reg_reg(list,OP_SHR,osuinttype,tmpreg,extra_value_reg);
           { if there are no bits in extra_value_reg, then sref.bitindex was      }
@@ -1775,7 +1879,7 @@ implementation
           a_op_reg_reg(list,OP_NEG,osuinttype,tmpreg,tmpreg);
 
           { load next "loadbitsize" bits of the array }
-          a_load_ref_reg(list,tcgsize2orddef(int_cgsize(loadbitsize div 8)),osuinttype,tmpref,extra_value_reg);
+          a_load_ref_reg(list,cgsize_orddef(int_cgsize(loadbitsize div 8)),osuinttype,tmpref,extra_value_reg);
 
           { tmpreg is in the range 1..<cpu_bitsize>-1 -> always ok }
           a_op_reg_reg(list,OP_SHL,osuinttype,tmpreg,extra_value_reg);
@@ -2120,7 +2224,7 @@ implementation
       subsetregdef: torddef;
       stopbit: byte;
     begin
-      subsetregdef:=tcgsize2orddef(sreg.subsetregsize);
+      subsetregdef:=cgsize_orddef(sreg.subsetregsize);
       stopbit:=sreg.startbit+sreg.bitlen;
       // on x86(64), 1 shl 32(64) = 1 instead of 0
       if (stopbit<>AIntBits) then
@@ -2309,23 +2413,19 @@ implementation
           internalerror(2010120423);
       end;
     end;
-(*
-  procedure thlcgobj.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle);
-    begin
-      cg.a_loadmm_reg_reg(list,def_cgsize(fromsize),def_cgsize(tosize),reg1,reg2,shuffle);
-    end;
 
-  procedure thlcgobj.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
-    begin
-      cg.a_loadmm_ref_reg(list,def_cgsize(fromsize),def_cgsize(tosize),ref,reg,shuffle);
-    end;
-
-  procedure thlcgobj.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+  procedure thlcgobj.a_loadmm_ref_ref(list: TAsmList; fromsize, tosize: tdef; const fromref, toref: treference; shuffle: pmmshuffle);
+    var
+      reg: tregister;
     begin
-      cg.a_loadmm_reg_ref(list,def_cgsize(fromsize),def_cgsize(tosize),reg,ref,shuffle);
+      reg:=getmmregister(list,tosize);
+      a_loadmm_ref_reg(list,fromsize,tosize,fromref,reg,shuffle);
+      a_loadmm_reg_ref(list,tosize,tosize,reg,toref,shuffle);
     end;
 
   procedure thlcgobj.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle);
+    var
+      tmpreg: tregister;
     begin
       case loc.loc of
         LOC_MMREGISTER,LOC_CMMREGISTER:
@@ -2334,6 +2434,13 @@ implementation
           a_loadmm_ref_reg(list,fromsize,tosize,loc.reference,reg,shuffle);
         LOC_REGISTER,LOC_CREGISTER:
           a_loadmm_intreg_reg(list,fromsize,tosize,loc.register,reg,shuffle);
+        LOC_SUBSETREG,LOC_CSUBSETREG,
+        LOC_SUBSETREF,LOC_CSUBSETREF:
+          begin
+            tmpreg:=getintregister(list,fromsize);
+            a_load_loc_reg(list,fromsize,fromsize,loc,tmpreg);
+            a_loadmm_intreg_reg(list,fromsize,tosize,tmpreg,reg,shuffle);
+          end
         else
           internalerror(2010120414);
       end;
@@ -2369,11 +2476,11 @@ implementation
           begin
             if assigned(shuffle) and
                not shufflescalar(shuffle) then
-              internalerror(2009112510);
-             a_loadmm_reg_intreg(list,deomsize,cgpara.def,reg,cgpara.location^.register,mms_movescalar);
+              internalerror(2012071205);
+             a_loadmm_reg_intreg(list,fromsize,cgpara.def,reg,cgpara.location^.register,mms_movescalar);
           end
         else
-          internalerror(2010120427);
+          internalerror(2012071204);
       end;
     end;
 
@@ -2383,8 +2490,8 @@ implementation
        hs : tmmshuffle;
     begin
        cgpara.check_simple_location;
-       hr:=cg.getmmregister(list,cgpara.size);
-       a_loadmm_ref_reg(list,deomsize,cgpara.def,ref,hr,shuffle);
+       hr:=getmmregister(list,cgpara.def);
+       a_loadmm_ref_reg(list,fromsize,cgpara.def,ref,hr,shuffle);
        if realshuffle(shuffle) then
          begin
            hs:=shuffle^;
@@ -2399,31 +2506,68 @@ implementation
     begin
 {$ifdef extdebug}
       if def_cgsize(fromsize)<>loc.size then
-        internalerror(2010112105);
+        internalerror(2012071203);
 {$endif}
-      cg.a_loadmm_loc_cgpara(list,loc,cgpara,shuffle);
-    end;
-
-  procedure thlcgobj.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tdef; src, dst: tregister; shuffle: pmmshuffle);
-    begin
-      cg.a_opmm_reg_reg(list,op,def_cgsize(size),src,dst,shuffle);
+      case loc.loc of
+        LOC_MMREGISTER,LOC_CMMREGISTER:
+          a_loadmm_reg_cgpara(list,fromsize,loc.register,cgpara,shuffle);
+        LOC_REFERENCE,LOC_CREFERENCE:
+          a_loadmm_ref_cgpara(list,fromsize,loc.reference,cgpara,shuffle);
+        else
+          internalerror(2012071202);
+      end;
     end;
 
   procedure thlcgobj.a_opmm_ref_reg(list: TAsmList; Op: TOpCG; size: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+    var
+       hr : tregister;
+       hs : tmmshuffle;
     begin
-      cg.a_opmm_ref_reg(list,op,def_cgsize(size),ref,reg,shuffle)
+       hr:=getmmregister(list,size);
+       a_loadmm_ref_reg(list,size,size,ref,hr,shuffle);
+       if realshuffle(shuffle) then
+         begin
+           hs:=shuffle^;
+           removeshuffles(hs);
+           a_opmm_reg_reg(list,op,size,hr,reg,@hs);
+         end
+       else
+         a_opmm_reg_reg(list,op,size,hr,reg,shuffle);
     end;
 
   procedure thlcgobj.a_opmm_loc_reg(list: TAsmList; Op: TOpCG; size: tdef; const loc: tlocation; reg: tregister; shuffle: pmmshuffle);
     begin
-      cg.a_opmm_loc_reg(list,op,def_cgsize(size),loc,reg,shuffle);
+      case loc.loc of
+        LOC_CMMREGISTER,LOC_MMREGISTER:
+          a_opmm_reg_reg(list,op,size,loc.register,reg,shuffle);
+        LOC_CREFERENCE,LOC_REFERENCE:
+          a_opmm_ref_reg(list,op,size,loc.reference,reg,shuffle);
+        else
+          internalerror(2012071201);
+      end;
     end;
 
   procedure thlcgobj.a_opmm_reg_ref(list: TAsmList; Op: TOpCG; size: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+    var
+       hr : tregister;
+       hs : tmmshuffle;
     begin
-      cg.a_opmm_reg_ref(list,op,def_cgsize(size),reg,ref,shuffle);
+       hr:=getmmregister(list,size);
+       a_loadmm_ref_reg(list,size,size,ref,hr,shuffle);
+       if realshuffle(shuffle) then
+         begin
+           hs:=shuffle^;
+           removeshuffles(hs);
+           a_opmm_reg_reg(list,op,size,reg,hr,@hs);
+           a_loadmm_reg_ref(list,size,size,hr,ref,@hs);
+         end
+       else
+         begin
+           a_opmm_reg_reg(list,op,size,reg,hr,shuffle);
+           a_loadmm_reg_ref(list,size,size,hr,ref,shuffle);
+         end;
     end;
-*)
+
 (*
   procedure thlcgobj.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle);
     begin
@@ -2801,12 +2945,9 @@ implementation
 
   procedure thlcgobj.g_concatcopy(list: TAsmList; size: tdef; const source, dest: treference);
     begin
-{
       if use_vectorfpu(size) then
-        a_loadmm_ref_ref()
-      else
- }
-      if size.typ<>floatdef then
+        a_loadmm_ref_ref(list,size,size,source,dest,mms_movescalar)
+      else if size.typ<>floatdef then
         a_load_ref_ref(list,size,size,source,dest)
       else
         a_loadfpu_ref_ref(list,size,size,source,dest);
@@ -3560,16 +3701,17 @@ implementation
             location_reset_ref(l,LOC_REFERENCE,l.size,0);
             l.reference:=r;
           end;
-(*
         LOC_MMREGISTER,
         LOC_CMMREGISTER:
           begin
+            { vectors can't be represented yet using tdef }
+            if size.typ<>floatdef then
+              internalerror(2012062301);
             tg.gethltemp(list,size,size.size,tt_normal,r);
-            cg.a_loadmm_reg_ref(list,l.size,l.size,l.register,r,mms_movescalar);
+            a_loadmm_reg_ref(list,size,size,l.register,r,mms_movescalar);
             location_reset_ref(l,LOC_REFERENCE,l.size,0);
             l.reference:=r;
           end;
-*)
         LOC_CONSTANT,
         LOC_REGISTER,
         LOC_CREGISTER,
@@ -3582,7 +3724,7 @@ implementation
                not is_open_array(size) then
               forcesize:=size.size
             else
-              forcesize:=voidpointertype.size;
+              forcesize:=sizeof(pint);
             tg.gethltemp(list,size,forcesize,tt_normal,r);
             a_load_loc_ref(list,size,size,l,r);
             location_reset_ref(l,LOC_REFERENCE,l.size,0);
@@ -3595,6 +3737,55 @@ implementation
       end;
     end;
 
+  procedure thlcgobj.location_force_mmregscalar(list: TAsmList; var l: tlocation; size: tdef; maybeconst: boolean);
+    var
+      reg : tregister;
+      href : treference;
+      newsize : tdef;
+    begin
+      if (l.loc<>LOC_MMREGISTER)  and
+         ((l.loc<>LOC_CMMREGISTER) or (not maybeconst)) then
+        begin
+          { if it's in an fpu register, store to memory first }
+          if (l.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
+            begin
+              tg.gethltemp(list,size,-1,tt_normal,href);
+              hlcg.a_loadfpu_reg_ref(list,size,size,l.register,href);
+              location_reset_ref(l,LOC_REFERENCE,l.size,0);
+              l.reference:=href;
+            end;
+          { on ARM, CFP values may be located in integer registers,
+            and its second_int_to_real() also uses this routine to
+            force integer (memory) values in an mmregister }
+          if (l.size in [OS_32,OS_S32]) then
+            begin
+              size:=cgsize_orddef(l.size);
+              newsize:=s32floattype;
+            end
+          else if (l.size in [OS_64,OS_S64]) then
+            begin
+              size:=cgsize_orddef(l.size);
+              newsize:=s64floattype;
+            end
+          else
+            newsize:=size;
+          case size.size of
+            4:
+              newsize:=s32floattype;
+            8:
+              newsize:=s64floattype;
+            else
+              newsize:=size;
+          end;
+          reg:=hlcg.getmmregister(list,newsize);
+          hlcg.a_loadmm_loc_reg(list,size,newsize,l,reg,mms_movescalar);
+          l.size:=def_cgsize(newsize);
+          location_freetemp(list,l);
+          location_reset(l,LOC_MMREGISTER,l.size);
+          l.register:=reg;
+        end;
+    end;
+
     procedure thlcgobj.location_get_data_ref(list: TAsmList; def: tdef; const l: tlocation; var ref: treference; loadref: boolean; alignment: longint);
       begin
         case l.loc of
@@ -3972,14 +4163,12 @@ implementation
                  a_load_const_reg(TAsmList(arg),tstaticvarsym(p).vardef,0,
                      tstaticvarsym(p).initialloc.register);
              end;
-(*
            LOC_CMMREGISTER :
              { clear the whole register }
-             cg.a_opmm_reg_reg(TAsmList(arg),OP_XOR,reg_cgsize(tstaticvarsym(p).initialloc.register),
+             a_opmm_reg_reg(TAsmList(arg),OP_XOR,tstaticvarsym(p).vardef,
                tstaticvarsym(p).initialloc.register,
                tstaticvarsym(p).initialloc.register,
                nil);
-*)
            LOC_CFPUREGISTER :
              begin
                { initialize fpu regvar by loading from memory }
@@ -4312,9 +4501,10 @@ implementation
     end;
 
   procedure thlcgobj.gen_loadfpu_loc_cgpara(list: TAsmList; size: tdef; const l: tlocation; const cgpara: tcgpara; locintsize: longint);
+    var
+      tmploc: tlocation;
     begin
       case l.loc of
-(*
         LOC_MMREGISTER,
         LOC_CMMREGISTER:
           case cgpara.location^.loc of
@@ -4324,30 +4514,27 @@ implementation
             LOC_CMMREGISTER,
             LOC_REGISTER,
             LOC_CREGISTER :
-              cg.a_loadmm_reg_cgpara(list,locsize,l.register,cgpara,mms_movescalar);
+              a_loadmm_reg_cgpara(list,size,l.register,cgpara,mms_movescalar);
             LOC_FPUREGISTER,
             LOC_CFPUREGISTER:
               begin
                 tmploc:=l;
-                location_force_fpureg(list,tmploc,false);
-                cg.a_loadfpu_reg_cgpara(list,tmploc.size,tmploc.register,cgpara);
+                location_force_fpureg(list,tmploc,size,false);
+                a_loadfpu_reg_cgpara(list,size,tmploc.register,cgpara);
               end;
             else
               internalerror(200204249);
           end;
-*)
         LOC_FPUREGISTER,
         LOC_CFPUREGISTER:
           case cgpara.location^.loc of
-(*
             LOC_MMREGISTER,
             LOC_CMMREGISTER:
               begin
                 tmploc:=l;
-                location_force_mmregscalar(list,tmploc,false);
-                cg.a_loadmm_reg_cgpara(list,tmploc.size,tmploc.register,cgpara,mms_movescalar);
+                location_force_mmregscalar(list,tmploc,size,false);
+                a_loadmm_reg_cgpara(list,size,tmploc.register,cgpara,mms_movescalar);
               end;
-*)
             { Some targets pass floats in normal registers }
             LOC_REGISTER,
             LOC_CREGISTER,
@@ -4362,11 +4549,9 @@ implementation
         LOC_REFERENCE,
         LOC_CREFERENCE:
           case cgpara.location^.loc of
-(*
             LOC_MMREGISTER,
             LOC_CMMREGISTER:
-              cg.a_loadmm_ref_cgpara(list,locsize,l.reference,cgpara,mms_movescalar);
-*)
+              a_loadmm_ref_cgpara(list,size,l.reference,cgpara,mms_movescalar);
             { Some targets pass floats in normal registers }
             LOC_REGISTER,
             LOC_CREGISTER,
@@ -4416,19 +4601,18 @@ implementation
           begin
             a_load_loc_cgpara(list,vardef,l,cgpara);
           end;
-(*
         LOC_MMREGISTER,
         LOC_CMMREGISTER:
           begin
-            case l.size of
-              OS_F32,
-              OS_F64:
-                cg.a_loadmm_loc_cgpara(list,l,cgpara,mms_movescalar);
-              else
-                cg.a_loadmm_loc_cgpara(list,l,cgpara,nil);
-            end;
+            if use_vectorfpu(vardef) then
+              a_loadmm_loc_cgpara(list,vardef,l,cgpara,mms_movescalar)
+            else
+              { no vector support yet }
+              internalerror(2012071212);
+              {
+              cg.a_loadmm_loc_cgpara(list,l,cgpara,nil);
+              }
           end;
-*)
         else
           internalerror(2011010213);
       end;
@@ -4463,6 +4647,7 @@ implementation
     var
       ressym : tabstractnormalvarsym;
       funcretloc : TCGPara;
+      retdef : tdef;
     begin
       { Is the loading needed? }
       if is_void(current_procinfo.procdef.returndef) or
@@ -4478,18 +4663,27 @@ implementation
 
       { constructors return self }
       if (current_procinfo.procdef.proctypeoption=potype_constructor) then
-        ressym:=tabstractnormalvarsym(current_procinfo.procdef.parast.Find('self'))
+        begin
+          ressym:=tabstractnormalvarsym(current_procinfo.procdef.parast.Find('self'));
+          retdef:=ressym.vardef;
+          { and TP-style constructors return a pointer to self }
+          if is_object(ressym.vardef) then
+            retdef:=getpointerdef(retdef);
+        end
       else
-        ressym:=tabstractnormalvarsym(current_procinfo.procdef.funcretsym);
+        begin
+          ressym:=tabstractnormalvarsym(current_procinfo.procdef.funcretsym);
+          retdef:=ressym.vardef;
+        end;
       if (ressym.refs>0) or
-         is_managed_type(ressym.vardef) then
+         is_managed_type(retdef) then
         begin
           { was: don't do anything if funcretloc.loc in [LOC_INVALID,LOC_REFERENCE] }
           if not paramanager.ret_in_param(current_procinfo.procdef.returndef,current_procinfo.procdef) then
-            gen_load_loc_cgpara(list,ressym.vardef,ressym.localloc,funcretloc);
+            gen_load_loc_cgpara(list,retdef,ressym.localloc,funcretloc);
         end
       else
-        gen_load_uninitialized_function_result(list,current_procinfo.procdef,ressym.vardef,funcretloc)
+        gen_load_uninitialized_function_result(list,current_procinfo.procdef,retdef,funcretloc)
     end;
 
   procedure thlcgobj.record_generated_code_for_procdef(pd: tprocdef; code, data: TAsmList);

+ 13 - 3
compiler/i386/cgcpu.pas

@@ -293,6 +293,16 @@ unit cgcpu;
 
 
     procedure tcg386.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
+
+      procedure increase_fp(a : tcgint);
+        var
+          href : treference;
+        begin
+          reference_reset_base(href,current_procinfo.framepointer,a,0);
+          { normally, lea is a better choice than an add }
+          list.concat(Taicpu.op_ref_reg(A_LEA,TCGSize2OpSize[OS_ADDR],href,current_procinfo.framepointer));
+        end;
+
       var
         stacksize : longint;
       begin
@@ -304,7 +314,7 @@ unit cgcpu;
         { remove stackframe }
         if not nostackframe then
           begin
-            if (current_procinfo.framepointer=NR_STACK_POINTER_REG) then
+            if current_procinfo.framepointer=NR_STACK_POINTER_REG then
               begin
                 stacksize:=current_procinfo.calc_stackframe_size;
                 if (target_info.stackalign>4) and
@@ -314,8 +324,8 @@ unit cgcpu;
                     { if you (think you) know what you are doing              }
                     (po_assembler in current_procinfo.procdef.procoptions)) then
                   stacksize := align(stacksize+sizeof(aint),target_info.stackalign) - sizeof(aint);
-                if (stacksize<>0) then
-                  cg.a_op_const_reg(list,OP_ADD,OS_ADDR,stacksize,current_procinfo.framepointer);
+                if stacksize<>0 then
+                  increase_fp(stacksize);
               end
             else
               list.concat(Taicpu.op_none(A_LEAVE,S_NO));

+ 8 - 4
compiler/i386/cpuinfo.pas

@@ -59,7 +59,8 @@ Type
       fpu_ssse3,
       fpu_sse41,
       fpu_sse42,
-      fpu_avx
+      fpu_avx,
+      fpu_avx2
      );
 
 
@@ -96,11 +97,14 @@ Const
      'SSSE3',
      'SSE41',
      'SSE42',
-     'AVX'
+     'AVX',
+     'AVX2'
    );
 
-   sse_singlescalar : set of tfputype = [fpu_sse,fpu_sse2,fpu_sse3];
-   sse_doublescalar : set of tfputype = [fpu_sse2,fpu_sse3];
+   sse_singlescalar = [fpu_sse..fpu_avx2];
+   sse_doublescalar = [fpu_sse2..fpu_avx2];
+
+   fpu_avx_instructionsets = [fpu_avx,fpu_avx2];
 
    { Supported optimizations, only used for information }
    supported_optimizerswitches = genericlevel1optimizerswitches+

+ 43 - 11
compiler/i386/cpupara.pas

@@ -277,18 +277,23 @@ unit cpupara;
     procedure ti386paramanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
-        def : tdef;
+        psym: tparavarsym;
+        pdef: tdef;
       begin
-        def:=tparavarsym(pd.paras[nr-1]).vardef;
+        psym:=tparavarsym(pd.paras[nr-1]);
+        pdef:=psym.vardef;
+        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
+          pdef:=getpointerdef(pdef);
         cgpara.reset;
-        cgpara.size:=def_cgsize(def);
+        cgpara.size:=def_cgsize(pdef);
         cgpara.intsize:=tcgsize2size[cgpara.size];
         cgpara.alignment:=get_para_align(pd.proccalloption);
-        cgpara.def:=def;
+        cgpara.def:=pdef;
         paraloc:=cgpara.add_location;
         with paraloc^ do
          begin
-           size:=OS_INT;
+           size:=def_cgsize(pdef);
+           def:=pdef;
            if pd.proccalloption=pocall_register then
              begin
                if (nr<=length(parasupregs)) then
@@ -367,6 +372,7 @@ unit cpupara;
             paraloc^.loc:=LOC_FPUREGISTER;
             paraloc^.register:=NR_FPU_RESULT_REG;
             paraloc^.size:=retcgsize;
+            paraloc^.def:=result.def;
           end
         else
          { Return in register }
@@ -381,6 +387,7 @@ unit cpupara;
                else
                  paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
                paraloc^.size:=OS_32;
+               paraloc^.def:=u32inttype;
 
                { high 32bits }
                paraloc:=result.add_location;
@@ -390,10 +397,12 @@ unit cpupara;
                else
                  paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
                paraloc^.size:=OS_32;
+               paraloc^.def:=u32inttype;
              end
             else
              begin
                paraloc^.size:=retcgsize;
+               paraloc^.def:=result.def;
                if side=callerside then
                  paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
@@ -413,8 +422,9 @@ unit cpupara;
         paralen,
         varalign   : longint;
         paraalign  : shortint;
-        pushaddr   : boolean;
         paracgsize : tcgsize;
+        firstparaloc,
+        pushaddr   : boolean;
       begin
         paraalign:=get_para_align(p.proccalloption);
         { we push Flags and CS as long
@@ -476,6 +486,7 @@ unit cpupara;
                 paraloc:=hp.paraloc[side].add_location;
                 paraloc^.loc:=LOC_REFERENCE;
                 paraloc^.size:=paracgsize;
+                paraloc^.def:=paradef;
                 if side=callerside then
                   paraloc^.reference.index:=NR_STACK_POINTER_REG
                 else
@@ -497,6 +508,7 @@ unit cpupara;
               begin
                 if paralen=0 then
                   internalerror(200501163);
+                firstparaloc:=true;
                 while (paralen>0) do
                   begin
                     paraloc:=hp.paraloc[side].add_location;
@@ -505,15 +517,22 @@ unit cpupara;
                     if (paracgsize in [OS_F64,OS_F32]) then
                       begin
                         paraloc^.size:=paracgsize;
+                        paraloc^.def:=paradef;
                         l:=paralen;
                       end
                     else
                       begin
                         { We can allocate at maximum 32 bits per location }
                         if paralen>sizeof(aint) then
-                          l:=sizeof(aint)
+                          begin
+                            l:=sizeof(aint);
+                            paraloc^.def:=uinttype;
+                          end
                         else
-                          l:=paralen;
+                          begin
+                            l:=paralen;
+                            paraloc^.def:=get_paraloc_def(paradef,l,firstparaloc);
+                          end;
                         paraloc^.size:=int_cgsize(l);
                       end;
                     if (side=callerside) or
@@ -531,6 +550,7 @@ unit cpupara;
                         inc(paraloc^.reference.offset,4);
                     parasize:=align(parasize+l,varalign);
                     dec(paralen,l);
+                    firstparaloc:=false;
                   end;
               end;
             if p.proccalloption in pushleftright_pocalls then
@@ -552,9 +572,10 @@ unit cpupara;
         l,
         paralen,
         varalign : longint;
-        pushaddr : boolean;
         paraalign : shortint;
         pass : byte;
+        firstparaloc,
+        pushaddr : boolean;
       begin
         if paras.count=0 then
           exit;
@@ -621,6 +642,7 @@ unit cpupara;
                           begin
                             paraloc:=hp.paraloc[side].add_location;
                             paraloc^.size:=paracgsize;
+                            paraloc^.def:=paradef;
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.register:=newreg(R_INTREGISTER,parasupregs[parareg],cgsize2subreg(R_INTREGISTER,paracgsize));
                             inc(parareg);
@@ -636,6 +658,7 @@ unit cpupara;
                               paraloc:=hp.paraloc[side].add_location;
                               paraloc^.loc:=LOC_REFERENCE;
                               paraloc^.size:=paracgsize;
+                              paraloc^.def:=paradef;
                               if side=callerside then
                                 paraloc^.reference.index:=NR_STACK_POINTER_REG
                               else
@@ -650,6 +673,7 @@ unit cpupara;
                             begin
                               if paralen=0 then
                                 internalerror(200501163);
+                              firstparaloc:=true;
                               while (paralen>0) do
                                 begin
                                   paraloc:=hp.paraloc[side].add_location;
@@ -658,15 +682,22 @@ unit cpupara;
                                   if (paracgsize in [OS_F64,OS_F32]) then
                                     begin
                                       paraloc^.size:=paracgsize;
+                                      paraloc^.def:=paradef;
                                       l:=paralen;
                                     end
                                   else
                                     begin
                                       { We can allocate at maximum 32 bits per location }
                                       if paralen>sizeof(aint) then
-                                        l:=sizeof(aint)
+                                        begin
+                                          l:=sizeof(aint);
+                                          paraloc^.def:=uinttype;
+                                        end
                                       else
-                                        l:=paralen;
+                                        begin
+                                          l:=paralen;
+                                          paraloc^.def:=get_paraloc_def(paradef,l,firstparaloc);
+                                        end;
                                       paraloc^.size:=int_cgsize(l);
                                     end;
                                   if side=callerside then
@@ -679,6 +710,7 @@ unit cpupara;
                                     inc(paraloc^.reference.offset,target_info.first_parm_offset);
                                   parasize:=align(parasize+l,varalign);
                                   dec(paralen,l);
+                                  firstparaloc:=false;
                                 end;
                             end;
                         end;

+ 7 - 1
compiler/i386/i386att.inc

@@ -943,5 +943,11 @@
 'vxorpd',
 'vxorps',
 'vzeroall',
-'vzeroupper'
+'vzeroupper',
+'andn',
+'bextr',
+'rorx',
+'sarx',
+'shlx',
+'shrx'
 );

+ 6 - 0
compiler/i386/i386atts.inc

@@ -943,5 +943,11 @@ attsufNONE,
 attsufNONE,
 attsufNONE,
 attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
+attsufNONE,
 attsufNONE
 );

+ 7 - 1
compiler/i386/i386int.inc

@@ -943,5 +943,11 @@
 'vxorpd',
 'vxorps',
 'vzeroall',
-'vzeroupper'
+'vzeroupper',
+'andn',
+'bextr',
+'rorx',
+'sarx',
+'shlx',
+'shrx'
 );

+ 1 - 1
compiler/i386/i386nop.inc

@@ -1,2 +1,2 @@
 { don't edit, this file is generated from x86ins.dat }
-1652;
+1658;

+ 7 - 1
compiler/i386/i386op.inc

@@ -943,5 +943,11 @@ A_VUNPCKLPS,
 A_VXORPD,
 A_VXORPS,
 A_VZEROALL,
-A_VZEROUPPER
+A_VZEROUPPER,
+A_ANDN,
+A_BEXTR,
+A_RORX,
+A_SARX,
+A_SHLX,
+A_SHRX
 );

+ 16 - 10
compiler/i386/i386prop.inc

@@ -685,6 +685,10 @@
 (Ch: (Ch_RRAX, Ch_WMemEDI, Ch_RWRDI)),
 (Ch: (Ch_WRAX, Ch_RWRSI, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -772,11 +776,17 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
+(Ch: (Ch_Wop3, Ch_Rop2, Ch_Rop1)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
@@ -934,14 +944,10 @@
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
 (Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None)),
-(Ch: (Ch_All, Ch_None, Ch_None))
+(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
+(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
+(Ch: (Ch_Wop1, Ch_Rop2, Ch_None)),
+(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
+(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3)),
+(Ch: (Ch_Wop1, Ch_Rop2, Ch_Rop3))
 );

+ 42 - 0
compiler/i386/i386tab.inc

@@ -11563,5 +11563,47 @@
     optypes : (ot_none,ot_none,ot_none,ot_none);
     code    : #242#248#1#119;
     flags   : if_avx or if_sandybridge
+  ),
+  (
+    opcode  : A_ANDN;
+    ops     : 3;
+    optypes : (ot_reg32,ot_reg32,ot_rm_gpr or ot_bits32,ot_none);
+    code    : #242#249#1#242#61#80;
+    flags   : if_bmi1
+  ),
+  (
+    opcode  : A_BEXTR;
+    ops     : 3;
+    optypes : (ot_reg32,ot_rm_gpr or ot_bits32,ot_reg32,ot_none);
+    code    : #242#249#1#247#62#72;
+    flags   : if_bmi1
+  ),
+  (
+    opcode  : A_RORX;
+    ops     : 3;
+    optypes : (ot_reg32,ot_rm_gpr or ot_bits32,ot_immediate or ot_bits8,ot_none);
+    code    : #220#242#250#1#240#72#22;
+    flags   : if_bmi2
+  ),
+  (
+    opcode  : A_SARX;
+    ops     : 3;
+    optypes : (ot_reg32,ot_rm_gpr or ot_bits32,ot_reg32,ot_none);
+    code    : #219#242#249#1#247#62#72;
+    flags   : if_bmi2
+  ),
+  (
+    opcode  : A_SHLX;
+    ops     : 3;
+    optypes : (ot_reg32,ot_rm_gpr or ot_bits32,ot_reg32,ot_none);
+    code    : #241#242#249#1#247#62#72;
+    flags   : if_bmi2
+  ),
+  (
+    opcode  : A_SHRX;
+    ops     : 3;
+    optypes : (ot_reg32,ot_rm_gpr or ot_bits32,ot_reg32,ot_none);
+    code    : #220#242#249#1#247#62#72;
+    flags   : if_bmi2
   )
 );

+ 4 - 4
compiler/i386/r386ari.inc

@@ -8,21 +8,21 @@
 15,
 6,
 5,
+38,
 39,
 40,
 41,
-42,
 26,
 7,
 10,
 19,
 9,
+32,
 33,
 34,
 35,
 36,
 37,
-38,
 27,
 11,
 4,
@@ -35,7 +35,7 @@
 28,
 18,
 24,
-32,
+47,
 30,
 31,
 57,
@@ -58,11 +58,11 @@
 53,
 54,
 55,
+42,
 43,
 44,
 45,
 46,
-47,
 65,
 66,
 67,

+ 1 - 1
compiler/i386/r386att.inc

@@ -31,7 +31,6 @@
 '%ss',
 '%fs',
 '%gs',
-'%flags',
 '%dr0',
 '%dr1',
 '%dr2',
@@ -47,6 +46,7 @@
 '%tr5',
 '%tr6',
 '%tr7',
+'%flags',
 '%st(0)',
 '%st(1)',
 '%st(2)',

+ 1 - 1
compiler/i386/r386con.inc

@@ -31,7 +31,6 @@ NR_ES = tregister($05000003);
 NR_SS = tregister($05000004);
 NR_FS = tregister($05000005);
 NR_GS = tregister($05000006);
-NR_FLAGS = tregister($05000007);
 NR_DR0 = tregister($05000007);
 NR_DR1 = tregister($05000008);
 NR_DR2 = tregister($05000009);
@@ -47,6 +46,7 @@ NR_TR4 = tregister($05000012);
 NR_TR5 = tregister($05000013);
 NR_TR6 = tregister($05000014);
 NR_TR7 = tregister($05000015);
+NR_FLAGS = tregister($05000016);
 NR_ST0 = tregister($02000000);
 NR_ST1 = tregister($02000001);
 NR_ST2 = tregister($02000002);

+ 1 - 1
compiler/i386/r386int.inc

@@ -31,7 +31,6 @@
 'ss',
 'fs',
 'gs',
-'flags',
 'dr0',
 'dr1',
 'dr2',
@@ -47,6 +46,7 @@
 'tr5',
 'tr6',
 'tr7',
+'flags',
 'st(0)',
 'st(1)',
 'st(2)',

+ 4 - 4
compiler/i386/r386iri.inc

@@ -9,21 +9,21 @@
 15,
 6,
 5,
+38,
 39,
 40,
 41,
-42,
 26,
 7,
 10,
 19,
 9,
+32,
 33,
 34,
 35,
 36,
 37,
-38,
 27,
 11,
 4,
@@ -36,7 +36,7 @@
 28,
 18,
 24,
-32,
+47,
 30,
 31,
 57,
@@ -59,11 +59,11 @@
 53,
 54,
 55,
+42,
 43,
 44,
 45,
 46,
-47,
 65,
 66,
 67,

+ 1 - 1
compiler/i386/r386nasm.inc

@@ -31,7 +31,6 @@
 'ss',
 'fs',
 'gs',
-'flags',
 'dr0',
 'dr1',
 'dr2',
@@ -47,6 +46,7 @@
 'tr5',
 'tr6',
 'tr7',
+'flags',
 'st0',
 'st1',
 'st2',

+ 4 - 4
compiler/i386/r386nri.inc

@@ -9,21 +9,21 @@
 15,
 6,
 5,
+38,
 39,
 40,
 41,
-42,
 26,
 7,
 10,
 19,
 9,
+32,
 33,
 34,
 35,
 36,
 37,
-38,
 27,
 11,
 4,
@@ -36,7 +36,7 @@
 28,
 18,
 24,
-32,
+47,
 30,
 31,
 57,
@@ -59,11 +59,11 @@
 53,
 54,
 55,
+42,
 43,
 44,
 45,
 46,
-47,
 65,
 66,
 67,

+ 1 - 1
compiler/i386/r386num.inc

@@ -32,7 +32,6 @@ tregister($05000004),
 tregister($05000005),
 tregister($05000006),
 tregister($05000007),
-tregister($05000007),
 tregister($05000008),
 tregister($05000009),
 tregister($0500000a),
@@ -47,6 +46,7 @@ tregister($05000012),
 tregister($05000013),
 tregister($05000014),
 tregister($05000015),
+tregister($05000016),
 tregister($02000000),
 tregister($02000001),
 tregister($02000002),

+ 1 - 1
compiler/i386/r386op.inc

@@ -32,7 +32,6 @@
 4,
 5,
 0,
-0,
 1,
 2,
 3,
@@ -48,6 +47,7 @@
 6,
 7,
 0,
+0,
 1,
 2,
 3,

+ 1 - 1
compiler/i386/r386ot.inc

@@ -31,7 +31,6 @@ OT_REG_DESS,
 OT_REG_DESS,
 OT_REG_FSGS,
 OT_REG_FSGS,
-OT_NONE,
 OT_REG_DREG,
 OT_REG_DREG,
 OT_REG_DREG,
@@ -47,6 +46,7 @@ OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
+OT_NONE,
 OT_FPU0,
 OT_FPUREG,
 OT_FPUREG,

+ 1 - 1
compiler/i386/r386rni.inc

@@ -63,8 +63,8 @@
 29,
 30,
 31,
-33,
 32,
+33,
 34,
 35,
 36,

+ 4 - 4
compiler/i386/r386sri.inc

@@ -9,21 +9,21 @@
 15,
 6,
 5,
+38,
 39,
 40,
 41,
-42,
 26,
 7,
 10,
 19,
 9,
+32,
 33,
 34,
 35,
 36,
 37,
-38,
 27,
 11,
 4,
@@ -36,7 +36,7 @@
 28,
 18,
 24,
-32,
+47,
 30,
 31,
 57,
@@ -59,11 +59,11 @@
 53,
 54,
 55,
+42,
 43,
 44,
 45,
 46,
-47,
 65,
 66,
 67,

+ 1 - 1
compiler/i386/r386std.inc

@@ -31,7 +31,6 @@
 'ss',
 'fs',
 'gs',
-'flags',
 'dr0',
 'dr1',
 'dr2',
@@ -47,6 +46,7 @@
 'tr5',
 'tr6',
 'tr7',
+'flags',
 'st(0)',
 'st(1)',
 'st(2)',

+ 275 - 59
compiler/i8086/cgcpu.pas

@@ -43,6 +43,15 @@ unit cgcpu;
 
         function getintregister(list:TAsmList;size:Tcgsize):Tregister;override;
 
+        procedure a_call_name(list : TAsmList;const s : string; weak: boolean);override;
+        procedure a_call_name_far(list : TAsmList;const s : string; weak: boolean);
+        procedure a_call_name_static(list : TAsmList;const s : string);override;
+        procedure a_call_name_static_far(list : TAsmList;const s : string);
+        procedure a_call_reg(list : TAsmList;reg : tregister);override;
+        procedure a_call_reg_far(list : TAsmList;reg : tregister);
+        procedure a_call_ref(list : TAsmList;ref : treference);override;
+        procedure a_call_ref_far(list : TAsmList;ref : treference);
+
         procedure a_op_const_reg(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; reg: TRegister); override;
         procedure a_op_const_ref(list : TAsmList; Op: TOpCG; size: TCGSize; a: tcgint; const ref: TReference); override;
         procedure a_op_reg_reg(list : TAsmList; Op: TOpCG; size: TCGSize; src, dst: TRegister); override;
@@ -98,7 +107,8 @@ unit cgcpu;
        globals,verbose,systems,cutils,
        paramgr,procinfo,fmodule,
        rgcpu,rgx86,cpuinfo,
-       symtype,symsym;
+       symtype,symsym,
+       tgobj;
 
     function use_push(const cgpara:tcgpara):boolean;
       begin
@@ -157,12 +167,109 @@ unit cgcpu;
       end;
 
 
+    procedure tcg8086.a_call_name(list: TAsmList; const s: string; weak: boolean);
+      begin
+        if current_settings.x86memorymodel in x86_far_code_models then
+          a_call_name_far(list,s,weak)
+        else
+          a_call_name_near(list,s,weak);
+      end;
+
+
+    procedure tcg8086.a_call_name_far(list: TAsmList; const s: string;
+      weak: boolean);
+      var
+        sym : tasmsymbol;
+        r : treference;
+      begin
+        if not(weak) then
+          sym:=current_asmdata.RefAsmSymbol(s)
+        else
+          sym:=current_asmdata.WeakRefAsmSymbol(s);
+        reference_reset_symbol(r,sym,0,sizeof(pint));
+        r.refaddr:=addr_far;
+        list.concat(taicpu.op_ref(A_CALL,S_NO,r));
+      end;
+
+
+    procedure tcg8086.a_call_name_static(list: TAsmList; const s: string);
+      begin
+        if current_settings.x86memorymodel in x86_far_code_models then
+          a_call_name_static_far(list,s)
+        else
+          a_call_name_static_near(list,s);
+      end;
+
+
+    procedure tcg8086.a_call_name_static_far(list: TAsmList; const s: string);
+      var
+        sym : tasmsymbol;
+        r : treference;
+      begin
+        sym:=current_asmdata.RefAsmSymbol(s);
+        reference_reset_symbol(r,sym,0,sizeof(pint));
+        r.refaddr:=addr_far;
+        list.concat(taicpu.op_ref(A_CALL,S_NO,r));
+      end;
+
+
+    procedure tcg8086.a_call_reg(list: TAsmList; reg: tregister);
+      begin
+        if current_settings.x86memorymodel in x86_far_code_models then
+          a_call_reg_far(list,reg)
+        else
+          a_call_reg_near(list,reg);
+      end;
+
+
+    procedure tcg8086.a_call_reg_far(list: TAsmList; reg: tregister);
+      var
+        href: treference;
+      begin
+        { unfortunately, x86 doesn't have a 'call far reg:reg' instruction, so }
+        { we have to use a temp }
+        tg.gettemp(list,4,2,tt_normal,href);
+        { HACK!!! at this point all registers are allocated, due to the fact that
+          in the pascal calling convention, all registers are caller saved. This
+          causes the register allocator to fail on the next move instruction, so we
+          temporarily deallocate 2 registers.
+          TODO: figure out a better way to do this. }
+        cg.ungetcpuregister(list,NR_BX);
+        cg.ungetcpuregister(list,NR_SI);
+        a_load_reg_ref(list,OS_32,OS_32,reg,href);
+        cg.getcpuregister(list,NR_BX);
+        cg.getcpuregister(list,NR_SI);
+        a_call_ref_far(list,href);
+        tg.ungettemp(list,href);
+      end;
+
+
+    procedure tcg8086.a_call_ref(list: TAsmList; ref: treference);
+      begin
+        if current_settings.x86memorymodel in x86_far_code_models then
+          a_call_ref_far(list,ref)
+        else
+          a_call_ref_near(list,ref);
+      end;
+
+
+    procedure tcg8086.a_call_ref_far(list: TAsmList; ref: treference);
+      begin
+        ref.refaddr:=addr_far_ref;
+        list.concat(taicpu.op_ref(A_CALL,S_NO,ref));
+      end;
+
+
     procedure tcg8086.a_op_const_reg(list: TAsmList; Op: TOpCG; size: TCGSize;
       a: tcgint; reg: TRegister);
       var
         tmpreg: tregister;
         op1, op2: TAsmOp;
         ax_subreg: tregister;
+        hl_loop_start: tasmlabel;
+        ai: taicpu;
+        use_loop: Boolean;
+        i: Integer;
       begin
         optimize_op_const(op, a);
         check_register_size(size,reg);
@@ -206,6 +313,103 @@ unit cgcpu;
                     list.concat(taicpu.op_const_reg(op2,S_W,aint(a shr 16),GetNextReg(reg)));
                   end;
                 end;
+              OP_SHR,OP_SHL,OP_SAR:
+                begin
+                  a:=a and 31;
+                  { for shl with const >= 16, we can just move the low register
+                    to the high reg, then zero the low register, then do the
+                    remaining part of the shift (by const-16) in 16 bit on the
+                    high register. the same thing applies to shr with low and high
+                    reversed. sar is exactly like shr, except that instead of
+                    zeroing the high register, we sar it by 15. }
+                  if a>=16 then
+                    case op of
+                      OP_SHR:
+                        begin
+                          a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),reg);
+                          a_load_const_reg(list,OS_16,0,GetNextReg(reg));
+                          a_op_const_reg(list,OP_SHR,OS_16,a-16,reg);
+                        end;
+                      OP_SHL:
+                        begin
+                          a_load_reg_reg(list,OS_16,OS_16,reg,GetNextReg(reg));
+                          a_load_const_reg(list,OS_16,0,reg);
+                          a_op_const_reg(list,OP_SHL,OS_16,a-16,GetNextReg(reg));
+                        end;
+                      OP_SAR:
+                        begin
+                          a_load_reg_reg(list,OS_16,OS_16,GetNextReg(reg),reg);
+                          a_op_const_reg(list,OP_SAR,OS_16,15,GetNextReg(reg));
+                          a_op_const_reg(list,OP_SAR,OS_16,a-16,reg);
+                        end;
+                      else
+                        internalerror(2013060201);
+                    end
+                  else if a<>0 then
+                    begin
+                      use_loop:=a>2;
+
+                      if use_loop then
+                        begin
+                          getcpuregister(list,NR_CX);
+                          a_load_const_reg(list,OS_16,a,NR_CX);
+
+                          current_asmdata.getjumplabel(hl_loop_start);
+                          a_label(list,hl_loop_start);
+
+                          case op of
+                            OP_SHR:
+                              begin
+                                list.concat(taicpu.op_const_reg(A_SHR,S_W,1,GetNextReg(reg)));
+                                list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
+                              end;
+                            OP_SAR:
+                              begin
+                                list.concat(taicpu.op_const_reg(A_SAR,S_W,1,GetNextReg(reg)));
+                                list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
+                              end;
+                            OP_SHL:
+                              begin
+                                list.concat(taicpu.op_const_reg(A_SHL,S_W,1,reg));
+                                list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg)));
+                              end;
+                            else
+                              internalerror(2013030903);
+                          end;
+
+                          ai:=Taicpu.Op_Sym(A_LOOP,S_W,hl_loop_start);
+                          ai.is_jmp:=true;
+                          list.concat(ai);
+
+                          ungetcpuregister(list,NR_CX);
+                        end
+                      else
+                        begin
+                          for i:=1 to a do
+                            begin
+                              case op of
+                                OP_SHR:
+                                  begin
+                                    list.concat(taicpu.op_const_reg(A_SHR,S_W,1,GetNextReg(reg)));
+                                    list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
+                                  end;
+                                OP_SAR:
+                                  begin
+                                    list.concat(taicpu.op_const_reg(A_SAR,S_W,1,GetNextReg(reg)));
+                                    list.concat(taicpu.op_const_reg(A_RCR,S_W,1,reg));
+                                  end;
+                                OP_SHL:
+                                  begin
+                                    list.concat(taicpu.op_const_reg(A_SHL,S_W,1,reg));
+                                    list.concat(taicpu.op_const_reg(A_RCL,S_W,1,GetNextReg(reg)));
+                                  end;
+                                else
+                                  internalerror(2013030903);
+                              end;
+                            end;
+                        end;
+                    end;
+                end;
               else
                 begin
                   tmpreg:=getintregister(list,size);
@@ -643,7 +847,7 @@ unit cgcpu;
           { for go32v2 we obtain OS_F32,
             but pushs is not valid, we need pushl }
           if opsize=S_FS then
-            opsize:=S_L;
+            opsize:=S_W;
           if tcgsize2size[paraloc^.size]<cgpara.alignment then
             begin
               tmpreg:=getintregister(list,pushsize);
@@ -789,19 +993,54 @@ unit cgcpu;
             else
               internalerror(2013030310);
           OS_16,OS_S16:
-            if fromsize in [OS_16,OS_S16] then
-              list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref))
-            else
-              internalerror(2013030312);
+            case fromsize of
+              OS_8:
+                begin
+                  reg := makeregsize(list, reg, OS_16);
+                  setsubreg(reg, R_SUBH);
+                  list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg));
+                  setsubreg(reg, R_SUBW);
+                  list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
+                end;
+              OS_S8: internalerror(2013052503);  { TODO }
+              OS_16,OS_S16:
+                begin
+                  list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
+                end;
+              else
+                internalerror(2013030312);
+            end;
           OS_32,OS_S32:
-            if fromsize in [OS_32,OS_S32] then
-              begin
-                list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
-                inc(tmpref.offset, 2);
-                list.concat(taicpu.op_reg_ref(A_MOV, S_W, GetNextReg(reg), tmpref));
-              end
-            else
-              internalerror(2013030313);
+            case fromsize of
+              OS_8:
+                begin
+                  reg := makeregsize(list, reg, OS_16);
+                  setsubreg(reg, R_SUBH);
+                  list.concat(taicpu.op_const_reg(A_MOV, S_B, 0, reg));
+                  setsubreg(reg, R_SUBW);
+                  list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
+                  inc(tmpref.offset, 2);
+                  list.concat(taicpu.op_const_ref(A_MOV, S_W, 0, tmpref));
+                end;
+              OS_S8:
+                internalerror(2013052501);  { TODO }
+              OS_16:
+                begin
+                  list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
+                  inc(tmpref.offset, 2);
+                  list.concat(taicpu.op_const_ref(A_MOV, S_W, 0, tmpref));
+                end;
+              OS_S16:
+                internalerror(2013052502);  { TODO }
+              OS_32,OS_S32:
+                begin
+                  list.concat(taicpu.op_reg_ref(A_MOV, S_W, reg, tmpref));
+                  inc(tmpref.offset, 2);
+                  list.concat(taicpu.op_reg_ref(A_MOV, S_W, GetNextReg(reg), tmpref));
+                end;
+              else
+                internalerror(2013030313);
+            end;
           else
             internalerror(2013030311);
         end;
@@ -976,13 +1215,13 @@ unit cgcpu;
                   OS_S8:
                     begin
                       getcpuregister(list, NR_AX);
-                      getcpuregister(list, NR_DX);
                       add_mov(taicpu.op_reg_reg(A_MOV, S_B, reg1, NR_AL));
+                      getcpuregister(list, NR_DX);
                       list.concat(taicpu.op_none(A_CBW));
                       list.concat(taicpu.op_none(A_CWD));
                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg2));
-                      add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg2)));
                       ungetcpuregister(list, NR_AX);
+                      add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg2)));
                       ungetcpuregister(list, NR_DX);
                     end;
                   OS_16:
@@ -993,12 +1232,12 @@ unit cgcpu;
                   OS_S16:
                     begin
                       getcpuregister(list, NR_AX);
-                      getcpuregister(list, NR_DX);
                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, reg1, NR_AX));
+                      getcpuregister(list, NR_DX);
                       list.concat(taicpu.op_none(A_CWD));
                       add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_AX, reg2));
-                      add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg2)));
                       ungetcpuregister(list, NR_AX);
+                      add_mov(taicpu.op_reg_reg(A_MOV, S_W, NR_DX, GetNextReg(reg2)));
                       ungetcpuregister(list, NR_DX);
                     end;
                   OS_32,OS_S32:
@@ -1060,7 +1299,12 @@ unit cgcpu;
     procedure tcg8086.g_proc_exit(list : TAsmList;parasize:longint;nostackframe:boolean);
       var
         stacksize : longint;
+        ret_instr: TAsmOp;
       begin
+        if po_far in current_procinfo.procdef.procoptions then
+          ret_instr:=A_RETF
+        else
+          ret_instr:=A_RET;
         { MMX needs to call EMMS }
         if assigned(rg[R_MMXREGISTER]) and
            (rg[R_MMXREGISTER].uses_registers) then
@@ -1095,45 +1339,17 @@ unit cgcpu;
             list.concat(tai_regalloc.dealloc(current_procinfo.framepointer,nil));
           end;
 
-        { return from proc }
-        if (po_interrupt in current_procinfo.procdef.procoptions) and
-           { this messes up stack alignment }
-           (target_info.stackalign=4) then
+        { return from interrupt }
+        if po_interrupt in current_procinfo.procdef.procoptions then
           begin
-            if assigned(current_procinfo.procdef.funcretloc[calleeside].location) and
-               (current_procinfo.procdef.funcretloc[calleeside].location^.loc=LOC_REGISTER) then
-              begin
-                if (getsupreg(current_procinfo.procdef.funcretloc[calleeside].location^.register)=RS_EAX) then
-                  list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
-                else
-                  internalerror(2010053001);
-              end
-            else
-              list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EAX));
-            list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EBX));
-            list.concat(Taicpu.Op_reg(A_POP,S_L,NR_ECX));
-
-            if (current_procinfo.procdef.funcretloc[calleeside].size in [OS_64,OS_S64]) and
-               assigned(current_procinfo.procdef.funcretloc[calleeside].location) and
-               assigned(current_procinfo.procdef.funcretloc[calleeside].location^.next) and
-               (current_procinfo.procdef.funcretloc[calleeside].location^.next^.loc=LOC_REGISTER) then
-              begin
-                if (getsupreg(current_procinfo.procdef.funcretloc[calleeside].location^.next^.register)=RS_EDX) then
-                  list.concat(Taicpu.Op_const_reg(A_ADD,S_L,4,NR_ESP))
-                else
-                  internalerror(2010053002);
-              end
-            else
-              list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDX));
-
-            list.concat(Taicpu.Op_reg(A_POP,S_L,NR_ESI));
-            list.concat(Taicpu.Op_reg(A_POP,S_L,NR_EDI));
-            { .... also the segment registers }
-            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DS));
             list.concat(Taicpu.Op_reg(A_POP,S_W,NR_ES));
-            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_FS));
-            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_GS));
-            { this restores the flags }
+            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DS));
+            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DI));
+            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_SI));
+            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_DX));
+            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_CX));
+            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_BX));
+            list.concat(Taicpu.Op_reg(A_POP,S_W,NR_AX));
             list.concat(Taicpu.Op_none(A_IRET,S_NO));
           end
         { Routines with the poclearstack flag set use only a ret }
@@ -1149,19 +1365,19 @@ unit cgcpu;
                (tf_safecall_exceptions in target_info.flags)) and
               paramanager.ret_in_param(current_procinfo.procdef.returndef,
                                        current_procinfo.procdef) then
-             list.concat(Taicpu.Op_const(A_RET,S_W,sizeof(aint)))
+             list.concat(Taicpu.Op_const(ret_instr,S_W,sizeof(aint)))
            else
-             list.concat(Taicpu.Op_none(A_RET,S_NO));
+             list.concat(Taicpu.Op_none(ret_instr,S_NO));
          end
         { ... also routines with parasize=0 }
         else if (parasize=0) then
-         list.concat(Taicpu.Op_none(A_RET,S_NO))
+         list.concat(Taicpu.Op_none(ret_instr,S_NO))
         else
          begin
            { parameters are limited to 65535 bytes because ret allows only imm16 }
            if (parasize>65535) then
              CGMessage(cg_e_parasize_too_big);
-           list.concat(Taicpu.Op_const(A_RET,S_W,parasize));
+           list.concat(Taicpu.Op_const(ret_instr,S_W,parasize));
          end;
       end;
 

+ 13 - 4
compiler/i8086/cpuinfo.pas

@@ -62,7 +62,8 @@ Type
       fpu_ssse3,
       fpu_sse41,
       fpu_sse42,
-      fpu_avx
+      fpu_avx,
+      fpu_avx2
      );
 
 
@@ -102,11 +103,14 @@ Const
      'SSSE3',
      'SSE41',
      'SSE42',
-     'AVX'
+     'AVX',
+     'AVX2'
    );
 
-   sse_singlescalar : set of tfputype = [fpu_sse,fpu_sse2,fpu_sse3];
-   sse_doublescalar : set of tfputype = [fpu_sse2,fpu_sse3];
+   sse_singlescalar : set of tfputype = [fpu_sse..fpu_avx2];
+   sse_doublescalar : set of tfputype = [fpu_sse2..fpu_avx2];
+
+   fpu_avx_instructionsets = [fpu_avx,fpu_avx2];
 
    { Supported optimizations, only used for information }
    supported_optimizerswitches = genericlevel1optimizerswitches+
@@ -125,6 +129,11 @@ Const
    level3optimizerswitches = genericlevel3optimizerswitches + level2optimizerswitches + [{,cs_opt_loopunroll}];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [cs_useebp];
 
+   x86_near_code_models = [mm_tiny,mm_small,mm_compact];
+   x86_far_code_models = [mm_medium,mm_large,mm_huge];
+   x86_near_data_models = [mm_tiny,mm_small,mm_medium];
+   x86_far_data_models = [mm_compact,mm_large,mm_huge];
+
 Implementation
 
 end.

+ 5 - 6
compiler/i8086/cpunode.pas

@@ -46,16 +46,15 @@ unit cpunode;
          after the generic one (FK)
        }
        nx86set,
-       nx86con,
-       nx86cnv,
-       nx86mem,
 
        n8086add,
-       n8086cal{,
-       n386mem,
+       n8086cal,
+       n8086cnv,
+       n8086mem{,
        n386set},
        n8086inl,
-       n8086mat
+       n8086mat,
+       n8086con
        ;
 
 end.

+ 72 - 20
compiler/i8086/cpupara.pas

@@ -231,18 +231,23 @@ unit cpupara;
     procedure ti8086paramanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
-        def : tdef;
+        psym: tparavarsym;
+        pdef : tdef;
       begin
-        def:=tparavarsym(pd.paras[nr-1]).vardef;
+        psym:=tparavarsym(pd.paras[nr-1]);
+        pdef:=psym.vardef;
+        if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
+          pdef:=getpointerdef(pdef);
         cgpara.reset;
-        cgpara.size:=def_cgsize(def);
+        cgpara.size:=def_cgsize(pdef);
         cgpara.intsize:=tcgsize2size[cgpara.size];
         cgpara.alignment:=get_para_align(pd.proccalloption);
-        cgpara.def:=def;
+        cgpara.def:=pdef;
         paraloc:=cgpara.add_location;
         with paraloc^ do
          begin
-           size:=OS_INT;
+           size:=def_cgsize(pdef);
+           def:=pdef;
            if pd.proccalloption=pocall_register then
              begin
                if (nr<=length(parasupregs)) then
@@ -321,6 +326,7 @@ unit cpupara;
             paraloc^.loc:=LOC_FPUREGISTER;
             paraloc^.register:=NR_FPU_RESULT_REG;
             paraloc^.size:=retcgsize;
+            paraloc^.def:=result.def;
           end
         else
          { Return in register }
@@ -335,6 +341,7 @@ unit cpupara;
                else
                  paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
                paraloc^.size:=OS_16;
+               paraloc^.def:=u16inttype;
 
                { bits 16..31 }
                paraloc:=result.add_location;
@@ -344,6 +351,7 @@ unit cpupara;
                else
                  paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
                paraloc^.size:=OS_16;
+               paraloc^.def:=u16inttype;
 
                { bits 32..47 }
                paraloc:=result.add_location;
@@ -353,6 +361,7 @@ unit cpupara;
                else
                  paraloc^.register:=NR_FUNCTION_RETURN64_HIGHER_REG;
                paraloc^.size:=OS_16;
+               paraloc^.def:=u16inttype;
 
                { bits 48..63 }
                paraloc:=result.add_location;
@@ -362,6 +371,7 @@ unit cpupara;
                else
                  paraloc^.register:=NR_FUNCTION_RETURN64_HIGHEST_REG;
                paraloc^.size:=OS_16;
+               paraloc^.def:=u16inttype;
              end
             else if retcgsize in [OS_32,OS_S32] then
              begin
@@ -371,6 +381,7 @@ unit cpupara;
                else
                  paraloc^.register:=NR_FUNCTION_RETURN32_LOW_REG;
                paraloc^.size:=OS_16;
+               paraloc^.def:=u16inttype;
 
                { high 16bits }
                paraloc:=result.add_location;
@@ -380,10 +391,12 @@ unit cpupara;
                else
                  paraloc^.register:=NR_FUNCTION_RETURN32_HIGH_REG;
                paraloc^.size:=OS_16;
+               paraloc^.def:=u16inttype;
              end
             else
              begin
                paraloc^.size:=retcgsize;
+               paraloc^.def:=result.def;
                if side=callerside then
                  paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
@@ -403,8 +416,9 @@ unit cpupara;
         paralen,
         varalign   : longint;
         paraalign  : shortint;
-        pushaddr   : boolean;
         paracgsize : tcgsize;
+        firstparaloc,
+        pushaddr   : boolean;
       begin
         paraalign:=get_para_align(p.proccalloption);
         { we push Flags and CS as long
@@ -466,6 +480,7 @@ unit cpupara;
                 paraloc:=hp.paraloc[side].add_location;
                 paraloc^.loc:=LOC_REFERENCE;
                 paraloc^.size:=paracgsize;
+                paraloc^.def:=paradef;
                 if side=callerside then
                   paraloc^.reference.index:=NR_STACK_POINTER_REG
                 else
@@ -480,13 +495,18 @@ unit cpupara;
                   paralen:=16;
                 paraloc^.reference.offset:=parasize;
                 if side=calleeside then
-                  inc(paraloc^.reference.offset,target_info.first_parm_offset);
+                  begin
+                    inc(paraloc^.reference.offset,target_info.first_parm_offset);
+                    if po_far in p.procoptions then
+                      inc(paraloc^.reference.offset,2);
+                  end;
                 parasize:=align(parasize+paralen,varalign);
               end
             else
               begin
                 if paralen=0 then
                   internalerror(200501163);
+                firstparaloc:=true;
                 while (paralen>0) do
                   begin
                     paraloc:=hp.paraloc[side].add_location;
@@ -495,15 +515,22 @@ unit cpupara;
                     if (paracgsize in [OS_F64,OS_F32]) then
                       begin
                         paraloc^.size:=paracgsize;
+                        paraloc^.def:=paradef;
                         l:=paralen;
                       end
                     else
                       begin
-                        { We can allocate at maximum 32 bits per location }
-                        if paralen>sizeof(aint) then
-                          l:=sizeof(aint)
+                        { We can allocate at maximum 16 bits per location }
+                        if paralen>=sizeof(aint) then
+                          begin
+                            l:=sizeof(aint);
+                            paraloc^.def:=uinttype;
+                          end
                         else
-                          l:=paralen;
+                          begin
+                            l:=paralen;
+                            paraloc^.def:=get_paraloc_def(paradef,l,firstparaloc);
+                          end;
                         paraloc^.size:=int_cgsize(l);
                       end;
                     if (side=callerside) or
@@ -514,13 +541,18 @@ unit cpupara;
                     varalign:=used_align(size_2_align(l),paraalign,paraalign);
                     paraloc^.reference.offset:=parasize;
                     if side=calleeside then
-                      if not(po_nostackframe in p.procoptions) then
-                        inc(paraloc^.reference.offset,target_info.first_parm_offset)
-                      else
-                        { return addres }
-                        inc(paraloc^.reference.offset,4);
+                      begin
+                        if not(po_nostackframe in p.procoptions) then
+                          inc(paraloc^.reference.offset,target_info.first_parm_offset)
+                        else
+                          { return addres }
+                          inc(paraloc^.reference.offset,2);
+                        if po_far in p.procoptions then
+                          inc(paraloc^.reference.offset,2);
+                      end;
                     parasize:=align(parasize+l,varalign);
                     dec(paralen,l);
+                    firstparaloc:=false;
                   end;
               end;
             if p.proccalloption in pushleftright_pocalls then
@@ -545,6 +577,7 @@ unit cpupara;
         pushaddr : boolean;
         paraalign : shortint;
         pass : byte;
+        firstparaloc : boolean;
       begin
         if paras.count=0 then
           exit;
@@ -611,6 +644,7 @@ unit cpupara;
                           begin
                             paraloc:=hp.paraloc[side].add_location;
                             paraloc^.size:=paracgsize;
+                            paraloc^.def:=paradef;
                             paraloc^.loc:=LOC_REGISTER;
                             paraloc^.register:=newreg(R_INTREGISTER,parasupregs[parareg],cgsize2subreg(R_INTREGISTER,paracgsize));
                             inc(parareg);
@@ -626,6 +660,7 @@ unit cpupara;
                               paraloc:=hp.paraloc[side].add_location;
                               paraloc^.loc:=LOC_REFERENCE;
                               paraloc^.size:=paracgsize;
+                              paraloc^.def:=paradef;
                               if side=callerside then
                                 paraloc^.reference.index:=NR_STACK_POINTER_REG
                               else
@@ -633,13 +668,18 @@ unit cpupara;
                               varalign:=used_align(size_2_align(paralen),paraalign,paraalign);
                               paraloc^.reference.offset:=parasize;
                               if side=calleeside then
-                                inc(paraloc^.reference.offset,target_info.first_parm_offset);
+                                begin
+                                  inc(paraloc^.reference.offset,target_info.first_parm_offset);
+                                  if po_far in p.procoptions then
+                                    inc(paraloc^.reference.offset,2);
+                                end;
                               parasize:=align(parasize+paralen,varalign);
                             end
                           else
                             begin
                               if paralen=0 then
                                 internalerror(200501163);
+                              firstparaloc:=true;
                               while (paralen>0) do
                                 begin
                                   paraloc:=hp.paraloc[side].add_location;
@@ -648,15 +688,22 @@ unit cpupara;
                                   if (paracgsize in [OS_F64,OS_F32]) then
                                     begin
                                       paraloc^.size:=paracgsize;
+                                      paraloc^.def:=paradef;
                                       l:=paralen;
                                     end
                                   else
                                     begin
                                       { We can allocate at maximum 32 bits per location }
                                       if paralen>sizeof(aint) then
-                                        l:=sizeof(aint)
+                                        begin
+                                          l:=sizeof(aint);
+                                          paraloc^.def:=uinttype;
+                                        end
                                       else
-                                        l:=paralen;
+                                        begin
+                                          l:=paralen;
+                                          paraloc^.def:=get_paraloc_def(paradef,l,firstparaloc);
+                                        end;
                                       paraloc^.size:=int_cgsize(l);
                                     end;
                                   if side=callerside then
@@ -666,9 +713,14 @@ unit cpupara;
                                   varalign:=used_align(size_2_align(l),paraalign,paraalign);
                                   paraloc^.reference.offset:=parasize;
                                   if side=calleeside then
-                                    inc(paraloc^.reference.offset,target_info.first_parm_offset);
+                                    begin
+                                      inc(paraloc^.reference.offset,target_info.first_parm_offset);
+                                      if po_far in p.procoptions then
+                                        inc(paraloc^.reference.offset,2);
+                                    end;
                                   parasize:=align(parasize+l,varalign);
                                   dec(paralen,l);
+                                  firstparaloc:=false;
                                 end;
                             end;
                         end;

+ 10 - 11
compiler/i8086/n8086add.pas

@@ -84,7 +84,6 @@ interface
       var
         op         : TOpCG;
         op1,op2    : TAsmOp;
-        opsize     : TOpSize;
         hregister,
         hregister2 : tregister;
         hl4        : tasmlabel;
@@ -97,7 +96,6 @@ interface
         op1:=A_NONE;
         op2:=A_NONE;
         mboverflow:=false;
-        opsize:=S_L;
         unsigned:=((left.resultdef.typ=orddef) and
                    (torddef(left.resultdef).ordtype=u64bit)) or
                   ((right.resultdef.typ=orddef) and
@@ -174,16 +172,16 @@ interface
             begin
               r:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
               cg64.a_load64low_loc_reg(current_asmdata.CurrAsmList,right.location,r);
-              emit_reg_reg(op1,opsize,left.location.register64.reglo,r);
-              emit_reg_reg(op2,opsize,GetNextReg(left.location.register64.reglo),GetNextReg(r));
-              emit_reg_reg(A_MOV,opsize,r,left.location.register64.reglo);
-              emit_reg_reg(A_MOV,opsize,GetNextReg(r),GetNextReg(left.location.register64.reglo));
+              emit_reg_reg(op1,S_W,left.location.register64.reglo,r);
+              emit_reg_reg(op2,S_W,GetNextReg(left.location.register64.reglo),GetNextReg(r));
+              emit_reg_reg(A_MOV,S_W,r,left.location.register64.reglo);
+              emit_reg_reg(A_MOV,S_W,GetNextReg(r),GetNextReg(left.location.register64.reglo));
               cg64.a_load64high_loc_reg(current_asmdata.CurrAsmList,right.location,r);
               { the carry flag is still ok }
-              emit_reg_reg(op2,opsize,left.location.register64.reghi,r);
-              emit_reg_reg(op2,opsize,GetNextReg(left.location.register64.reghi),GetNextReg(r));
-              emit_reg_reg(A_MOV,opsize,r,left.location.register64.reghi);
-              emit_reg_reg(A_MOV,opsize,GetNextReg(r),GetNextReg(left.location.register64.reghi));
+              emit_reg_reg(op2,S_W,left.location.register64.reghi,r);
+              emit_reg_reg(op2,S_W,GetNextReg(left.location.register64.reghi),GetNextReg(r));
+              emit_reg_reg(A_MOV,S_W,r,left.location.register64.reghi);
+              emit_reg_reg(A_MOV,S_W,GetNextReg(r),GetNextReg(left.location.register64.reghi));
             end
            else
             begin
@@ -561,7 +559,8 @@ interface
                  inc(href.offset,2);
                  emit_ref_reg(A_CMP,S_W,href,GetNextReg(left.location.register));
                  firstjmp32bitcmp;
-                 emit_ref_reg(A_CMP,S_W,right.location.reference,left.location.register);
+                 dec(href.offset,2);
+                 emit_ref_reg(A_CMP,S_W,href,left.location.register);
                  secondjmp32bitcmp;
                  cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
                  location_freetemp(current_asmdata.CurrAsmList,right.location);

+ 136 - 0
compiler/i8086/n8086cnv.pas

@@ -0,0 +1,136 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    Generate code for i8086 assembler for type converting nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit n8086cnv;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      node,ncgcnv,nx86cnv,defutil,defcmp;
+
+    type
+       t8086typeconvnode = class(tx86typeconvnode)
+       protected
+         procedure second_proc_to_procvar;override;
+       end;
+
+
+implementation
+
+   uses
+      verbose,systems,globals,globtype,
+      aasmbase,aasmtai,aasmdata,aasmcpu,
+      symconst,symdef,
+      cgbase,cga,procinfo,pass_1,pass_2,
+      ncon,ncal,ncnv,
+      cpubase,cpuinfo,
+      cgutils,cgobj,hlcgobj,cgx86,ncgutil,
+      tgobj;
+
+
+    procedure t8086typeconvnode.second_proc_to_procvar;
+      var
+        tmpreg: tregister;
+        tmpref: treference;
+      begin
+        if not (po_far in tabstractprocdef(resultdef).procoptions) then
+          begin
+            inherited;
+            exit;
+          end;
+
+        if tabstractprocdef(resultdef).is_addressonly then
+          begin
+            location_reset(location,LOC_REGISTER,OS_32);
+            { only a code pointer? (when taking the address of classtype.method
+              we also only get a code pointer even though the resultdef is a
+              procedure of object, and hence is_addressonly would return false)
+             }
+  	    if left.location.size = OS_32 then
+              begin
+                case left.location.loc of
+                  LOC_REFERENCE,LOC_CREFERENCE:
+                    begin
+                      { the procedure symbol is encoded in reference.symbol -> take address }
+                      location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+                      cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,location.register);
+                      tmpref:=left.location.reference;
+                      tmpref.refaddr:=addr_seg;
+                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,tmpref,GetNextReg(location.register));
+                    end;
+                  else
+                    internalerror(2013031501)
+                end;
+              end
+            else
+              begin
+                { conversion from a procedure of object/nested procvar to plain procvar }
+                case left.location.loc of
+                  LOC_REFERENCE,LOC_CREFERENCE:
+                    begin
+                      location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_32);
+                      { code field is the first one }
+                      cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,location.register);
+                    end;
+                  LOC_REGISTER,LOC_CREGISTER:
+                    begin
+                      if target_info.endian=endian_little then
+                        location.register:=left.location.register
+                      else
+                        location.register:=left.location.registerhi;
+                    end;
+                  else
+                    internalerror(2013031502)
+                end;
+              end;
+          end
+        else
+          begin
+            { TODO: update for far procs }
+            if not tabstractprocdef(left.resultdef).is_addressonly then
+              location_copy(location,left.location)
+            else
+              begin
+                { assigning a global function to a nested procvar -> create
+                  tmethodpointer record and set the "frame pointer" to nil }
+                if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
+                  internalerror(2013031503);
+                location_reset_ref(location,LOC_REFERENCE,int_cgsize(sizeof(pint)*2),sizeof(pint));
+                tg.gethltemp(current_asmdata.CurrAsmList,resultdef,resultdef.size,tt_normal,location.reference);
+                tmpreg:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,tmpreg);
+                cg.a_load_reg_ref(current_asmdata.CurrAsmList,OS_ADDR,OS_ADDR,tmpreg,location.reference);
+                { setting the frame pointer to nil is not strictly necessary
+                  since the global procedure won't use it, but it can help with
+                  debugging }
+                inc(location.reference.offset,sizeof(pint));
+                cg.a_load_const_ref(current_asmdata.CurrAsmList,OS_ADDR,0,location.reference);
+                dec(location.reference.offset,sizeof(pint));
+              end;
+          end;
+      end;
+
+
+begin
+  ctypeconvnode:=t8086typeconvnode
+end.

+ 71 - 0
compiler/i8086/n8086con.pas

@@ -0,0 +1,71 @@
+{
+    Copyright (c) 1998-2012 by Florian Klaempfl and others
+
+    Generate i8086 assembler for constants
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit n8086con;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+       node,ncon,ncgcon,nx86con;
+
+    type
+
+      { tcgpointerconstnode }
+
+      ti8086pointerconstnode = class(tcgpointerconstnode)
+        procedure pass_generate_code;override;
+      end;
+
+implementation
+
+    uses
+      systems,globals,globtype,
+      symconst,symdef,
+      defutil,
+      cpubase,
+      cga,cgx86,cgobj,cgbase,cgutils;
+
+    {*****************************************************************************
+                               T8086POINTERCONSTNODE
+    *****************************************************************************}
+
+    procedure ti8086pointerconstnode.pass_generate_code;
+      begin
+        { far pointer? }
+        if (typedef.typ=pointerdef) and (tpointerdef(typedef).x86pointertyp in [x86pt_far,x86pt_huge]) then
+          begin
+            location_reset(location,LOC_CONSTANT,OS_32);
+            location.value:=longint(value);
+          end
+        else
+          begin
+            { an integer const. behaves as a memory reference }
+            location_reset(location,LOC_CONSTANT,OS_ADDR);
+            location.value:=aint(value);
+          end;
+      end;
+
+
+begin
+  cpointerconstnode:=ti8086pointerconstnode;
+end.

+ 132 - 0
compiler/i8086/n8086mem.pas

@@ -0,0 +1,132 @@
+{
+    Copyright (c) 1998-2002 by Florian Klaempfl
+
+    Generate i8086 assembler for in memory related nodes
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ ****************************************************************************
+}
+unit n8086mem;
+
+{$i fpcdefs.inc}
+
+interface
+
+    uses
+      globtype,
+      cgbase,cpuinfo,cpubase,
+      node,nmem,ncgmem,nx86mem;
+
+    type
+       ti8086derefnode = class(tx86derefnode)
+         procedure pass_generate_code;override;
+       end;
+
+implementation
+
+    uses
+      systems,globals,
+      cutils,verbose,
+      symbase,symconst,symdef,symtable,symtype,symsym,
+      parabase,paramgr,
+      aasmtai,aasmdata,
+      nld,ncon,nadd,
+      cgutils,cgobj,
+      defutil,hlcgobj,
+      pass_2,ncgutil;
+
+{*****************************************************************************
+                             TI8086DEREFNODE
+*****************************************************************************}
+
+    procedure ti8086derefnode.pass_generate_code;
+      var
+        paraloc1 : tcgpara;
+        pd : tprocdef;
+        sym : tsym;
+        st : tsymtable;
+        tmpref: treference;
+      begin
+        if tpointerdef(left.resultdef).x86pointertyp in [x86pt_far,x86pt_huge] then
+          begin
+            secondpass(left);
+            { assume natural alignment, except for packed records }
+            if not(resultdef.typ in [recorddef,objectdef]) or
+               (tabstractrecordsymtable(tabstractrecorddef(resultdef).symtable).usefieldalignment<>1) then
+              location_reset_ref(location,LOC_REFERENCE,def_cgsize(resultdef),resultdef.alignment)
+            else
+              location_reset_ref(location,LOC_REFERENCE,def_cgsize(resultdef),1);
+            if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE,LOC_CONSTANT]) then
+              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
+            case left.location.loc of
+               LOC_CREGISTER,
+               LOC_REGISTER:
+                 begin
+                   maybechangeloadnodereg(current_asmdata.CurrAsmList,left,true);
+                   location.reference.base := left.location.register;
+                   location.reference.segment := GetNextReg(left.location.register);
+                 end;
+               LOC_CREFERENCE,
+               LOC_REFERENCE:
+                 begin
+                    location.reference.base:=cg.getaddressregister(current_asmdata.CurrAsmList);
+                    cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,location.reference.base);
+                    location.reference.segment:=cg.getintregister(current_asmdata.CurrAsmList,OS_16);
+                    tmpref:=left.location.reference;
+                    inc(tmpref.offset,2);
+                    cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,tmpref,location.reference.segment);
+                 end;
+               LOC_CONSTANT:
+                 begin
+                   location.reference.offset:=left.location.value and $FFFF;
+                   location.reference.segment:=cg.getintregister(current_asmdata.CurrAsmList,OS_16);
+                   cg.a_load_const_reg(current_asmdata.CurrAsmList,OS_16,(left.location.value shr 16) and $FFFF,location.reference.segment);
+                 end;
+               else
+                 internalerror(200507031);
+            end;
+            if (cs_use_heaptrc in current_settings.globalswitches) and
+               (cs_checkpointer in current_settings.localswitches) and
+               not(cs_compilesystem in current_settings.moduleswitches) and
+   {$ifdef x86}
+               (tpointerdef(left.resultdef).x86pointertyp = default_x86_data_pointer_type) and
+   {$endif x86}
+               not(nf_no_checkpointer in flags) and
+               { can be NR_NO in case of LOC_CONSTANT }
+               (location.reference.base<>NR_NO) then
+             begin
+               if not searchsym_in_named_module('HEAPTRC','CHECKPOINTER',sym,st) or
+                  (sym.typ<>procsym) then
+                 internalerror(2012010601);
+               pd:=tprocdef(tprocsym(sym).ProcdefList[0]);
+               paraloc1.init;
+               paramanager.getintparaloc(pd,1,paraloc1);
+               hlcg.a_load_reg_cgpara(current_asmdata.CurrAsmList,resultdef,location.reference.base,paraloc1);
+               paramanager.freecgpara(current_asmdata.CurrAsmList,paraloc1);
+               paraloc1.done;
+               hlcg.allocallcpuregisters(current_asmdata.CurrAsmList);
+               hlcg.a_call_name(current_asmdata.CurrAsmList,pd,'FPC_CHECKPOINTER',nil,false);
+               hlcg.deallocallcpuregisters(current_asmdata.CurrAsmList);
+             end;
+          end
+        else
+          inherited pass_generate_code;
+      end;
+
+
+begin
+  cderefnode:=ti8086derefnode;
+end.

+ 4 - 4
compiler/i8086/r8086ari.inc

@@ -8,21 +8,21 @@
 15,
 6,
 5,
+38,
 39,
 40,
 41,
-42,
 26,
 7,
 10,
 19,
 9,
+32,
 33,
 34,
 35,
 36,
 37,
-38,
 27,
 11,
 4,
@@ -35,7 +35,7 @@
 28,
 18,
 24,
-32,
+47,
 30,
 31,
 57,
@@ -58,11 +58,11 @@
 53,
 54,
 55,
+42,
 43,
 44,
 45,
 46,
-47,
 65,
 66,
 67,

+ 1 - 1
compiler/i8086/r8086att.inc

@@ -31,7 +31,6 @@
 '%ss',
 '%fs',
 '%gs',
-'%flags',
 '%dr0',
 '%dr1',
 '%dr2',
@@ -47,6 +46,7 @@
 '%tr5',
 '%tr6',
 '%tr7',
+'%flags',
 '%st(0)',
 '%st(1)',
 '%st(2)',

+ 1 - 1
compiler/i8086/r8086con.inc

@@ -31,7 +31,6 @@ NR_ES = tregister($05000003);
 NR_SS = tregister($05000004);
 NR_FS = tregister($05000005);
 NR_GS = tregister($05000006);
-NR_FLAGS = tregister($05000007);
 NR_DR0 = tregister($05000007);
 NR_DR1 = tregister($05000008);
 NR_DR2 = tregister($05000009);
@@ -47,6 +46,7 @@ NR_TR4 = tregister($05000012);
 NR_TR5 = tregister($05000013);
 NR_TR6 = tregister($05000014);
 NR_TR7 = tregister($05000015);
+NR_FLAGS = tregister($05000016);
 NR_ST0 = tregister($02000000);
 NR_ST1 = tregister($02000001);
 NR_ST2 = tregister($02000002);

+ 1 - 1
compiler/i8086/r8086int.inc

@@ -31,7 +31,6 @@
 'ss',
 'fs',
 'gs',
-'flags',
 'dr0',
 'dr1',
 'dr2',
@@ -47,6 +46,7 @@
 'tr5',
 'tr6',
 'tr7',
+'flags',
 'st(0)',
 'st(1)',
 'st(2)',

+ 4 - 4
compiler/i8086/r8086iri.inc

@@ -9,21 +9,21 @@
 15,
 6,
 5,
+38,
 39,
 40,
 41,
-42,
 26,
 7,
 10,
 19,
 9,
+32,
 33,
 34,
 35,
 36,
 37,
-38,
 27,
 11,
 4,
@@ -36,7 +36,7 @@
 28,
 18,
 24,
-32,
+47,
 30,
 31,
 57,
@@ -59,11 +59,11 @@
 53,
 54,
 55,
+42,
 43,
 44,
 45,
 46,
-47,
 65,
 66,
 67,

+ 1 - 1
compiler/i8086/r8086nasm.inc

@@ -31,7 +31,6 @@
 'ss',
 'fs',
 'gs',
-'flags',
 'dr0',
 'dr1',
 'dr2',
@@ -47,6 +46,7 @@
 'tr5',
 'tr6',
 'tr7',
+'flags',
 'st0',
 'st1',
 'st2',

+ 4 - 4
compiler/i8086/r8086nri.inc

@@ -9,21 +9,21 @@
 15,
 6,
 5,
+38,
 39,
 40,
 41,
-42,
 26,
 7,
 10,
 19,
 9,
+32,
 33,
 34,
 35,
 36,
 37,
-38,
 27,
 11,
 4,
@@ -36,7 +36,7 @@
 28,
 18,
 24,
-32,
+47,
 30,
 31,
 57,
@@ -59,11 +59,11 @@
 53,
 54,
 55,
+42,
 43,
 44,
 45,
 46,
-47,
 65,
 66,
 67,

+ 1 - 1
compiler/i8086/r8086num.inc

@@ -32,7 +32,6 @@ tregister($05000004),
 tregister($05000005),
 tregister($05000006),
 tregister($05000007),
-tregister($05000007),
 tregister($05000008),
 tregister($05000009),
 tregister($0500000a),
@@ -47,6 +46,7 @@ tregister($05000012),
 tregister($05000013),
 tregister($05000014),
 tregister($05000015),
+tregister($05000016),
 tregister($02000000),
 tregister($02000001),
 tregister($02000002),

+ 1 - 1
compiler/i8086/r8086op.inc

@@ -32,7 +32,6 @@
 4,
 5,
 0,
-0,
 1,
 2,
 3,
@@ -48,6 +47,7 @@
 6,
 7,
 0,
+0,
 1,
 2,
 3,

+ 1 - 1
compiler/i8086/r8086ot.inc

@@ -31,7 +31,6 @@ OT_REG_DESS,
 OT_REG_DESS,
 OT_REG_FSGS,
 OT_REG_FSGS,
-OT_NONE,
 OT_REG_DREG,
 OT_REG_DREG,
 OT_REG_DREG,
@@ -47,6 +46,7 @@ OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
 OT_REG_TREG,
+OT_NONE,
 OT_FPU0,
 OT_FPUREG,
 OT_FPUREG,

+ 1 - 1
compiler/i8086/r8086rni.inc

@@ -63,8 +63,8 @@
 29,
 30,
 31,
-33,
 32,
+33,
 34,
 35,
 36,

+ 4 - 4
compiler/i8086/r8086sri.inc

@@ -9,21 +9,21 @@
 15,
 6,
 5,
+38,
 39,
 40,
 41,
-42,
 26,
 7,
 10,
 19,
 9,
+32,
 33,
 34,
 35,
 36,
 37,
-38,
 27,
 11,
 4,
@@ -36,7 +36,7 @@
 28,
 18,
 24,
-32,
+47,
 30,
 31,
 57,
@@ -59,11 +59,11 @@
 53,
 54,
 55,
+42,
 43,
 44,
 45,
 46,
-47,
 65,
 66,
 67,

+ 1 - 1
compiler/i8086/r8086std.inc

@@ -31,7 +31,6 @@
 'ss',
 'fs',
 'gs',
-'flags',
 'dr0',
 'dr1',
 'dr2',
@@ -47,6 +46,7 @@
 'tr5',
 'tr6',
 'tr7',
+'flags',
 'st(0)',
 'st(1)',
 'st(2)',

+ 15 - 0
compiler/jvm/cpupara.pas

@@ -147,6 +147,7 @@ implementation
             result.size:=OS_NO;
             result.intsize:=0;
             paraloc^.size:=OS_NO;
+            paraloc^.def:=voidtype;
             paraloc^.loc:=LOC_VOID;
             exit;
           end;
@@ -156,6 +157,11 @@ implementation
             retcgsize:=OS_INT;
             result.intsize:=sizeof(pint);
           end
+        else if jvmimplicitpointertype(result.def) then
+          begin
+            retcgsize:=OS_ADDR;
+            result.def:=getpointerdef(result.def);
+          end
         else
           begin
             retcgsize:=def_cgsize(result.def);
@@ -168,6 +174,8 @@ implementation
         paraloc^.loc:=LOC_REFERENCE;
         paraloc^.reference.index:=NR_EVAL_STACK_BASE;
         paraloc^.reference.offset:=0;
+        paraloc^.size:=result.size;
+        paraloc^.def:=result.def;
       end;
 
     function TJVMParaManager.param_use_paraloc(const cgpara: tcgpara): boolean;
@@ -226,6 +234,11 @@ implementation
                 paracgsize:=OS_ADDR;
                 paradef:=java_jlobject;
               end
+            else if jvmimplicitpointertype(hp.vardef) then
+              begin
+                paracgsize:=OS_ADDR;
+                paradef:=getpointerdef(hp.vardef);
+              end
             else
               begin
                 paracgsize:=def_cgsize(hp.vardef);
@@ -246,6 +259,8 @@ implementation
               taking up two slots) }
             paraloc^.loc:=LOC_REFERENCE;;
             paraloc^.reference.offset:=paraofs;
+            paraloc^.size:=paracgsize;
+            paraloc^.def:=paradef;
             case side of
               callerside:
                 begin

+ 38 - 2
compiler/jvm/hlcgcpu.pas

@@ -113,7 +113,13 @@ uses
 
       { unimplemented/unnecessary routines }
       procedure a_bit_scan_reg_reg(list: TAsmList; reverse: boolean; size: tdef; src, dst: tregister); override;
-      procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); override;
+      procedure a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle); override;
+      procedure a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle); override;
+      procedure a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle); override;
+      procedure a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle); override;
+      procedure a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tdef; src, dst: tregister; shuffle: pmmshuffle); override;
+      procedure a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle); override;
+      procedure a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tdef; mmreg, intreg: tregister; shuffle: pmmshuffle); override;
       procedure g_stackpointer_alloc(list: TAsmList; size: longint); override;
       procedure g_intf_wrapper(list: TAsmList; procdef: tprocdef; const labelname: string; ioffset: longint); override;
       procedure g_adjust_self_value(list: TAsmList; procdef: tprocdef; ioffset: aint); override;
@@ -1876,11 +1882,41 @@ implementation
       internalerror(2012090201);
     end;
 
-  procedure thlcgjvm.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tcgsize; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle);
+  procedure thlcgjvm.a_loadmm_loc_reg(list: TAsmList; fromsize, tosize: tdef; const loc: tlocation; const reg: tregister; shuffle: pmmshuffle);
     begin
       internalerror(2012090202);
     end;
 
+  procedure thlcgjvm.a_loadmm_reg_reg(list: TAsmList; fromsize, tosize: tdef; reg1, reg2: tregister; shuffle: pmmshuffle);
+    begin
+      internalerror(2012060130);
+    end;
+
+  procedure thlcgjvm.a_loadmm_ref_reg(list: TAsmList; fromsize, tosize: tdef; const ref: treference; reg: tregister; shuffle: pmmshuffle);
+    begin
+      internalerror(2012060131);
+    end;
+
+  procedure thlcgjvm.a_loadmm_reg_ref(list: TAsmList; fromsize, tosize: tdef; reg: tregister; const ref: treference; shuffle: pmmshuffle);
+    begin
+      internalerror(2012060132);
+    end;
+
+  procedure thlcgjvm.a_opmm_reg_reg(list: TAsmList; Op: TOpCG; size: tdef; src, dst: tregister; shuffle: pmmshuffle);
+    begin
+      internalerror(2012060133);
+    end;
+
+  procedure thlcgjvm.a_loadmm_intreg_reg(list: TAsmList; fromsize, tosize: tdef; intreg, mmreg: tregister; shuffle: pmmshuffle);
+    begin
+      internalerror(2012060134);
+    end;
+
+  procedure thlcgjvm.a_loadmm_reg_intreg(list: TAsmList; fromsize, tosize: tdef; mmreg, intreg: tregister; shuffle: pmmshuffle);
+    begin
+      internalerror(2012060135);
+    end;
+
   procedure thlcgjvm.g_stackpointer_alloc(list: TAsmList; size: longint);
     begin
       internalerror(2012090203);

+ 3 - 3
compiler/jvm/njvmcal.pas

@@ -124,7 +124,7 @@ implementation
                 begin
                   parent:=tunarynode(p);
                   { skip typeconversions that don't change the node type }
-                  p:=p.actualtargetnode;
+                  p:=actualtargetnode(@p)^;
                 end;
               derefn:
                 begin
@@ -239,8 +239,8 @@ implementation
           local variables (fields, arrays etc are all initialized on creation) }
         verifyout:=
           (cs_check_var_copyout in current_settings.localswitches) and
-          ((left.actualtargetnode.nodetype<>loadn) or
-           (tloadnode(left.actualtargetnode).symtableentry.typ<>localvarsym));
+          ((actualtargetnode(@left)^.nodetype<>loadn) or
+           (tloadnode(actualtargetnode(@left)^).symtableentry.typ<>localvarsym));
 
         { in case of a non-out parameter, pass in the original value (also
           always in case of implicitpointer type, since that pointer points to

+ 2 - 2
compiler/jvm/njvmld.pas

@@ -62,7 +62,7 @@ implementation
 
 uses
   verbose,globals,
-  nbas,nld,ncal,ncon,ninl,nmem,ncnv,
+  nbas,nld,ncal,ncon,ninl,nmem,ncnv,nutils,
   symconst,symsym,symdef,symtable,defutil,jvmdef,
   paramgr,
   pass_1,
@@ -100,7 +100,7 @@ function tjvmassignmentnode.pass_1: tnode;
     { intercept writes to string elements, because Java strings are immutable
       -> detour via StringBuilder
     }
-    target:=left.actualtargetnode;
+    target:=actualtargetnode(@left)^;
     if (target.nodetype=vecn) and
        (is_wide_or_unicode_string(tvecnode(target).left.resultdef) or
         is_ansistring(tvecnode(target).left.resultdef)) then

+ 3 - 2
compiler/jvm/njvmmem.pas

@@ -63,7 +63,8 @@ implementation
       aasmbase,
       symconst,symtype,symtable,symsym,symdef,defutil,jvmdef,
       htypechk,paramgr,
-      nadd,ncal,ncnv,ncon,nld,pass_1,njvmcon,
+      nadd,ncal,ncnv,ncon,nld,nutils,
+      pass_1,njvmcon,
       aasmdata,aasmcpu,pass_2,
       cgutils,hlcgobj,hlcgcpu;
 
@@ -151,7 +152,7 @@ implementation
       var
         target: tnode;
       begin
-        target:=left.actualtargetnode;
+        target:=actualtargetnode(@left)^;
         result:=
           (left.nodetype=derefn);
       end;

+ 20 - 5
compiler/m68k/cpupara.pas

@@ -68,16 +68,20 @@ unit cpupara;
     procedure tm68kparamanager.getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
       var
         paraloc : pcgparalocation;
-        def : tdef;
+        psym: tparavarsym;
+        pdef: tdef;
       begin
          if nr<1 then
            internalerror(2002070801);
-         def:=tparavarsym(pd.paras[nr-1]).vardef;
+         psym:=tparavarsym(pd.paras[nr-1]);
+         pdef:=psym.vardef;
+         if push_addr_param(psym.varspez,pdef,pd.proccalloption) then
+           pdef:=getpointerdef(pdef);
          cgpara.reset;
-         cgpara.size:=def_cgsize(def);
+         cgpara.size:=def_cgsize(pdef);
          cgpara.intsize:=tcgsize2size[cgpara.size];
          cgpara.alignment:=std_param_align;
-         cgpara.def:=def;
+         cgpara.def:=pdef;
          paraloc:=cgpara.add_location;
          with paraloc^ do
            begin
@@ -87,7 +91,8 @@ unit cpupara;
               loc:=LOC_REFERENCE;
               reference.index:=NR_STACK_POINTER_REG;
               reference.offset:=target_info.first_parm_offset+nr*4;
-              size:=OS_INT;
+              size:=def_cgsize(pdef);
+              def:=pdef;
            end;
       end;
 
@@ -206,6 +211,7 @@ unit cpupara;
             paraloc^.loc:=LOC_FPUREGISTER;
             paraloc^.register:=NR_FPU_RESULT_REG;
             paraloc^.size:=retcgsize;
+            paraloc^.def:=result.def;
           end
         else
          { Return in register }
@@ -215,6 +221,7 @@ unit cpupara;
                { low 32bits }
                paraloc^.loc:=LOC_REGISTER;
                paraloc^.size:=OS_32;
+               paraloc^.def:=u32inttype;
                if side=callerside then
                  paraloc^.register:=NR_FUNCTION_RESULT64_LOW_REG
                else
@@ -223,6 +230,7 @@ unit cpupara;
                paraloc:=result.add_location;
                paraloc^.loc:=LOC_REGISTER;
                paraloc^.size:=OS_32;
+               paraloc^.def:=u32inttype;
                if side=calleeside then
                  paraloc^.register:=NR_FUNCTION_RESULT64_HIGH_REG
                else
@@ -232,6 +240,7 @@ unit cpupara;
              begin
                paraloc^.loc:=LOC_REGISTER;
                paraloc^.size:=retcgsize;
+               paraloc^.def:=result.def;
                if side=callerside then
                  paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
@@ -266,6 +275,7 @@ unit cpupara;
 	nextintreg,
 	nextfloatreg : tsuperregister;
 	stack_offset : longint;
+        firstparaloc : boolean;
 
       begin
         result:=0;
@@ -301,6 +311,7 @@ unit cpupara;
                 paraloc^.loc:=LOC_REGISTER;
 		paraloc^.register:=NR_D0;
                 paraloc^.size:=OS_ADDR;
+                paraloc^.def:=voidpointertype;
                 break;
               end;
 
@@ -344,6 +355,7 @@ unit cpupara;
                 end
               else
                 internalerror(200506052);
+            firstparaloc:=true;
             { can become < 0 for e.g. 3-byte records }
             while (paralen > 0) do
               begin
@@ -384,6 +396,7 @@ unit cpupara;
 {$endif DEBUG_CHARLIE}
                     paraloc^.loc:=LOC_REFERENCE;
                     paraloc^.size:=int_cgsize(paralen);
+                    paraloc^.def:=get_paraloc_def(paradef,paralen,firstparaloc);
                     if (side = callerside) then
                       paraloc^.reference.index:=NR_STACK_POINTER_REG
                     else
@@ -392,6 +405,7 @@ unit cpupara;
                     inc(stack_offset,align(paralen,4));
                     paralen := 0;
                   end;
+                firstparaloc:=false;
               end;
           end;
          result:=stack_offset;
@@ -493,6 +507,7 @@ unit cpupara;
               paraloc:=p.paraloc[callerside].add_location;
               paraloc^.loc:=LOC_REGISTER;
               paraloc^.size:=def_cgsize(p.vardef);
+              paraloc^.def:=p.vardef;
               { pattern is always uppercase'd }
               if s='D0' then
                 paraloc^.register:=NR_D0

+ 2 - 2
compiler/m68k/n68kadd.pas

@@ -672,7 +672,7 @@ implementation
             secondpass(left);
             if left.location.loc in [LOC_FLAGS,LOC_JUMP] then begin
 //             writeln('ajjaj');
-             hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,hlcg.tcgsize2orddef(cgsize),false);
+             hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(cgsize),false);
 //             writeln('reccs?');
             end;
             if isjump then
@@ -691,7 +691,7 @@ implementation
               end;
             secondpass(right);
             if right.location.loc in [LOC_FLAGS,LOC_JUMP] then
-             hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,hlcg.tcgsize2orddef(cgsize),false);
+             hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,cgsize_orddef(cgsize),false);
             if isjump then
              begin
                current_procinfo.CurrTrueLabel:=otl;

+ 17 - 13
compiler/mips/aasmcpu.pas

@@ -56,6 +56,8 @@ type
 
     constructor op_reg_reg_ref(op: tasmop; _op1, _op2: tregister; const _op3: treference);
     constructor op_reg_reg_const(op: tasmop; _op1, _op2: tregister; _op3: aint);
+    { INS and EXT }
+    constructor op_reg_reg_const_const(op: tasmop; _op1,_op2: tregister; _op3,_op4: aint);
     constructor op_reg_const_reg(op: tasmop; _op1: tregister; _op2: aint; _op3: tregister);
 
     { this is for Jmp instructions }
@@ -186,6 +188,17 @@ begin
 end;
 
 
+constructor taicpu.op_reg_reg_const_const(op: tasmop; _op1, _op2: tregister; _op3, _op4: aint);
+begin
+  inherited create(op);
+  ops := 4;
+  loadreg(0, _op1);
+  loadreg(1, _op2);
+  loadconst(2, _op3);
+  loadconst(3, _op4);
+end;
+
+
 constructor taicpu.op_reg_const_reg(op: tasmop; _op1: tregister; _op2: aint;
  _op3: tregister);
 begin
@@ -316,18 +329,6 @@ end;
       A_DMULTU,
       A_MFHI,
       A_MFLO,
-      A_MULTG,
-      A_DMULTG,
-      A_MULTUG,
-      A_DMULTUG,
-      A_DIVG,
-      A_DDIVG,
-      A_DIVUG,
-      A_DDIVUG,
-      A_MODG,
-      A_DMODG,
-      A_MODUG,
-      A_DMODUG,
 
       A_SLL,
       A_SRL,
@@ -397,7 +398,10 @@ end;
       A_SGTU,
       A_SLE,
       A_SLEU,
-      A_SNE];
+      A_SNE,
+      A_EXT,
+      A_INS,
+      A_MFC0];
 
       begin
         result := operand_read;

+ 179 - 254
compiler/mips/cgcpu.pas

@@ -39,7 +39,6 @@ type
 
     procedure init_register_allocators; override;
     procedure done_register_allocators; override;
-    function getfpuregister(list: tasmlist; size: Tcgsize): Tregister; override;
 ///    { needed by cg64 }
     procedure make_simple_ref(list: tasmlist; var ref: treference);
     procedure handle_reg_const_reg(list: tasmlist; op: Tasmop; src: tregister; a: tcgint; dst: tregister);
@@ -121,145 +120,11 @@ uses
   procinfo, cpupi;
 
 
-  function f_TOpCG2AsmOp(op: TOpCG; size: tcgsize): TAsmOp;
-  begin
-    if size = OS_32 then
-      case op of
-        OP_ADD:       { simple addition          }
-          f_TOpCG2AsmOp := A_ADDU;
-        OP_AND:       { simple logical and       }
-          f_TOpCG2AsmOp := A_AND;
-        OP_DIV:       { simple unsigned division }
-          f_TOpCG2AsmOp := A_DIVU;
-        OP_IDIV:      { simple signed division   }
-          f_TOpCG2AsmOp := A_DIV;
-        OP_IMUL:      { simple signed multiply   }
-          f_TOpCG2AsmOp := A_MULT;
-        OP_MUL:       { simple unsigned multiply }
-          f_TOpCG2AsmOp := A_MULTU;
-        OP_NEG:       { simple negate            }
-          f_TOpCG2AsmOp := A_NEGU;
-        OP_NOT:       { simple logical not       }
-          f_TOpCG2AsmOp := A_NOT;
-        OP_OR:        { simple logical or        }
-          f_TOpCG2AsmOp := A_OR;
-        OP_SAR:       { arithmetic shift-right   }
-          f_TOpCG2AsmOp := A_SRA;
-        OP_SHL:       { logical shift left       }
-          f_TOpCG2AsmOp := A_SLL;
-        OP_SHR:       { logical shift right      }
-          f_TOpCG2AsmOp := A_SRL;
-        OP_SUB:       { simple subtraction       }
-          f_TOpCG2AsmOp := A_SUBU;
-        OP_XOR:       { simple exclusive or      }
-          f_TOpCG2AsmOp := A_XOR;
-        else
-          InternalError(2007070401);
-      end{ case }
-    else
-      case op of
-        OP_ADD:       { simple addition          }
-          f_TOpCG2AsmOp := A_ADDU;
-        OP_AND:       { simple logical and       }
-          f_TOpCG2AsmOp := A_AND;
-        OP_DIV:       { simple unsigned division }
-          f_TOpCG2AsmOp := A_DIVU;
-        OP_IDIV:      { simple signed division   }
-          f_TOpCG2AsmOp := A_DIV;
-        OP_IMUL:      { simple signed multiply   }
-          f_TOpCG2AsmOp := A_MULT;
-        OP_MUL:       { simple unsigned multiply }
-          f_TOpCG2AsmOp := A_MULTU;
-        OP_NEG:       { simple negate            }
-          f_TOpCG2AsmOp := A_NEGU;
-        OP_NOT:       { simple logical not       }
-          f_TOpCG2AsmOp := A_NOT;
-        OP_OR:        { simple logical or        }
-          f_TOpCG2AsmOp := A_OR;
-        OP_SAR:       { arithmetic shift-right   }
-          f_TOpCG2AsmOp := A_SRA;
-        OP_SHL:       { logical shift left       }
-          f_TOpCG2AsmOp := A_SLL;
-        OP_SHR:       { logical shift right      }
-          f_TOpCG2AsmOp := A_SRL;
-        OP_SUB:       { simple subtraction       }
-          f_TOpCG2AsmOp := A_SUBU;
-        OP_XOR:       { simple exclusive or      }
-          f_TOpCG2AsmOp := A_XOR;
-        else
-          InternalError(2007010701);
-      end;{ case }
-  end;
-
-  function f_TOpCG2AsmOp_ovf(op: TOpCG; size: tcgsize): TAsmOp;
-  begin
-    if size = OS_32 then
-      case op of
-        OP_ADD:       { simple addition          }
-          f_TOpCG2AsmOp_ovf := A_ADD;
-        OP_AND:       { simple logical and       }
-          f_TOpCG2AsmOp_ovf := A_AND;
-        OP_DIV:       { simple unsigned division }
-          f_TOpCG2AsmOp_ovf := A_DIVU;
-        OP_IDIV:      { simple signed division   }
-          f_TOpCG2AsmOp_ovf := A_DIV;
-        OP_IMUL:      { simple signed multiply   }
-          f_TOpCG2AsmOp_ovf := A_MULO;
-        OP_MUL:       { simple unsigned multiply }
-          f_TOpCG2AsmOp_ovf := A_MULOU;
-        OP_NEG:       { simple negate            }
-          f_TOpCG2AsmOp_ovf := A_NEG;
-        OP_NOT:       { simple logical not       }
-          f_TOpCG2AsmOp_ovf := A_NOT;
-        OP_OR:        { simple logical or        }
-          f_TOpCG2AsmOp_ovf := A_OR;
-        OP_SAR:       { arithmetic shift-right   }
-          f_TOpCG2AsmOp_ovf := A_SRA;
-        OP_SHL:       { logical shift left       }
-          f_TOpCG2AsmOp_ovf := A_SLL;
-        OP_SHR:       { logical shift right      }
-          f_TOpCG2AsmOp_ovf := A_SRL;
-        OP_SUB:       { simple subtraction       }
-          f_TOpCG2AsmOp_ovf := A_SUB;
-        OP_XOR:       { simple exclusive or      }
-          f_TOpCG2AsmOp_ovf := A_XOR;
-        else
-          InternalError(2007070403);
-      end{ case }
-    else
-      case op of
-        OP_ADD:       { simple addition          }
-          f_TOpCG2AsmOp_ovf := A_ADD;
-        OP_AND:       { simple logical and       }
-          f_TOpCG2AsmOp_ovf := A_AND;
-        OP_DIV:       { simple unsigned division }
-          f_TOpCG2AsmOp_ovf := A_DIVU;
-        OP_IDIV:      { simple signed division   }
-          f_TOpCG2AsmOp_ovf := A_DIV;
-        OP_IMUL:      { simple signed multiply   }
-          f_TOpCG2AsmOp_ovf := A_MULO;
-        OP_MUL:       { simple unsigned multiply }
-          f_TOpCG2AsmOp_ovf := A_MULOU;
-        OP_NEG:       { simple negate            }
-          f_TOpCG2AsmOp_ovf := A_NEG;
-        OP_NOT:       { simple logical not       }
-          f_TOpCG2AsmOp_ovf := A_NOT;
-        OP_OR:        { simple logical or        }
-          f_TOpCG2AsmOp_ovf := A_OR;
-        OP_SAR:       { arithmetic shift-right   }
-          f_TOpCG2AsmOp_ovf := A_SRA;
-        OP_SHL:       { logical shift left       }
-          f_TOpCG2AsmOp_ovf := A_SLL;
-        OP_SHR:       { logical shift right      }
-          f_TOpCG2AsmOp_ovf := A_SRL;
-        OP_SUB:       { simple subtraction       }
-          f_TOpCG2AsmOp_ovf := A_SUB;
-        OP_XOR:       { simple exclusive or      }
-          f_TOpCG2AsmOp_ovf := A_XOR;
-        else
-          InternalError(2007010703);
-      end;{ case }
-  end;
+const
+  TOpcg2AsmOp: array[TOpCg] of TAsmOp = (
+    A_NONE,A_NONE,A_ADDU,A_AND,A_NONE,A_NONE,A_MULT,A_MULTU,A_NONE,A_NONE,
+    A_OR,A_SRAV,A_SLLV,A_SRLV,A_SUBU,A_XOR,A_NONE,A_NONE
+  );
 
 
 procedure TCGMIPS.make_simple_ref(list: tasmlist; var ref: treference);
@@ -302,6 +167,8 @@ begin
       reference_reset_symbol(tmpref,ref.symbol,ref.offset,ref.alignment);
       if (cs_create_pic in current_settings.moduleswitches) then
         begin
+          if not (pi_needs_got in current_procinfo.flags) then
+            InternalError(2013060102);
           { For PIC global symbols offset must be handled separately.
             Otherwise (non-PIC or local symbols) offset can be encoded
             into relocation even if exceeds 16 bits. }
@@ -360,16 +227,33 @@ end;
 procedure TCGMIPS.handle_reg_const_reg(list: tasmlist; op: Tasmop; src: tregister; a: tcgint; dst: tregister);
 var
   tmpreg: tregister;
+  op2: Tasmop;
+  negate: boolean;
 begin
-  if (a < simm16lo) or
-    (a > simm16hi) then
+  case op of
+    A_ADD,A_SUB:
+      op2:=A_ADDI;
+    A_ADDU,A_SUBU:
+      op2:=A_ADDIU;
+  else
+    InternalError(2013052001);
+  end;
+  negate:=op in [A_SUB,A_SUBU];
+  { subtraction is actually addition of negated value, so possible range is
+    off by one (-32767..32768) }
+  if (a < simm16lo+ord(negate)) or
+    (a > simm16hi+ord(negate)) then
   begin
     tmpreg := GetIntRegister(list, OS_INT);
     a_load_const_reg(list, OS_INT, a, tmpreg);
     list.concat(taicpu.op_reg_reg_reg(op, dst, src, tmpreg));
   end
   else
-    list.concat(taicpu.op_reg_reg_const(op, dst, src, a));
+  begin
+    if negate then
+      a:=-a;
+    list.concat(taicpu.op_reg_reg_const(op2, dst, src, a));
+  end;
 end;
 
 
@@ -386,14 +270,14 @@ begin
     (pi_needs_got in current_procinfo.flags) then
     begin
       current_procinfo.got := NR_GP;
-      rg[R_INTREGISTER]    := Trgcpu.Create(R_INTREGISTER, R_SUBD,
+      rg[R_INTREGISTER]    := Trgintcpu.Create(R_INTREGISTER, R_SUBD,
         [RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,
        RS_R10,RS_R11,RS_R12,RS_R13,RS_R14,RS_R15,RS_R16,RS_R17,RS_R18,RS_R19,
        RS_R20,RS_R21,RS_R22,RS_R23,RS_R24{,RS_R25}],
         first_int_imreg, []);
     end
   else
-    rg[R_INTREGISTER] := trgcpu.Create(R_INTREGISTER, R_SUBD,
+    rg[R_INTREGISTER] := trgintcpu.Create(R_INTREGISTER, R_SUBD,
       [RS_R2,RS_R3,RS_R4,RS_R5,RS_R6,RS_R7,RS_R8,RS_R9,
        RS_R10,RS_R11,RS_R12,RS_R13,RS_R14,RS_R15,RS_R16,RS_R17,RS_R18,RS_R19,
        RS_R20,RS_R21,RS_R22,RS_R23,RS_R24{,RS_R25}],
@@ -428,15 +312,6 @@ begin
 end;
 
 
-function TCGMIPS.getfpuregister(list: tasmlist; size: Tcgsize): Tregister;
-begin
-  if size = OS_F64 then
-    Result := rg[R_FPUREGISTER].getregister(list, R_SUBFD)
-  else
-    Result := rg[R_FPUREGISTER].getregister(list, R_SUBFS);
-end;
-
-
 procedure TCGMIPS.a_loadfpu_ref_cgpara(list: tasmlist; size: tcgsize; const ref: treference; const paraloc: TCGPara);
 var
   href, href2: treference;
@@ -576,17 +451,16 @@ procedure TCGMIPS.a_load_const_reg(list: tasmlist; size: TCGSize; a: tcgint; reg
 begin
   if (a = 0) then
     list.concat(taicpu.op_reg_reg(A_MOVE, reg, NR_R0))
-  { LUI allows to set the upper 16 bits, so we'll take full advantage of it }
-  else if (a and aint($ffff)) = 0 then
-    list.concat(taicpu.op_reg_const(A_LUI, reg, aint(a) shr 16))
   else if (a >= simm16lo) and (a <= simm16hi) then
     list.concat(taicpu.op_reg_reg_const(A_ADDIU, reg, NR_R0, a))
   else if (a>=0) and (a <= 65535) then
     list.concat(taicpu.op_reg_reg_const(A_ORI, reg, NR_R0, a))
   else
-  begin
-    list.concat(taicpu.op_reg_const(A_LI, reg, aint(a) ));
-  end;
+    begin
+      list.concat(taicpu.op_reg_const(A_LUI, reg, aint(a) shr 16));
+      if (a and aint($FFFF))<>0 then
+        list.concat(taicpu.op_reg_reg_const(A_ORI,reg,reg,a and aint($FFFF)));
+    end;
 end;
 
 
@@ -662,6 +536,7 @@ end;
 procedure TCGMIPS.a_load_reg_reg(list: tasmlist; fromsize, tosize: tcgsize; reg1, reg2: tregister);
 var
   instr: taicpu;
+  done: boolean;
 begin
   if (tcgsize2size[tosize] < tcgsize2size[fromsize]) or
     (
@@ -669,6 +544,7 @@ begin
     ) or  ((fromsize = OS_S8) and
              (tosize = OS_16)) then
   begin
+    done:=true;
     case tosize of
       OS_8:
         list.concat(taicpu.op_reg_reg_const(A_ANDI, reg2, reg1, $ff));
@@ -676,13 +552,7 @@ begin
         list.concat(taicpu.op_reg_reg_const(A_ANDI, reg2, reg1, $ffff));
       OS_32,
       OS_S32:
-      begin
-        instr := taicpu.op_reg_reg(A_MOVE, reg2, reg1);
-        list.Concat(instr);
-                  { Notify the register allocator that we have written a move instruction so
-                   it can try to eliminate it. }
-        add_move_instruction(instr);
-      end;
+        done:=false;
       OS_S8:
       begin
         list.concat(taicpu.op_reg_reg_const(A_SLL, reg2, reg1, 24));
@@ -698,17 +568,16 @@ begin
     end;
   end
   else
-  begin
-    if reg1 <> reg2 then
-    begin
-      { same size, only a register mov required }
-      instr := taicpu.op_reg_reg(A_MOVE, reg2, reg1);
-      list.Concat(instr);
-//                { Notify the register allocator that we have written a move instruction so
-//                  it can try to eliminate it. }
+    done:=false;
 
-      add_move_instruction(instr);
-    end;
+  if (not done) and (reg1 <> reg2) then
+  begin
+    { same size, only a register mov required }
+    instr := taicpu.op_reg_reg(A_MOVE, reg2, reg1);
+    list.Concat(instr);
+    { Notify the register allocator that we have written a move instruction so
+      it can try to eliminate it. }
+    add_move_instruction(instr);
   end;
 end;
 
@@ -753,6 +622,8 @@ begin
   reference_reset_symbol(href,ref.symbol,ref.offset,ref.alignment);
   if (cs_create_pic in current_settings.moduleswitches) then
     begin
+      if not (pi_needs_got in current_procinfo.flags) then
+        InternalError(2013060103);
       { For PIC global symbols offset must be handled separately.
         Otherwise (non-PIC or local symbols) offset can be encoded
         into relocation even if exceeds 16 bits. }
@@ -874,10 +745,10 @@ end;
 
 
 const
-  ops_mul_ovf: array[boolean] of TAsmOp = (A_MULOU, A_MULO);
-  ops_mul: array[boolean] of TAsmOp = (A_MULTU,A_MULT);
   ops_add: array[boolean] of TAsmOp = (A_ADDU, A_ADD);
   ops_sub: array[boolean] of TAsmOp = (A_SUBU, A_SUB);
+  ops_slt: array[boolean] of TAsmOp = (A_SLTU, A_SLT);
+  ops_slti: array[boolean] of TAsmOp = (A_SLTIU, A_SLTI);
   ops_and: array[boolean] of TAsmOp = (A_AND, A_ANDI);
   ops_or:  array[boolean] of TAsmOp = (A_OR, A_ORI);
   ops_xor: array[boolean] of TasmOp = (A_XOR, A_XORI);
@@ -912,11 +783,12 @@ begin
 
     OP_IMUL,OP_MUL:
       begin
-        list.concat(taicpu.op_reg_reg(ops_mul[op=OP_IMUL], dst, src));
+        list.concat(taicpu.op_reg_reg(TOpcg2AsmOp[op], dst, src));
         list.concat(taicpu.op_reg(A_MFLO, dst));
       end;
   else
-    list.concat(taicpu.op_reg_reg_reg(f_TOpCG2AsmOp(op, size), dst, dst, src));
+    a_op_reg_reg_reg(list, op, size, src, dst, dst);
+    exit;
   end;
   maybeadjustresult(list,op,size,dst);
 end;
@@ -932,8 +804,9 @@ end;
 
 procedure TCGMIPS.a_op_reg_reg_reg(list: tasmlist; op: TOpCg; size: tcgsize; src1, src2, dst: tregister);
 begin
-
-  list.concat(taicpu.op_reg_reg_reg(f_TOpCG2AsmOp(op, size), dst, src2, src1));
+  if (TOpcg2AsmOp[op]=A_NONE) then
+    InternalError(2013070305);
+  list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOp[op], dst, src2, src1));
   maybeadjustresult(list,op,size,dst);
 end;
 
@@ -1005,8 +878,14 @@ begin
           end;
       end;
 
-    OP_SHL,OP_SHR,OP_SAR:
-      list.concat(taicpu.op_reg_reg_const(f_TOpCG2AsmOp_ovf(op,size),dst,src,a));
+    OP_SHL:
+      list.concat(taicpu.op_reg_reg_const(A_SLL,dst,src,a));
+
+    OP_SHR:
+      list.concat(taicpu.op_reg_reg_const(A_SRL,dst,src,a));
+
+    OP_SAR:
+      list.concat(taicpu.op_reg_reg_const(A_SRA,dst,src,a));
 
   else
     internalerror(2007012601);
@@ -1018,7 +897,8 @@ end;
 procedure TCGMIPS.a_op_reg_reg_reg_checkoverflow(list: tasmlist; op: TOpCg; size: tcgsize; src1, src2, dst: tregister; setflags: boolean; var ovloc: tlocation);
 var
   signed: boolean;
-  hreg: TRegister;
+  hreg,hreg2: TRegister;
+  hl: tasmlabel;
 begin
   ovloc.loc := LOC_VOID;
   signed:=(size in [OS_S8,OS_S16,OS_S32]);
@@ -1043,18 +923,28 @@ begin
       end;
     OP_MUL,OP_IMUL:
       begin
+        list.concat(taicpu.op_reg_reg(TOpCg2AsmOp[op], src2, src1));
+        list.concat(taicpu.op_reg(A_MFLO, dst));
         if setflags then
-          { TODO: still uses a macro }
-          list.concat(taicpu.op_reg_reg_reg(ops_mul_ovf[op=OP_IMUL], dst, src2, src1))
-        else
-        begin
-          list.concat(taicpu.op_reg_reg(ops_mul[op=OP_IMUL], src2, src1));
-          list.concat(taicpu.op_reg(A_MFLO, dst));
-        end;
+          begin
+            current_asmdata.getjumplabel(hl);
+            hreg:=GetIntRegister(list,OS_INT);
+            list.concat(taicpu.op_reg(A_MFHI,hreg));
+            if (op=OP_IMUL) then
+              begin
+                hreg2:=GetIntRegister(list,OS_INT);
+                list.concat(taicpu.op_reg_reg_const(A_SRA,hreg2,dst,31));
+                a_cmp_reg_reg_label(list,OS_INT,OC_EQ,hreg2,hreg,hl);
+              end
+            else
+              a_cmp_reg_reg_label(list,OS_INT,OC_EQ,hreg,NR_R0,hl);
+            list.concat(taicpu.op_const(A_BREAK,6));
+            a_label(list,hl);
+          end;
       end;
     OP_AND,OP_OR,OP_XOR:
       begin
-        list.concat(taicpu.op_reg_reg_reg(f_TOpCG2AsmOp_ovf(op, size), dst, src2, src1));
+        list.concat(taicpu.op_reg_reg_reg(TOpCG2AsmOp[op], dst, src2, src1));
       end;
     else
       internalerror(2007012602);
@@ -1069,29 +959,80 @@ end;
 procedure TCGMIPS.a_cmp_const_reg_label(list: tasmlist; size: tcgsize; cmp_op: topcmp; a: tcgint; reg: tregister; l: tasmlabel);
 var
   tmpreg: tregister;
-  ai : Taicpu;
 begin
-if a = 0 then
-  tmpreg := NR_R0
-else
-begin
-  tmpreg := GetIntRegister(list, OS_INT);
-  a_load_const_reg(list,OS_INT,a,tmpreg);
-end;
-  ai := taicpu.op_reg_reg_sym(A_BC, reg, tmpreg, l);
-  ai.SetCondition(TOpCmp2AsmCond[cmp_op]);
-  list.concat(ai);
-  { Delay slot }
-  list.Concat(TAiCpu.Op_none(A_NOP));
+  if a = 0 then
+    a_cmp_reg_reg_label(list,size,cmp_op,NR_R0,reg,l)
+  else
+    begin
+      tmpreg := GetIntRegister(list,OS_INT);
+      if (a>=simm16lo) and (a<=simm16hi) and
+        (cmp_op in [OC_LT,OC_B,OC_GTE,OC_AE]) then
+        begin
+          list.concat(taicpu.op_reg_reg_const(ops_slti[cmp_op in [OC_LT,OC_GTE]],tmpreg,reg,a));
+          if cmp_op in [OC_LT,OC_B] then
+            a_cmp_reg_reg_label(list,size,OC_NE,NR_R0,tmpreg,l)
+          else
+            a_cmp_reg_reg_label(list,size,OC_EQ,NR_R0,tmpreg,l);
+        end
+      else
+        begin
+          a_load_const_reg(list,OS_INT,a,tmpreg);
+          a_cmp_reg_reg_label(list, size, cmp_op, tmpreg, reg, l);
+        end;
+    end;
 end;
 
+const
+  TOpCmp2AsmCond_z : array[OC_GT..OC_LTE] of TAsmCond=(
+    C_GTZ,C_LTZ,C_GEZ,C_LEZ
+  );
+  TOpCmp2AsmCond_eqne: array[topcmp] of TAsmCond = (C_NONE,
+   { eq      gt    lt    gte   lte   ne     }
+    C_NONE, C_NE, C_NE, C_EQ, C_EQ, C_NONE,
+   { be    b     ae    a }
+    C_EQ, C_NE, C_EQ, C_NE
+  );
 
 procedure TCGMIPS.a_cmp_reg_reg_label(list: tasmlist; size: tcgsize; cmp_op: topcmp; reg1, reg2: tregister; l: tasmlabel);
 var
   ai : Taicpu;
+  op: TAsmOp;
+  hreg: TRegister;
 begin
-  ai := taicpu.op_reg_reg_sym(A_BC, reg2, reg1, l);
-  ai.SetCondition(TOpCmp2AsmCond[cmp_op]);
+  if not (cmp_op in [OC_EQ,OC_NE]) then
+    begin
+      if ((reg1=NR_R0) or (reg2=NR_R0)) and (cmp_op in [OC_GT,OC_LT,OC_GTE,OC_LTE]) then
+        begin
+          if (reg2=NR_R0) then
+            begin
+              ai:=taicpu.op_reg_sym(A_BC,reg1,l);
+              ai.setcondition(TOpCmp2AsmCond_z[swap_opcmp(cmp_op)]);
+            end
+          else
+            begin
+              ai:=taicpu.op_reg_sym(A_BC,reg2,l);
+              ai.setcondition(TOpCmp2AsmCond_z[cmp_op]);
+            end;
+        end
+      else
+        begin
+          hreg:=GetIntRegister(list,OS_INT);
+          op:=ops_slt[cmp_op in [OC_LT,OC_LTE,OC_GT,OC_GTE]];
+          if (cmp_op in [OC_LTE,OC_GT,OC_BE,OC_A]) then   { swap operands }
+            list.concat(taicpu.op_reg_reg_reg(op,hreg,reg1,reg2))
+          else
+            list.concat(taicpu.op_reg_reg_reg(op,hreg,reg2,reg1));
+          if (TOpCmp2AsmCond_eqne[cmp_op]=C_NONE) then
+            InternalError(2013051501);
+          ai:=taicpu.op_reg_reg_sym(A_BC,hreg,NR_R0,l);
+          ai.SetCondition(TOpCmp2AsmCond_eqne[cmp_op]);
+        end;
+    end
+  else
+    begin
+      ai:=taicpu.op_reg_reg_sym(A_BC,reg2,reg1,l);
+      ai.SetCondition(TOpCmp2AsmCond[cmp_op]);
+    end;
   list.concat(ai);
   { Delay slot }
   list.Concat(TAiCpu.Op_none(A_NOP));
@@ -1131,6 +1072,20 @@ end;
 
 { *********** entry/exit code and address loading ************ }
 
+procedure FixupOffsets(p:TObject;arg:pointer);
+var
+  sym: tabstractnormalvarsym absolute p;
+begin
+  if (tsym(p).typ=paravarsym) and
+    (sym.localloc.loc=LOC_REFERENCE) and
+    (sym.localloc.reference.base=NR_FRAME_POINTER_REG) then
+    begin
+      sym.localloc.reference.base:=NR_STACK_POINTER_REG;
+      Inc(sym.localloc.reference.offset,PLongint(arg)^);
+    end;
+end;
+
+
 procedure TCGMIPS.g_proc_entry(list: tasmlist; localsize: longint; nostackframe: boolean);
 var
   lastintoffset,lastfpuoffset,
@@ -1233,8 +1188,8 @@ begin
     end
   else
     begin
-      list.concat(Taicpu.Op_reg_const(A_LI,NR_R9,-LocalSize));
-      list.concat(Taicpu.Op_reg_reg_reg(A_ADD,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R9));
+      a_load_const_reg(list,OS_32,-LocalSize,NR_R9);
+      list.concat(Taicpu.Op_reg_reg_reg(A_ADDU,NR_STACK_POINTER_REG,NR_STACK_POINTER_REG,NR_R9));
       if assigned(ra_save) then
         list.concat(ra_save);
       if assigned(framesave) then
@@ -1257,28 +1212,20 @@ begin
       list.concat(Taicpu.op_none(A_P_SET_NOMACRO));
     end;
 
-  with TMIPSProcInfo(current_procinfo) do
-    begin
-      href.offset:=0;
-      //if current_procinfo.framepointer<>NR_STACK_POINTER_REG then
-        href.base:=NR_FRAME_POINTER_REG;
+  href.base:=NR_STACK_POINTER_REG;
+
+  for i:=0 to MIPS_MAX_REGISTERS_USED_IN_CALL-1 do
+    if TMIPSProcInfo(current_procinfo).register_used[i] then
+      begin
+        reg:=parasupregs[i];
+        href.offset:=i*sizeof(aint)+LocalSize;
+        list.concat(taicpu.op_reg_ref(A_SW, newreg(R_INTREGISTER,reg,R_SUBWHOLE), href));
+      end;
 
-      for i:=0 to MIPS_MAX_REGISTERS_USED_IN_CALL-1 do
-        if (register_used[i]) then
-          begin
-            reg:=parasupregs[i];
-            if register_offset[i]=-1 then
-              comment(V_warning,'Register parameter has offset -1 in TCGMIPS.g_proc_entry');
-
-            //if current_procinfo.framepointer=NR_STACK_POINTER_REG then
-            //  href.offset:=register_offset[i]+Localsize
-            //else
-            href.offset:=register_offset[i];
-            list.concat(taicpu.op_reg_ref(A_SW, newreg(R_INTREGISTER,reg,R_SUBWHOLE), href));
-        end;
-    end;
   list.concatList(helplist);
   helplist.Free;
+  if current_procinfo.has_nestedprocs then
+    current_procinfo.procdef.parast.SymList.ForEachCall(@FixupOffsets,@LocalSize);
 end;
 
 
@@ -1388,7 +1335,6 @@ var
   src, dst: TReference;
   lab:      tasmlabel;
   Count, count2: aint;
-  ai : TaiCpu;
 
   function reference_is_reusable(const ref: treference): boolean;
     begin
@@ -1436,15 +1382,9 @@ begin
     { generate a loop }
     if Count > 4 then
     begin
-      { the offsets are zero after the a_loadaddress_ref_reg and just }
-      { have to be set to 8. I put an Inc there so debugging may be   }
-      { easier (should offset be different from zero here, it will be }
-      { easy to notice in the generated assembler                     }
       countreg := GetIntRegister(list, OS_INT);
       tmpreg1  := GetIntRegister(list, OS_INT);
       a_load_const_reg(list, OS_INT, Count, countreg);
-      { explicitely allocate R_O0 since it can be used safely here }
-      { (for holding date that's being copied)                    }
       current_asmdata.getjumplabel(lab);
       a_label(list, lab);
       list.concat(taicpu.op_reg_ref(A_LW, tmpreg1, src));
@@ -1452,11 +1392,7 @@ begin
       list.concat(taicpu.op_reg_reg_const(A_ADDIU, src.base, src.base, 4));
       list.concat(taicpu.op_reg_reg_const(A_ADDIU, dst.base, dst.base, 4));
       list.concat(taicpu.op_reg_reg_const(A_ADDIU, countreg, countreg, -1));
-      //list.concat(taicpu.op_reg_sym(A_BGTZ, countreg, lab));
-      ai := taicpu.op_reg_reg_sym(A_BC,countreg, NR_R0, lab);
-      ai.setcondition(C_GT);
-      list.concat(ai);
-      list.concat(taicpu.op_none(A_NOP));
+      a_cmp_reg_reg_label(list,OS_INT,OC_GT,NR_R0,countreg,lab);
       len := len mod 4;
     end;
     { unrolled loop }
@@ -1506,7 +1442,6 @@ var
   tmpreg1, countreg: TRegister;
   i:   aint;
   lab: tasmlabel;
-  ai : TaiCpu;
 begin
   if (len > 31) and
     { see comment in g_concatcopy }
@@ -1526,15 +1461,9 @@ begin
     { generate a loop }
     if len > 4 then
     begin
-      { the offsets are zero after the a_loadaddress_ref_reg and just }
-      { have to be set to 8. I put an Inc there so debugging may be   }
-      { easier (should offset be different from zero here, it will be }
-      { easy to notice in the generated assembler                     }
       countreg := cg.GetIntRegister(list, OS_INT);
       tmpreg1  := cg.GetIntRegister(list, OS_INT);
       a_load_const_reg(list, OS_INT, len, countreg);
-      { explicitely allocate R_O0 since it can be used safely here }
-      { (for holding date that's being copied)                    }
       current_asmdata.getjumplabel(lab);
       a_label(list, lab);
       list.concat(taicpu.op_reg_ref(A_LBU, tmpreg1, src));
@@ -1542,11 +1471,7 @@ begin
       list.concat(taicpu.op_reg_reg_const(A_ADDIU, src.base, src.base, 1));
       list.concat(taicpu.op_reg_reg_const(A_ADDIU, dst.base, dst.base, 1));
       list.concat(taicpu.op_reg_reg_const(A_ADDIU, countreg, countreg, -1));
-      //list.concat(taicpu.op_reg_sym(A_BGTZ, countreg, lab));
-      ai := taicpu.op_reg_reg_sym(A_BC,countreg, NR_R0, lab);
-      ai.setcondition(C_GT);
-      list.concat(ai);
-      list.concat(taicpu.op_none(A_NOP));
+      a_cmp_reg_reg_label(list,OS_INT,OC_GT,NR_R0,countreg,lab);
     end
     else
     begin

+ 19 - 31
compiler/mips/cpubase.pas

@@ -112,19 +112,16 @@ unit cpubase;
     type
       TAsmCond=(C_None,
         C_EQ, C_NE, C_LT, C_LE, C_GT, C_GE, C_LTU, C_LEU, C_GTU, C_GEU,
-        C_FEQ,  {Equal}
-        C_FNE, {Not Equal}
-        C_FGT,  {Greater}
-        C_FLT,  {Less}
-        C_FGE, {Greater or Equal}
-        C_FLE  {Less or Equal}
-
+        C_LTZ, C_LEZ, C_GTZ, C_GEZ,
+        C_COP1TRUE,
+        C_COP1FALSE
       );
 
     const
       cond2str : array[TAsmCond] of string[3]=('',
         'eq','ne','lt','le','gt','ge','ltu','leu','gtu','geu',
-        'feq','fne','fgt','flt','fge','fle'
+        'ltz','lez','gtz','gez',
+        'c1t','c1f'
       );
 
 {*****************************************************************************
@@ -138,17 +135,6 @@ unit cpubase;
       maxfpuregs = 8;
       maxaddrregs = 0;
 
-{*****************************************************************************
-                                Operand Sizes
-*****************************************************************************}
-
-    type
-      topsize = (S_NO,
-        S_B,S_W,S_L,S_BW,S_BL,S_WL,
-        S_IS,S_IL,S_IQ,
-        S_FS,S_FL,S_FX,S_D,S_Q,S_FV,S_FXX
-      );
-
 {*****************************************************************************
                                  Constants
 *****************************************************************************}
@@ -310,10 +296,17 @@ unit cpubase;
 
     function cgsize2subreg(regtype: tregistertype; s:tcgsize):tsubregister;
       begin
-        if s in [OS_64,OS_S64] then
-          cgsize2subreg:=R_SUBQ
+        case regtype of
+          R_FPUREGISTER:
+            if s=OS_F32 then
+              result:=R_SUBFS
+            else if s=OS_F64 then
+              result:=R_SUBFD
+            else
+              internalerror(2013021301);
         else
-          cgsize2subreg:=R_SUBWHOLE;
+          result:=R_SUBWHOLE;
+        end;
       end;
 
 
@@ -337,9 +330,7 @@ unit cpubase;
 
     function is_calljmp(o:tasmop):boolean;
       begin
-        { This isn't 100% perfect because the arm allows jumps also by writing to PC=R15.
-          To overcome this problem we simply forbid that FPC generates jumps by loading R15 }
-        is_calljmp:= o in [A_J,A_JAL,A_JALR,{ A_JALX, }A_JR, A_BA, A_BC, A_BC1T, A_BC1F];
+        is_calljmp:= o in [A_J,A_JAL,A_JALR,{ A_JALX, }A_JR, A_BA, A_BC];
       end;
 
 
@@ -347,12 +338,9 @@ unit cpubase;
       const
         inverse: array[TAsmCond] of TAsmCond=(C_None,
         C_NE, C_EQ, C_GE, C_GT, C_LE, C_LT, C_GEU, C_GTU, C_LEU, C_LTU,
-        C_FNE, 
-        C_FEQ, 
-        C_FLE, 
-        C_FGE, 
-        C_FLT, 
-        C_FGT  
+        C_GEZ, C_GTZ, C_LEZ, C_LTZ,
+        C_COP1FALSE,
+        C_COP1TRUE
         );
       begin
         result := inverse[c];

+ 27 - 22
compiler/mips/cpugas.pas

@@ -53,7 +53,7 @@ unit cpugas;
 
     uses
       cutils, systems, cpuinfo,
-      verbose, itcpugas, cgbase, cgutils;
+      globals, verbose, itcpugas, cgbase, cgutils;
 
     function gas_std_regname(r:Tregister):string;
       var
@@ -67,7 +67,10 @@ unit cpugas;
           R_SUBL, R_SUBW, R_SUBD, R_SUBQ:
            setsubreg(hr, R_SUBD);
         end;
-        result:=std_regname(hr);
+        if getregtype(r)=R_SPECIALREGISTER then
+          result:=tostr(getsupreg(r))
+        else
+          result:=std_regname(hr);
       end;
 
 
@@ -99,7 +102,7 @@ unit cpugas;
          { ABI selection }
          Replace(result,'$ABI','-mabi='+abitypestr[mips_abi]);
          { ARCH selection }
-         Replace(result,'$ARCH','-march='+lower(cputypestr[mips_cpu]));
+         Replace(result,'$ARCH','-march='+lower(cputypestr[current_settings.cputype]));
       end;
 
 {****************************************************************************}
@@ -121,7 +124,10 @@ unit cpugas;
             if assigned(ref.symbol) then
               result:=result+'+';
             result:=result+tostr(ref.offset);
-          end;
+          end
+        { asmreader appears to treat literal numbers as references }
+        else if (ref.symbol=nil) and (ref.base=NR_NO) and (ref.index=NR_NO) then
+          result:='0';
 
         { either base or index may be present, but not both }
         reg:=ref.base;
@@ -232,9 +238,13 @@ unit cpugas;
           (op=A_SEQ) or (op = A_SGE) or (op=A_SGEU) or (op=A_SGT) or
           (op=A_SGTU) or (op=A_SLE) or (op=A_SLEU) or (op=A_SNE)
           { JAL is not here! See comments in TCGMIPS.a_call_name. }
-          or (op=A_LA) or ((op=A_BC) and not (ai.condition in [C_EQ,C_NE])) {or (op=A_JAL)}
+          or (op=A_LA) or ((op=A_BC) and
+            not (ai.condition in [C_EQ,C_NE,C_GTZ,C_GEZ,C_LTZ,C_LEZ,C_COP1TRUE,C_COP1FALSE])) {or (op=A_JAL)}
           or (op=A_REM) or (op=A_REMU)
-          or (op=A_DIV) or (op=A_DIVU)
+          { DIV and DIVU are normally macros, but use $zero as first arg to generate a CPU instruction. }
+          or ((op=A_DIV) or (op=A_DIVU) and
+            ((ai.ops<>3) or (ai.oper[0]^.typ<>top_reg) or (ai.oper[0]^.reg<>NR_R0)))
+          or (op=A_MULO) or (op=A_MULOU)
           { A_LI is only a macro if the immediate is not in thez 16-bit range }
           or (op=A_LI);
       end;
@@ -255,8 +265,7 @@ unit cpugas;
         case op of
           A_P_SET_NOMIPS16:
             begin
-              s := #9 + '.set' + #9 + 'nomips16';
-              owner.AsmWriteLn(s);
+              owner.AsmWriteLn(#9'.set'#9'nomips16');
             end;
           A_P_MASK,
           A_P_FMASK:
@@ -266,37 +275,33 @@ unit cpugas;
             end;
           A_P_SET_MACRO:
             begin
-              s := #9 + '.set' + #9 + 'macro';
-              owner.AsmWriteLn(s);
+              owner.AsmWriteLn(#9'.set'#9'macro');
               TMIPSGNUAssembler(owner).nomacro:=false;
             end;
           A_P_SET_REORDER:
             begin
-              s := #9 + '.set' + #9 + 'reorder';
-              owner.AsmWriteLn(s);
+              owner.AsmWriteLn(#9'.set'#9'reorder');
               TMIPSGNUAssembler(owner).noreorder:=false;
             end;
           A_P_SET_NOMACRO:
             begin
-              s := #9 + '.set' + #9 + 'nomacro';
-              owner.AsmWriteLn(s);
+              owner.AsmWriteLn(#9'.set'#9'nomacro');
               TMIPSGNUAssembler(owner).nomacro:=true;
             end;
           A_P_SET_NOREORDER:
             begin
-              s := #9 + '.set' + #9 + 'noreorder';
-              owner.AsmWriteLn(s);
+              owner.AsmWriteLn(#9'.set'#9'noreorder');
               TMIPSGNUAssembler(owner).noreorder:=true;
             end;
-          A_P_SW:
+          A_P_SET_NOAT:
             begin
-              s := #9 + gas_op2str[A_SW] + #9 + getopstr(taicpu(hp).oper[0]^)+ ',' + getopstr(taicpu(hp).oper[2]^) + '(' + getopstr(taicpu(hp).oper[1]^) + ')';
-              owner.AsmWriteLn(s);
+              owner.AsmWriteln(#9'.set'#9'noat');
+              TMIPSGNUAssembler(owner).noat:=true;
             end;
-          A_P_LW:
+          A_P_SET_AT:
             begin
-              s := #9 + gas_op2str[A_LW] + #9 + getopstr(taicpu(hp).oper[0]^)+ ',' + getopstr(taicpu(hp).oper[2]^) + '(' + getopstr(taicpu(hp).oper[1]^) + ')';
-              owner.AsmWriteLn(s);
+              owner.AsmWriteln(#9'.set'#9'at');
+              TMIPSGNUAssembler(owner).noat:=false;
             end;
           A_LDC1:
             begin

+ 2 - 6
compiler/mips/cpuinfo.pas

@@ -32,9 +32,8 @@ Type
    { possible supported processors for this target }
    tcputype =
       (cpu_none,
-       cpu_mips_default,
        cpu_mips1,
-       cpu_mis2,
+       cpu_mips2,
        cpu_mips3,
        cpu_mips4,
        cpu_mips5,
@@ -80,7 +79,6 @@ Const
      GNU assembler in -arch=XXX option 
      this ilist needs to be uppercased }
    cputypestr : array[tcputype] of string[8] = ('',
-     { cpu_mips_default } 'MIPS2',
      { cpu_mips1        } 'MIPS1',
      { cpu_mips2        } 'MIPS2',
      { cpu_mips3        } 'MIPS3',
@@ -90,8 +88,6 @@ Const
      { cpu_mips32r2     } 'MIPS32R2'
    );
 
-   mips_cpu : tcputype = cpu_mips_default;
-
    fputypestr : array[tfputype] of string[9] = ('',
      'SOFT',
      'FPU_MIPS2','FPU_MIPS3'
@@ -115,7 +111,7 @@ Const
    supported_optimizerswitches = [cs_opt_regvar,cs_opt_loopunroll,cs_opt_nodecse,
                                   cs_opt_reorder_fields,cs_opt_fastmath];
 
-   level1optimizerswitches = [];
+   level1optimizerswitches = [cs_opt_level1];
    level2optimizerswitches = level1optimizerswitches + [cs_opt_regvar,cs_opt_stackframe,cs_opt_nodecse];
    level3optimizerswitches = level2optimizerswitches + [cs_opt_loopunroll];
    level4optimizerswitches = genericlevel4optimizerswitches + level3optimizerswitches + [];

+ 59 - 53
compiler/mips/cpupara.pas

@@ -31,8 +31,6 @@ interface
       symconst,symbase,symsym,symtype,symdef,paramgr,parabase,cgbase,cgutils;
 
     const
-      MIPS_MAX_OFFSET = 20;
-
       { The value below is OK for O32 and N32 calling conventions }
       MIPS_MAX_REGISTERS_USED_IN_CALL = 6;
 
@@ -63,9 +61,6 @@ interface
     type
       tparasupregs = array[0..MIPS_MAX_REGISTERS_USED_IN_CALL-1] of tsuperregister;
       tparasupregsused = array[0..MIPS_MAX_REGISTERS_USED_IN_CALL-1] of boolean;
-      tparasupregsize = array[0..MIPS_MAX_REGISTERS_USED_IN_CALL-1] of tcgsize;
-      tparasuprename = array[0..MIPS_MAX_REGISTERS_USED_IN_CALL-1] of shortstring;
-      tparasupregsoffset = array[0..MIPS_MAX_REGISTERS_USED_IN_CALL-1] of longint;
 
     const
 
@@ -76,13 +71,10 @@ interface
         function  push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;override;
         function  get_volatile_registers_int(calloption : tproccalloption):TCpuRegisterSet;override;
         function  get_volatile_registers_fpu(calloption : tproccalloption):TCpuRegisterSet;override;
-        {Returns a structure giving the information on the storage of the parameter
-        (which must be an integer parameter)
-        @param(nr Parameter number of routine, starting from 1)}
-        procedure getintparaloc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);override;
         function  create_paraloc_info(p : TAbstractProcDef; side: tcallercallee):longint;override;
         function  create_varargs_paraloc_info(p : TAbstractProcDef; varargspara:tvarargsparalist):longint;override;
         function  get_funcretloc(p : tabstractprocdef; side: tcallercallee; forcetempdef: tdef): tcgpara;override;
+        function  param_use_paraloc(const cgpara: tcgpara): boolean; override;
       private
         intparareg,
         intparasize : longint;
@@ -123,41 +115,17 @@ implementation
       end;
 
 
-    procedure TMIPSParaManager.GetIntParaLoc(pd : tabstractprocdef; nr : longint; var cgpara : tcgpara);
+    function TMIPSParaManager.param_use_paraloc(const cgpara: tcgpara): boolean;
       var
-        paraloc : pcgparalocation;
-        def : tdef;
+        paraloc: pcgparalocation;
       begin
-        if nr<1 then
-          InternalError(2002100806);
-        def:=tparavarsym(pd.paras[nr-1]).vardef;
-        cgpara.reset;
-        cgpara.size:=def_cgsize(def);
-        cgpara.intsize:=tcgsize2size[cgpara.size];
-        cgpara.alignment:=std_param_align;
-        cgpara.def:=def;
-        paraloc:=cgpara.add_location;
-        with paraloc^ do
-          begin
-            { MIPS: ABI dependent number of first parameters
-              are passed into registers }
-            dec(nr);
-            if nr<mips_nb_used_registers then
-              begin
-                loc:=LOC_REGISTER;
-                register:=newreg(R_INTREGISTER,parasupregs[nr],R_SUBWHOLE);
-              end
-            else
-              begin
-                { The other parameters are passed on the stack }
-                loc:=LOC_REFERENCE;
-                reference.index:=NR_STACK_POINTER_REG;
-                reference.offset:=nr*mips_sizeof_register_param;
-              end;
-            size:=OS_INT;
-          end;
+        paraloc:=cgpara.location;
+        if not assigned(paraloc) then
+          internalerror(200410102);
+        result:=(paraloc^.loc=LOC_REFERENCE) and (paraloc^.next=nil);
       end;
 
+
     { true if a parameter is too large to copy and only the address is pushed }
     function TMIPSParaManager.push_addr_param(varspez:tvarspez;def : tdef;calloption : tproccalloption) : boolean;
       begin
@@ -225,6 +193,7 @@ implementation
             if retcgsize=OS_F64 then
               setsubreg(paraloc^.register,R_SUBFD);
             paraloc^.size:=retcgsize;
+            paraloc^.def:=result.def;
           end
         else
          { Return in register }
@@ -239,6 +208,7 @@ implementation
                else
                  paraloc^.register:=NR_FUNCTION_RETURN64_LOW_REG;
                paraloc^.size:=OS_32;
+               paraloc^.def:=u32inttype;
                { high }
                paraloc:=result.add_location;
                paraloc^.loc:=LOC_REGISTER;
@@ -247,12 +217,14 @@ implementation
                else
                  paraloc^.register:=NR_FUNCTION_RETURN64_HIGH_REG;
                paraloc^.size:=OS_32;
+               paraloc^.def:=u32inttype;
              end
             else
 {$endif cpu64bitalu}
              begin
                paraloc^.loc:=LOC_REGISTER;
                paraloc^.size:=retcgsize;
+               paraloc^.def:=result.def;
                if side=callerside then
                  paraloc^.register:=newreg(R_INTREGISTER,RS_FUNCTION_RESULT_REG,cgsize2subreg(R_INTREGISTER,retcgsize))
                else
@@ -265,15 +237,18 @@ implementation
     procedure TMIPSParaManager.create_paraloc_info_intern(p : tabstractprocdef; side: tcallercallee;paras:tparalist);
       var
         paraloc      : pcgparalocation;
-        i            : integer;
+        i,j          : integer;
         hp           : tparavarsym;
         paracgsize   : tcgsize;
         paralen      : longint;
-    paradef      : tdef;
-    fpparareg    : integer;
-    reg           : tsuperregister;
-    alignment     : longint;
-    tmp          : longint;
+        locdef       : tdef;
+        paradef      : tdef;
+        fpparareg    : integer;
+        reg          : tsuperregister;
+        alignment    : longint;
+        tmp          : longint;
+        firstparaloc : boolean;
+        reg_and_stack: boolean;
       begin
         fpparareg := 0;
         for i:=0 to paras.count-1 do
@@ -290,6 +265,7 @@ implementation
                 paraloc^.loc:=LOC_REGISTER;
                 paraloc^.register:=NR_R0;
                 paraloc^.size:=OS_ADDR;
+                paraloc^.def:=voidpointertype;
                 break;
               end;
 
@@ -321,6 +297,7 @@ implementation
             //writeln('para: ',hp.Name,' typ=',hp.vardef.typ,' paracgsize=',paracgsize,' align=',hp.vardef.alignment);
             hp.paraloc[side].reset;
             hp.paraloc[side].Alignment:=alignment;
+            locdef:=paradef;
             if (paracgsize=OS_NO) or
               { Ordinals on caller side must be promoted to machine word }
               ((target_info.endian=endian_big) and     // applies to little-endian too?
@@ -329,9 +306,15 @@ implementation
               (paralen<tcgsize2size[OS_INT]))then
               begin
                 if is_signed(paradef) then
-                  paracgsize:=OS_S32
+                  begin
+                    paracgsize:=OS_S32;
+                    locdef:=s32inttype;
+                  end
                 else
-                  paracgsize:=OS_32;
+                  begin
+                    paracgsize:=OS_32;
+                    locdef:=u32inttype;
+                  end;
                 paralen:=align(paralen,4);
               end
             else
@@ -363,6 +346,10 @@ implementation
             if (not(paracgsize in [OS_F32, OS_F64])) or (fpparareg = 2) then
               can_use_float := false;
 
+            firstparaloc:=true;
+            { Is parameter split between stack and registers? }
+            reg_and_stack:=(side=calleeside) and
+              (paralen+intparasize>16) and (intparasize<16);
             while paralen>0 do
               begin
                 paraloc:=hp.paraloc[side].add_location;
@@ -370,9 +357,15 @@ implementation
                 if (paracgsize in [OS_64,OS_S64]) or
                    ((paracgsize in [OS_F32,OS_F64]) and
                      not(can_use_float)) then
-                  paraloc^.size:=OS_32
+                  begin
+                    paraloc^.size:=OS_32;
+                    paraloc^.def:=u32inttype;
+                  end
                 else
-                  paraloc^.size:=paracgsize;
+                  begin
+                    paraloc^.size:=paracgsize;
+                    paraloc^.def:=locdef;
+                  end;
 
                 { ret in param? }
                 if (vo_is_funcret in hp.varoptions) and
@@ -385,11 +378,14 @@ implementation
                     paraloc^.register:=newreg(R_INTREGISTER,parasupregs[0],R_SUBWHOLE);
                     inc(intparasize,align(tcgsize2size[paraloc^.size],sizeof(aint)));
                   end
-                { In case of po_delphi_nested_cc, the parent frame pointer
-                  is always passed on the stack. }
+                { "In case of po_delphi_nested_cc, the parent frame pointer
+                  is always passed on the stack". On other targets it is
+                  used to provide caller-side stack cleanup and prevent stackframe
+                  optimization. For MIPS this does not matter. }
                 else if (intparareg<mips_nb_used_registers) and
+                   (not reg_and_stack) {and
                    (not(vo_is_parentfp in hp.varoptions) or
-                    not(po_delphi_nested_cc in p.procoptions)) then
+                    not(po_delphi_nested_cc in p.procoptions))} then
                   begin
                     if (can_use_float) then
                       begin
@@ -427,6 +423,7 @@ implementation
                          begin
                            paraloc^.shiftval := (sizeof(aint)-tcgsize2size[paraloc^.size])*(-8);
                            paraloc^.size := OS_INT;
+                           paraloc^.def := osuinttype;
                          end;
                        inc(intparareg);
                        inc(intparasize,align(tcgsize2size[paraloc^.size],mips_sizeof_register_param));
@@ -434,8 +431,16 @@ implementation
                   end
                 else
                   begin
+                    if reg_and_stack then
+                      begin
+                        for j:=intparareg to mips_nb_used_registers-1 do
+                          tmipsprocinfo(current_procinfo).register_used[j]:=true;
+                        { all registers used now }
+                        intparareg:=mips_nb_used_registers;
+                      end;
                     paraloc^.loc:=LOC_REFERENCE;
                     paraloc^.size:=int_cgsize(paralen);
+                    paraloc^.def:=get_paraloc_def(locdef,paralen,firstparaloc);
 
                     if side=callerside then
                       begin
@@ -463,6 +468,7 @@ implementation
                     paralen:=0;
                   end;
                 dec(paralen,tcgsize2size[paraloc^.size]);
+                firstparaloc:=false;
               end;
           end;
         { O32 ABI reqires at least 16 bytes }

+ 9 - 16
compiler/mips/cpupi.pas

@@ -41,9 +41,6 @@ interface
       intregssave,
       floatregssave : byte;
       register_used : tparasupregsused;
-      register_size : tparasupregsize;
-      register_name : tparasuprename;
-      register_offset : tparasupregsoffset;
       computed_local_size : longint;
       save_gp_ref: treference;
       //intparareg,
@@ -66,20 +63,12 @@ implementation
       tgobj,paramgr,symconst;
 
     constructor TMIPSProcInfo.create(aparent: tprocinfo);
-      var
-        i : longint;
       begin
         inherited create(aparent);
         { if (cs_generate_stackframes in current_settings.localswitches) or
            not (cs_opt_stackframe in current_settings.optimizerswitches) then }
           include(flags,pi_needs_stackframe);
-        for i:=low(tparasupregs)  to high(tparasupregs) do
-          begin
-            register_used[i]:=false;
-            register_size[i]:=OS_NO;
-            register_name[i]:='invalid';
-            register_offset[i]:=-1;
-          end;
+
         floatregssave:=12; { f20-f31 }
         intregssave:=10;   { r16-r23,r30,r31 }
         computed_local_size:=-1;
@@ -129,10 +118,14 @@ implementation
 
     procedure TMIPSProcInfo.allocate_got_register(list:tasmlist);
       begin
-        if (cs_create_pic in current_settings.moduleswitches) and
-           (pi_needs_got in flags) and
-           not (po_nostackframe in procdef.procoptions) then
-          tg.gettemp(list,sizeof(aint),sizeof(aint),tt_noreuse,save_gp_ref);
+        if (cs_create_pic in current_settings.moduleswitches) then
+          begin
+            if (pi_do_call in flags) then
+              include(flags,pi_needs_got);
+            if (pi_needs_got in flags) and
+               not (po_nostackframe in procdef.procoptions) then
+              tg.gettemp(list,sizeof(aint),sizeof(aint),tt_noreuse,save_gp_ref);
+          end;
       end;
 
 

+ 67 - 0
compiler/mips/hlcgcpu.pas

@@ -38,6 +38,9 @@ uses
   type
     thlcgmips = class(thlcg2ll)
       function a_call_name(list: TAsmList; pd: tprocdef; const s: TSymStr; forceresdef: tdef; weak: boolean): tcgpara; override;
+      procedure a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister);override;
+    protected
+      procedure a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt); override;
   end;
 
   procedure create_hlcodegen;
@@ -45,11 +48,15 @@ uses
 implementation
 
   uses
+    verbose,
     aasmtai,
+    aasmcpu,
     cutils,
     globals,
+    defutil,
     cgobj,
     cpubase,
+    cpuinfo,
     cgcpu;
 
   function thlcgmips.a_call_name(list: TAsmList; pd: tprocdef; const s: TSymStr; forceresdef: tdef; weak: boolean): tcgpara;
@@ -79,6 +86,66 @@ implementation
     end;
 
 
+  procedure thlcgmips.a_load_subsetreg_reg(list: TAsmList; subsetsize, tosize: tdef; const sreg: tsubsetregister; destreg: tregister);
+    var
+      cgsubsetsize,
+      cgtosize: tcgsize;
+    begin
+      cgsubsetsize:=def_cgsize(subsetsize);
+      cgtosize:=def_cgsize(tosize);
+      if (current_settings.cputype<>cpu_mips32r2) then
+        inherited a_load_subsetreg_reg(list,subsetsize,tosize,sreg,destreg)
+      else if (sreg.bitlen>32) then
+        InternalError(2013070201)
+      else if (sreg.bitlen<>32) then
+        begin
+          list.concat(taicpu.op_reg_reg_const_const(A_EXT,destreg,sreg.subsetreg,
+            sreg.startbit,sreg.bitlen));
+          { types with a negative lower bound are always a base type (8, 16, 32 bits) }
+          if (cgsubsetsize in [OS_S8..OS_S128]) then
+            if ((sreg.bitlen mod 8) = 0) then
+              begin
+                cg.a_load_reg_reg(list,tcgsize2unsigned[cgsubsetsize],cgsubsetsize,destreg,destreg);
+                cg.a_load_reg_reg(list,cgsubsetsize,cgtosize,destreg,destreg);
+              end
+            else
+              begin
+                cg.a_op_const_reg(list,OP_SHL,OS_INT,32-sreg.bitlen,destreg);
+                cg.a_op_const_reg(list,OP_SAR,OS_INT,32-sreg.bitlen,destreg);
+              end;
+        end
+      else
+        cg.a_load_reg_reg(list,cgsubsetsize,cgtosize,sreg.subsetreg,destreg);
+    end;
+
+
+  procedure thlcgmips.a_load_regconst_subsetreg_intern(list: TAsmList; fromsize, subsetsize: tdef; fromreg: tregister; const sreg: tsubsetregister; slopt: tsubsetloadopt);
+    begin
+      if (current_settings.cputype<>cpu_mips32r2) then
+        inherited a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,sreg,slopt)
+      else if (sreg.bitlen>32) then
+        InternalError(2013070202)
+      else if (sreg.bitlen<>32) then
+        begin
+          case slopt of
+            SL_SETZERO:
+              fromreg:=NR_R0;
+            SL_SETMAX:
+              begin
+                fromreg:=cg.getintregister(list,OS_INT);
+                cg.a_load_const_reg(list,OS_INT,-1,fromreg);
+              end;
+          end;
+          list.concat(taicpu.op_reg_reg_const_const(A_INS,sreg.subsetreg,fromreg,
+            sreg.startbit,sreg.bitlen));
+        end
+      else if not (slopt in [SL_SETZERO,SL_SETMAX]) then
+        cg.a_load_reg_reg(list,def_cgsize(fromsize),def_cgsize(subsetsize),fromreg,sreg.subsetreg)
+      else
+        inherited a_load_regconst_subsetreg_intern(list,fromsize,subsetsize,fromreg,sreg,slopt);
+    end;
+
+
   procedure create_hlcodegen;
     begin
       hlcg:=thlcgmips.create;

+ 15 - 21
compiler/mips/ncpuadd.pas

@@ -158,30 +158,23 @@ begin
 end;
 
 
+const
+  cmpops: array[boolean] of TOpCmp = (OC_LT,OC_B);
+
 procedure tmipsaddnode.cmp64_lt(left_reg, right_reg: TRegister64;unsigned: boolean);
-var
-  hreg: tregister;
 begin
-  hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
-  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(ops[unsigned], hreg, left_reg.reghi, right_reg.reghi));
-  cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,NR_R0,hreg,current_procinfo.CurrTrueLabel);
+  cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,cmpops[unsigned],right_reg.reghi,left_reg.reghi,current_procinfo.CurrTrueLabel);
   cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,left_reg.reghi,right_reg.reghi,current_procinfo.CurrFalseLabel);
-  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SLTU, hreg, left_reg.reglo, right_reg.reglo));
-  cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,NR_R0,hreg,current_procinfo.CurrTrueLabel);
+  cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_B,right_reg.reglo,left_reg.reglo,current_procinfo.CurrTrueLabel);
   cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
 end;
 
 
 procedure tmipsaddnode.cmp64_le(left_reg, right_reg: TRegister64;unsigned: boolean);
-var
-  hreg: TRegister;
 begin
-  hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_INT);
-  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(ops[unsigned], hreg, right_reg.reghi, left_reg.reghi));
-  cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,NR_R0,hreg,current_procinfo.CurrFalseLabel);
+  cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,cmpops[unsigned],left_reg.reghi,right_reg.reghi,current_procinfo.CurrFalseLabel);
   cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,left_reg.reghi,right_reg.reghi,current_procinfo.CurrTrueLabel);
-  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_SLTU, hreg, right_reg.reglo, left_reg.reglo));
-  cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,NR_R0,hreg,current_procinfo.CurrFalseLabel);
+  cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_B,left_reg.reglo,right_reg.reglo,current_procinfo.CurrFalseLabel);
   cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrTrueLabel);
 end;
 
@@ -343,8 +336,9 @@ const
 
 procedure tmipsaddnode.second_cmpfloat;
 var
-  op,op2: tasmop;
+  op: tasmop;
   lreg,rreg: tregister;
+  ai: Taicpu;
 begin
   pass_left_right;
   if nf_swapped in flags then
@@ -356,11 +350,6 @@ begin
 
   op:=ops_cmpfloat[left.location.size=OS_F64,nodetype];
 
-  if (nodetype=unequaln) then
-    op2:=A_BC1F
-  else
-    op2:=A_BC1T;
-
   if (nodetype in [gtn,gten]) then
     begin
       lreg:=right.location.register;
@@ -373,7 +362,12 @@ begin
     end;
 
   current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,lreg,rreg));
-  current_asmdata.CurrAsmList.concat(Taicpu.op_sym(op2,current_procinfo.CurrTrueLabel));
+  ai:=taicpu.op_sym(A_BC,current_procinfo.CurrTrueLabel);
+  if (nodetype=unequaln) then
+    ai.SetCondition(C_COP1FALSE)
+  else
+    ai.SetCondition(C_COP1TRUE);
+  current_asmdata.CurrAsmList.concat(ai);
   current_asmdata.CurrAsmList.concat(TAiCpu.Op_none(A_NOP));
   cg.a_jmp_always(current_asmdata.CurrAsmList,current_procinfo.CurrFalseLabel);
 end;

+ 5 - 3
compiler/mips/ncpucnv.pas

@@ -71,8 +71,6 @@ uses
 *****************************************************************************}
 
 function tmipseltypeconvnode.first_int_to_real: tnode;
-var
-  fname: string[19];
 begin
   { converting a 64bit integer to a float requires a helper }
   if is_64bitint(left.resultdef) or
@@ -87,7 +85,11 @@ begin
       if is_signed(left.resultdef) then
         inserttypeconv(left,s32inttype)
       else
-        inserttypeconv(left,u32inttype);
+        begin
+          inserttypeconv(left,u32inttype);
+          if (cs_create_pic in current_settings.moduleswitches) then
+            include(current_procinfo.flags,pi_needs_got);
+        end;
       firstpass(left);
     end;
   result := nil;

+ 0 - 7
compiler/mips/ncpuld.pas

@@ -31,7 +31,6 @@ uses
 type
   tmipsloadnode = class(tcgloadnode)
     function pass_1 : tnode; override;
-    procedure generate_picvaraccess; override;
   end;
 
 implementation
@@ -59,12 +58,6 @@ begin
   end;
 end;
 
-procedure tmipsloadnode.generate_picvaraccess;
-begin
-  location.reference.base:=current_procinfo.got;
-  location.reference.refaddr:=addr_pic;
-  location.reference.symbol:=current_asmdata.RefAsmSymbol(tstaticvarsym(symtableentry).mangledname);
-end;
 
 begin
   cloadnode := tmipsloadnode;

+ 39 - 16
compiler/mips/ncpumat.pas

@@ -67,10 +67,14 @@ uses
                              TMipselMODDIVNODE
 *****************************************************************************}
 
+const
+  ops_div: array[boolean] of tasmop = (A_DIVU, A_DIV);
+
 procedure tMIPSELmoddivnode.pass_generate_code;
 var
   power: longint;
   tmpreg, numerator, divider, resultreg: tregister;
+  hl,hl2: tasmlabel;
 begin
   secondpass(left);
   secondpass(right);
@@ -112,25 +116,44 @@ begin
       right.resultdef, right.resultdef, True);
     divider := right.location.Register;
 
-
-    if (nodetype = modn) then
+    { GAS performs division in delay slot:
+
+          bne   denom,$zero,.L1
+          div   $zero,numerator,denom
+          break 7
+     .L1:
+          mflo  result
+
+      We can't yet do the same without prior fixing the spilling code:
+      if registers require spilling, loads can be inserted before 'div',
+      resulting in invalid code.
+    }
+    current_asmdata.CurrAsmList.Concat(taicpu.op_reg_reg_reg(ops_div[is_signed(resultdef)],NR_R0,numerator,divider));
+    { Check for zero denominator, omit if dividing by constant (constants are checked earlier) }
+    if (right.nodetype<>ordconstn) then
     begin
-      if is_signed(right.resultdef) then
-      begin
-        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_REM, resultreg, numerator, divider));
-      end
-      else
-        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_REMU, resultreg, numerator, divider));
-    end
-    else
+      current_asmdata.getjumplabel(hl);
+      cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,divider,NR_R0,hl);
+      current_asmdata.CurrAsmList.Concat(taicpu.op_const(A_BREAK,7));
+      cg.a_label(current_asmdata.CurrAsmList,hl);
+    end;
+
+    { Dividing low(longint) by -1 will overflow }
+    if is_signed(right.resultdef) and (cs_check_overflow in current_settings.localswitches) then
     begin
-      if is_signed({left.resultdef}right.resultdef) then
-      begin
-        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_DIV, resultreg, numerator, divider));
-      end
-      else
-        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_DIVU, resultreg, numerator, divider));
+      current_asmdata.getjumplabel(hl2);
+      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_const(A_ADDIU,NR_R1,NR_R0,-1));
+      cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,divider,NR_R1,hl2);
+      current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_LUI,NR_R1,$8000));
+      cg.a_cmp_reg_reg_label(current_asmdata.CurrAsmList,OS_INT,OC_NE,numerator,NR_R1,hl2);
+      current_asmdata.CurrAsmList.concat(taicpu.op_const(A_BREAK,6));
+      cg.a_label(current_asmdata.CurrAsmList,hl2);
     end;
+
+   if (nodetype=modn) then
+     current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_MFHI,resultreg))
+   else
+     current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_MFLO,resultreg));
   end;
   { set result location }
   location.loc      := LOC_REGISTER;

+ 2 - 9
compiler/mips/ncpuset.pas

@@ -104,22 +104,15 @@ begin
   { create reference }
   reference_reset_symbol(href, table, 0, sizeof(aint));
   href.offset := (-aint(min_)) * 4;
-  basereg     := cg.getaddressregister(current_asmdata.CurrAsmList);
-  cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList, href, basereg);
-
+  href.base:=indexreg;
   jmpreg := cg.getaddressregister(current_asmdata.CurrAsmList);
-
-  reference_reset(href, sizeof(aint));
-  href.index := indexreg;
-  href.base  := basereg;
   cg.a_load_ref_reg(current_asmdata.CurrAsmList, OS_ADDR, OS_ADDR, href, jmpreg);
 
   current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_JR, jmpreg));
   { Delay slot }
   current_asmdata.CurrAsmList.concat(taicpu.op_none(A_NOP));
   { generate jump table }
-  if not(cs_opt_size in current_settings.optimizerswitches) then
-    jumpSegment.concat(Tai_Align.Create_Op(4, 0));
+  new_section(jumpSegment,sec_rodata,current_procinfo.procdef.mangledname,sizeof(aint));
   jumpSegment.concat(Tai_label.Create(table));
   last := min_;
   genitem(hp);

+ 10 - 28
compiler/mips/opcode.inc

@@ -1,18 +1,17 @@
 A_NONE,
-A_P_LW,
 A_P_SET_NOMIPS16,
 A_P_SET_NOREORDER,
 A_P_SET_NOMACRO,
 A_P_SET_MACRO,
 A_P_SET_REORDER,
+A_P_SET_NOAT,
+A_P_SET_AT,
 A_P_FRAME,
 A_P_MASK,
 A_P_FMASK,
-A_P_SW,
 A_P_CPLOAD,
 A_P_CPRESTORE,
 A_P_CPADD,
-A_SPARC8UNIMP,
 A_NOP,
 A_NOT,
 A_NEG,
@@ -95,18 +94,6 @@ A_MFHI,
 A_MTHI,
 A_MFLO,
 A_MTLO,
-A_MULTG,
-A_DMULTG,
-A_MULTUG,
-A_DMULTUG,
-A_DIVG,
-A_DDIVG,
-A_DIVUG,
-A_DDIVUG,
-A_MODG,
-A_DMODG,
-A_MODUG,
-A_DMODUG,
 A_J,
 A_JAL,
 A_JR,
@@ -178,8 +165,6 @@ A_FLOOR_W_S,
 A_FLOOR_W_D,
 A_FLOOR_L_S,
 A_FLOOR_L_D,
-A_BC1T,
-A_BC1F,
 A_C_EQ_D,
 A_C_EQ_S,
 A_C_LE_D,
@@ -195,15 +180,12 @@ A_SLE,
 A_SLEU,
 A_SNE,
 A_SYSCALL,
-A_ADD64SUB,
-A_SUB64SUB,
-A_MUL64SUB,
-A_DIV64SUB,
-A_NEG64SUB,
-A_NOT64SUB,
-A_OR64SUB,
-A_SAR64SUB,
-A_SHL64SUB,
-A_SHR64SUB,
-A_XOR64SUB,
+A_BREAK,
+A_EHB,
+A_EXT,
+A_INS,
+A_MFC0,
+A_MTC0,
+A_SDBBP,
+A_WRPGPR,
 A_END_DEF

+ 30 - 59
compiler/mips/racpugas.pas

@@ -35,7 +35,6 @@ Interface
       procedure BuildReference(oper : TOperand);
       procedure BuildOperand(oper : TOperand);
       procedure BuildOpCode(instr : TInstruction);
-      procedure ReadPercent(oper : TOperand);
       procedure ReadSym(oper : TOperand);
       procedure ConvertCalljmp(instr : TInstruction);
       procedure handlepercent;override;
@@ -60,6 +59,7 @@ Interface
       scanner,
       procinfo,
       rabase,
+      rgbase,
       itcpugas,
       cgbase,cgobj
       ;
@@ -100,52 +100,28 @@ Interface
       begin
         Inherited handledollar;
         if (c in ['0'..'9','a'..'z']) then
-      begin
+          begin
             Consume(AS_DOLLAR);
             if (actasmtoken=AS_INTNUM) or (actasmtoken=AS_ID) then
-           begin
-             { Try to convert to std register }
-                  if actasmtoken=AS_INTNUM then
-                    actasmregister:=gas_regnum_search('$'+actasmpattern)
-                  else
-                    begin
-                      { AS_ID is uppercased by default but register names
-                        are lowercase }
-                      actasmpattern:=lower(actasmpattern);
-                      actasmregister:=gas_regnum_search(actasmpattern);
-                      if actasmregister=NR_NO then
-                        actasmregister:=std_regnum_search(actasmpattern);
-                    end;
-                  if actasmregister<>NR_NO then
-                    begin
-                      // Consume(actasmtoken);
-                      actasmtoken:=AS_REGISTER;
-                    end;
-              end;
-          end;
-      end;
-
-    procedure TMipsReader.ReadPercent(oper : TOperand);
-      begin
-        { check for ...@ }
-        if actasmtoken=AS_AT then
-          begin
-            if (oper.opr.ref.symbol=nil) and
-               (oper.opr.ref.offset = 0) then
-              Message(asmr_e_invalid_reference_syntax);
-            Consume(AS_AT);
-            if actasmtoken=AS_ID then
               begin
-                if upper(actasmpattern)='LO' then
-                  oper.opr.ref.refaddr:=addr_low
-                else if upper(actasmpattern)='HI' then
-                  oper.opr.ref.refaddr:=addr_high
+                { Try to convert to std register }
+                if actasmtoken=AS_INTNUM then
+                  actasmregister:=gas_regnum_search('$'+actasmpattern)
                 else
-                  Message(asmr_e_invalid_reference_syntax);
-                Consume(AS_ID);
-              end
-            else
-              Message(asmr_e_invalid_reference_syntax);
+                  begin
+                    { AS_ID is uppercased by default but register names
+                      are lowercase }
+                    actasmpattern:=lower(actasmpattern);
+                    actasmregister:=gas_regnum_search(actasmpattern);
+                    if actasmregister=NR_NO then
+                      actasmregister:=std_regnum_search(actasmpattern);
+                  end;
+                if actasmregister<>NR_NO then
+                  begin
+                    // Consume(actasmtoken);
+                    actasmtoken:=AS_REGISTER;
+                  end;
+              end;
           end;
       end;
 
@@ -154,13 +130,11 @@ Interface
       var
         l : aint;
         regs : byte;
-        opening : TAsmToken;
         hasimm : boolean;
       begin
         oper.initref;
         regs:=0;
         hasimm:=false;
-        opening:=ActAsmToken;
         Consume(ActAsmToken);
         repeat
           Case actasmtoken of
@@ -203,15 +177,11 @@ Interface
                 inc(oper.opr.ref.offset,l);
               End;
 
-            AS_RPAREN,
-            AS_RBRACKET:
+            AS_RPAREN:
               begin
                 if (regs=0) and (not hasimm) then
                   Message(asmr_e_invalid_reference_syntax);
-                if opening=AS_LPAREN then
-                  Consume(AS_RPAREN)
-                else
-                  Consume(AS_RBRACKET);
+                Consume(AS_RPAREN);
                 break;
               end;
 
@@ -242,14 +212,12 @@ Interface
           end;
          actasmpattern[0]:=chr(len);
          uppervar(actasmpattern);
-         if is_register(actasmpattern) then
-           exit;
          if (actasmpattern='%HI') then
            actasmtoken:=AS_HI
          else if (actasmpattern='%LO')then
            actasmtoken:=AS_LO
          else
-           Message(asmr_e_invalid_register);
+           Message(asmr_e_invalid_reference_syntax);
       end;
 
 
@@ -376,7 +344,6 @@ Interface
                   negative:=(prevasmtoken=AS_MINUS);
               end;
 
-            AS_LBRACKET,
             AS_LPAREN :
               begin
                 { memory reference }
@@ -459,9 +426,7 @@ Interface
                        end
                       else
                        begin
-                         if oper.SetupVar(expr,false) then
-                           ReadPercent(oper)
-                         else
+                         if not oper.SetupVar(expr,false) then
                           Begin
                             { look for special symbols ... }
                             if expr= '__HIGH' then
@@ -680,7 +645,13 @@ Interface
           begin
             condition := actcondition;
             if is_calljmp(opcode) then
-            ConvertCalljmp(instr);
+              ConvertCalljmp(instr);
+            if (opcode in [A_MTC0,A_MFC0]) then
+              begin
+                if (ops<2) or (operands[2].opr.typ<>OPR_REGISTER) then
+                  message(asmr_e_syn_operand);
+                operands[2].opr.reg:=newreg(R_SPECIALREGISTER,getsupreg(operands[2].opr.reg),R_SUBNONE);
+              end;
             ConcatInstruction(curlist);
             Free;
           end;

+ 58 - 0
compiler/mips/rgcpu.pas

@@ -38,8 +38,12 @@ unit rgcpu;
         function get_spill_subreg(r : tregister) : tsubregister;override;
         procedure do_spill_read(list:tasmlist;pos:tai;const spilltemp:treference;tempreg:tregister);override;
         procedure do_spill_written(list:tasmlist;pos:tai;const spilltemp:treference;tempreg:tregister);override;
+        function do_spill_replace(list:TAsmList;instr:taicpu;orgreg:tsuperregister;const spilltemp:treference):boolean;override;
       end;
 
+      trgintcpu=class(trgcpu)
+        procedure add_cpu_interferences(p:tai);override;
+      end;
 
 implementation
 
@@ -152,4 +156,58 @@ implementation
           inherited do_spill_written(list,pos,spilltemp,tempreg);
     end;
 
+
+    function trgcpu.do_spill_replace(list:TAsmList;instr:taicpu;orgreg:tsuperregister;const spilltemp:treference):boolean;
+      begin
+        result:=false;
+        { Replace 'move  orgreg,src' with 'sw  src,spilltemp'
+              and 'move  dst,orgreg' with 'lw  dst,spilltemp' }
+        { TODO: A_MOV_S and A_MOV_D for float registers are also replaceable }
+        if (instr.opcode<>A_MOVE) or (abs(spilltemp.offset)>32767) then
+          exit;
+        if (instr.ops<>2) or
+           (instr.oper[0]^.typ<>top_reg) or
+           (instr.oper[1]^.typ<>top_reg) or
+           (getregtype(instr.oper[0]^.reg)<>regtype) or
+           (getregtype(instr.oper[1]^.reg)<>regtype) then
+          InternalError(2013061001);
+        if get_alias(getsupreg(instr.oper[1]^.reg))=orgreg then
+          begin
+            instr.opcode:=A_LW;
+          end
+        else if get_alias(getsupreg(instr.oper[0]^.reg))=orgreg then
+          begin
+            instr.opcode:=A_SW;
+            instr.oper[0]^:=instr.oper[1]^;
+          end
+        else
+          InternalError(2013061002);
+        instr.oper[1]^.typ:=top_ref;
+        new(instr.oper[1]^.ref);
+        instr.oper[1]^.ref^:=spilltemp;
+        result:=true;
+      end;
+
+
+    procedure trgintcpu.add_cpu_interferences(p: tai);
+      var
+        supreg: tsuperregister;
+      begin
+        if p.typ<>ait_instruction then
+          exit;
+        if (taicpu(p).ops>=1) and (taicpu(p).oper[0]^.typ=top_reg) and
+          (getregtype(taicpu(p).oper[0]^.reg)=regtype) and
+          (taicpu(p).spilling_get_operation_type(0) in [operand_write,operand_readwrite]) then
+          begin
+            { prevent merging registers with frame/stack pointer, $zero and $at
+              if an instruction writes to the register }
+            supreg:=getsupreg(taicpu(p).oper[0]^.reg);
+            add_edge(supreg,RS_STACK_POINTER_REG);
+            add_edge(supreg,RS_FRAME_POINTER_REG);
+            add_edge(supreg,RS_R0);
+            add_edge(supreg,RS_R1);
+          end;
+      end;
+
+
 end.

+ 10 - 28
compiler/mips/strinst.inc

@@ -1,18 +1,17 @@
 'none',
-'p_lw',
 'p_set_nomips16',
 'p_set_noreorder',
 'p_set_nomacro',
 'p_set_macro',
 'p_set_reorder',
+'p_set_noat',
+'p_set_at',
 '.frame',
 '.mask',
 '.fmask',
-'p_sw',
 '.cpload',
 '.cprestore',
 '.cpadd',
-'sparc8unimp',
 'nop',
 'not',
 'neg',
@@ -95,18 +94,6 @@
 'mthi',
 'mflo',
 'mtlo',
-'multg',
-'dmultg',
-'multug',
-'dmultug',
-'divg',
-'ddivg',
-'divug',
-'ddivug',
-'modg',
-'dmodg',
-'modug',
-'dmodug',
 'j',
 'jal',
 'jr',
@@ -178,8 +165,6 @@
 'floor.w.d',
 'floor.l.s',
 'floor.l.d',
-'bc1t',
-'bc1f',
 'c.eq.d',
 'c.eq.s',
 'c.le.d',
@@ -195,15 +180,12 @@
 'sleu',
 'sne',
 'syscall',
-'add64sub',
-'sub64sub',
-'mul64sub',
-'div64sub',
-'neg64sub',
-'not64sub',
-'or64sub',
-'sar64sub',
-'shl64sub',
-'shr64sub',
-'xor64sub',
+'break',
+'ehb',
+'ext',
+'ins',
+'mfc0',
+'mtc0',
+'sdbbp',
+'wrpgpr',
 'end_def'

+ 25 - 6
compiler/msg/errord.msg

@@ -3,7 +3,7 @@
 #   Latest updates contributed by Karl-Michael Schindler aka mischi
 #   <karl-michael.schindler at web.de>
 #
-#   Based on errore.msg of SVN revision 23596
+#   Based on errore.msg of SVN revision 24910 + 1
 #
 #   This file is part of the Free Pascal Compiler
 #   Copyright (c) 1998-2013 by the Free Pascal Development team
@@ -143,7 +143,7 @@ general_f_oserror=01025_F_Betriebsystemfehler: $1
 #
 # Scanner
 #
-# 02092 is the last used one
+# 02094 is the last used one
 #
 % \section{Scanner messages.}
 % This section lists the messages that the scanner emits. The scanner takes
@@ -395,6 +395,12 @@ scan_w_unavailable_system_codepage=02091_W_Die aktuelle System-Codepage "$1" ste
 % the compiler with support for this codepage.
 scan_w_setpeoptflags_not_support=02092_W_SETPEOPTFLAGS wird f�r das Ziel-OS nicht unterst�tzt
 % The \var{\{\$SETPEOPTFLAGS\}} directive is not supported by the target OS.
+scan_e_illegal_peflag=02093_E_Ung�ltiges Argument f�r SETPEFLAGS
+% The given argument for SETPEFLAGS is neither a correct named value nor an
+% ordinal value
+scan_e_illegal_peoptflag=02094_E_Ung�ltiges Argument f�r SETPEOPTFLAGS
+% The given argument for SETPEOPTFLAGS is neither a correct named value nor an
+% ordinal value
 %
 % \end{description}
 # EndOfTeX
@@ -402,7 +408,7 @@ scan_w_setpeoptflags_not_support=02092_W_SETPEOPTFLAGS wird f
 #
 # Parser
 #
-# 03332 is the last used one
+# 03334 is the last used one
 #
 % \section{Parser messages}
 % This section lists all parser messages. The parser takes care of the
@@ -1459,7 +1465,7 @@ parser_e_jvm_invalid_virtual_constructor_call=03321_E_Einen virtuellen Konstrukt
 % for the current instance inside another constructor.
 parser_e_method_lower_visibility=03322_E_Die �berschreibende Method "$1" kann keine niedrigere Sichtbarkeit ($2) haben als in der Elternklasse $3 ($4)
 % The JVM does not allow lowering the visibility of an overriding method.
-parser_w_nostackframe_without_assembler=03323_W_Prozedur/Funktion  mit der Aufrufoption NOSTACKFRAME aber ohne ASSEMBLER deklariert 
+parser_e_nostackframe_without_assembler=03323_E_Prozedur/Funktion  mit der Aufrufoption NOSTACKFRAME aber ohne ASSEMBLER deklariert 
 % nostackframe call modifier is supposed to be used in conjunction with assembler.
 parser_e_nostackframe_with_locals=03324_E_Prozedur/Funktion  mit der Aufrufoption NOSTACKFRAME deklariert, aber die GrӇe des lokalen Stack ist $1
 % nostackframe call modifier used without assembler modifier
@@ -1498,6 +1504,11 @@ parser_e_not_allowed_in_record=03332_E_Sichtbarkeits-Abschnitt "$1" ist in Recor
 % The visibility sections \var(protected) and \var(strict protected) are only
 % useful together with inheritance. Since records do not support that they are
 % forbidden.
+parser_e_proc_dir_not_allowed=03333_E_Prozedurdirektive "$1" ist hier nicht erlaubt
+% This procedure directive is not allowed in the given context. E.g. "static"
+% is not allowed for instance methods or class operators.
+parser_e_no_assembler_in_generic=03334_E_Assemblerbl”cke sind innerhalb von "generics" nicht erlaubt
+% The use of assembler blocks/routines is not allowed inside generics.
 %
 % \end{description}
 # EndOfTeX
@@ -1928,7 +1939,7 @@ type_e_invalid_default_value=04119_E_Der Defaultwert f
 % Parameters declared as structured types, such as files, variants, non-dynamic
 % arrays and TP-style objects, cannot have a default value.
 type_e_type_not_allowed_for_type_helper=04120_E_Typ "$1" kann durch einen Typhelfer nicht erweitert werden
-% Types like procedural variables can not be extended by type helpers
+% Types like procedural variables cannot be extended by type helpers
 %
 % \end{description}
 # EndOfTeX
@@ -2710,7 +2721,7 @@ asmw_e_handlerdata_no_handler=08025_E_".seh_handlerdata"-Direktive ohne vorangeh
 #
 # Executing linker/assembler
 #
-# 09033 is the last used one
+# 09034 is the last used one
 #
 # BeginOfTeX
 %
@@ -2802,6 +2813,9 @@ exec_e_cant_write_resource_file=09032_E_Kann die Resourcedatei "$1" nicht schrei
 % An error occurred resource file cannot be written.
 exec_n_backquote_cat_file_not_found=09033_N_Die Datei "$1" wurde mit dem Kommando cat nicht gefunden
 % The compiler did not find the file that should be expanded into linker parameters
+exec_w_init_file_not_found=09034_W_"$1" nicht gefunden; dies wird wahrscheinlich zu einem Fehler beim Linken f�hren
+% The compiler adds certain startup code files to the linker only when they are found.
+% If they are not found, they are not added and this might cause a linking failure.
 %
 % \end{description}
 # EndOfTeX
@@ -3346,6 +3360,7 @@ diskutiert werden k
 #    3 = 80x86 targets
 #    4 = x86_64
 #    6 = 680x0 targets
+#    8 = 8086 (16-bit) targets
 #    A = ARM
 #    e = in extended debug mode only
 #    F = help for the 'fpc' binary (independent of the target compiler)
@@ -3641,6 +3656,10 @@ p*2Wi_Benutze interne Resourcen (Darwin)
 3*2WI_Die Verwendung der "import"-Abschnitte ein/ausschalten (Windows)
 4*2WI_Die Verwendung der "import"-Abschnitte ein/ausschalten (Windows)
 A*2WI_Die Verwendung der "import"-Abschnitte ein/ausschalten (Windows)
+8*2Wm<x>_Setze Speichermodell
+8*3WmTiny_Winziges (tiny) Speichermodell
+8*3WmSmall_Kleines (small) Speichermodell (Voreinstellung)
+8*3WmMedium_Mittleres (medium) Speichermodell
 3*2WM<x>_Minimale Mac OS X Deployment Version: 10.4, 10.5.1, ... (Darwin)
 4*2WM<x>_Minimale Mac OS X Deployment Version: 10.4, 10.5.1, ... (Darwin)
 p*2WM<x>_Minimale Mac OS X Deployment Version: 10.4, 10.5.1, ... (Darwin)

+ 25 - 7
compiler/msg/errordu.msg

@@ -3,7 +3,7 @@
 #   Latest updates contributed by Karl-Michael Schindler aka mischi
 #   <karl-michael.schindler at web.de>
 #
-#   Based on errore.msg of SVN revision 23596
+#   Based on errore.msg of SVN revision 24910 + 1
 #
 #   This file is part of the Free Pascal Compiler
 #   Copyright (c) 1998-2013 by the Free Pascal Development team
@@ -143,7 +143,7 @@ general_f_oserror=01025_F_Betriebsystemfehler: $1
 #
 # Scanner
 #
-# 02092 is the last used one
+# 02094 is the last used one
 #
 % \section{Scanner messages.}
 % This section lists the messages that the scanner emits. The scanner takes
@@ -395,7 +395,12 @@ scan_w_unavailable_system_codepage=02091_W_Die aktuelle System-Codepage "$1" ste
 % the compiler with support for this codepage.
 scan_w_setpeoptflags_not_support=02092_W_SETPEOPTFLAGS wird für das Ziel-OS nicht unterstützt
 % The \var{\{\$SETPEOPTFLAGS\}} directive is not supported by the target OS.
-%
+scan_e_illegal_peflag=02093_E_Ungültiges Argument für SETPEFLAGS
+% The given argument for SETPEFLAGS is neither a correct named value nor an
+% ordinal value
+scan_e_illegal_peoptflag=02094_E_Ungültiges Argument für SETPEOPTFLAGS
+% The given argument for SETPEOPTFLAGS is neither a correct named value nor an
+% ordinal value
 %
 % \end{description}
 # EndOfTeX
@@ -403,7 +408,7 @@ scan_w_setpeoptflags_not_support=02092_W_SETPEOPTFLAGS wird für das Ziel-OS nic
 #
 # Parser
 #
-# 03332 is the last used one
+# 03334 is the last used one
 #
 % \section{Parser messages}
 % This section lists all parser messages. The parser takes care of the
@@ -1460,7 +1465,7 @@ parser_e_jvm_invalid_virtual_constructor_call=03321_E_Einen virtuellen Konstrukt
 % for the current instance inside another constructor.
 parser_e_method_lower_visibility=03322_E_Die überschreibende Method "$1" kann keine niedrigere Sichtbarkeit ($2) haben als in der Elternklasse $3 ($4)
 % The JVM does not allow lowering the visibility of an overriding method.
-parser_w_nostackframe_without_assembler=03323_W_Prozedur/Funktion  mit der Aufrufoption NOSTACKFRAME aber ohne ASSEMBLER deklariert 
+parser_e_nostackframe_without_assembler=03323_E_Prozedur/Funktion  mit der Aufrufoption NOSTACKFRAME aber ohne ASSEMBLER deklariert
 % nostackframe call modifier is supposed to be used in conjunction with assembler.
 parser_e_nostackframe_with_locals=03324_E_Prozedur/Funktion  mit der Aufrufoption NOSTACKFRAME deklariert, aber die Größe des lokalen Stack ist $1
 % nostackframe call modifier used without assembler modifier
@@ -1499,6 +1504,11 @@ parser_e_not_allowed_in_record=03332_E_Sichtbarkeits-Abschnitt "$1" ist in Recor
 % The visibility sections \var(protected) and \var(strict protected) are only
 % useful together with inheritance. Since records do not support that they are
 % forbidden.
+parser_e_proc_dir_not_allowed=03333_E_Prozedurdirektive "$1" ist hier nicht erlaubt
+% This procedure directive is not allowed in the given context. E.g. "static"
+% is not allowed for instance methods or class operators.
+parser_e_no_assembler_in_generic=03334_E_Assemblerblöcke sind innerhalb von "generics" nicht erlaubt
+% The use of assembler blocks/routines is not allowed inside generics.
 %
 % \end{description}
 # EndOfTeX
@@ -1929,7 +1939,7 @@ type_e_invalid_default_value=04119_E_Der Defaultwert für einen Parameter des Ty
 % Parameters declared as structured types, such as files, variants, non-dynamic
 % arrays and TP-style objects, cannot have a default value.
 type_e_type_not_allowed_for_type_helper=04120_E_Typ "$1" kann durch einen Typhelfer nicht erweitert werden
-% Types like procedural variables can not be extended by type helpers
+% Types like procedural variables cannot be extended by type helpers
 %
 % \end{description}
 # EndOfTeX
@@ -2711,7 +2721,7 @@ asmw_e_handlerdata_no_handler=08025_E_".seh_handlerdata"-Direktive ohne vorangeh
 #
 # Executing linker/assembler
 #
-# 09033 is the last used one
+# 09034 is the last used one
 #
 # BeginOfTeX
 %
@@ -2803,6 +2813,9 @@ exec_e_cant_write_resource_file=09032_E_Kann die Resourcedatei "$1" nicht schrei
 % An error occurred resource file cannot be written.
 exec_n_backquote_cat_file_not_found=09033_N_Die Datei "$1" wurde mit dem Kommando cat nicht gefunden
 % The compiler did not find the file that should be expanded into linker parameters
+exec_w_init_file_not_found=09034_W_"$1" nicht gefunden; dies wird wahrscheinlich zu einem Fehler beim Linken führen
+% The compiler adds certain startup code files to the linker only when they are found.
+% If they are not found, they are not added and this might cause a linking failure.
 %
 % \end{description}
 # EndOfTeX
@@ -3347,6 +3360,7 @@ diskutiert werden können, usw.):
 #    3 = 80x86 targets
 #    4 = x86_64
 #    6 = 680x0 targets
+#    8 = 8086 (16-bit) targets
 #    A = ARM
 #    e = in extended debug mode only
 #    F = help for the 'fpc' binary (independent of the target compiler)
@@ -3642,6 +3656,10 @@ p*2Wi_Benutze interne Resourcen (Darwin)
 3*2WI_Die Verwendung der "import"-Abschnitte ein/ausschalten (Windows)
 4*2WI_Die Verwendung der "import"-Abschnitte ein/ausschalten (Windows)
 A*2WI_Die Verwendung der "import"-Abschnitte ein/ausschalten (Windows)
+8*2Wm<x>_Setze Speichermodell
+8*3WmTiny_Winziges (tiny) Speichermodell
+8*3WmSmall_Kleines (small) Speichermodell (Voreinstellung)
+8*3WmMedium_Mittleres (medium) Speichermodell
 3*2WM<x>_Minimale Mac OS X Deployment Version: 10.4, 10.5.1, ... (Darwin)
 4*2WM<x>_Minimale Mac OS X Deployment Version: 10.4, 10.5.1, ... (Darwin)
 p*2WM<x>_Minimale Mac OS X Deployment Version: 10.4, 10.5.1, ... (Darwin)

Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini