nx86mat.pas 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 code for math nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86mat;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nmat,ncgmat;
  22. type
  { x86 unary-minus node.  Specialises the generic tcgunaryminusnode:
      pass_1       - selects the expected result location for float operands
                     (and for MMX-able arrays when SUPPORT_MMX is defined),
                     otherwise defers to the inherited pass_1
      second_float - emits the floating point negation (SSE/AVX or x87)
      second_mmx   - emits the MMX negation (only with SUPPORT_MMX) }
  tx86unaryminusnode = class(tcgunaryminusnode)
    function pass_1:tnode;override;
    procedure second_float;override;
{$ifdef SUPPORT_MMX}
    procedure second_mmx;override;
{$endif SUPPORT_MMX}
  end;
  { x86 "not" node.  Specialises the generic tcgnotnode:
      second_boolean - emits the boolean negation, leaving the result in
                       the CPU flags
      second_mmx     - emits the bitwise complement of an MMX operand
                       (only with SUPPORT_MMX) }
  tx86notnode = class(tcgnotnode)
{$ifdef SUPPORT_MMX}
    procedure second_mmx;override;
{$endif SUPPORT_MMX}
    procedure second_boolean;override;
  end;
  36. implementation
  37. uses
  38. globtype,
  39. systems,
  40. cutils,verbose,globals,
  41. symconst,symdef,
  42. aasmbase,aasmtai,aasmdata,defutil,
  43. cgbase,pass_1,pass_2,
  44. ncon,
  45. cpubase,procinfo,
  46. cga,ncgutil,cgobj,hlcgobj,cgx86,cgutils;
  47. {*****************************************************************************
  48. TX86UNARYMINUSNODE
  49. *****************************************************************************}
  { First pass for the x86 unary minus.

    Runs the first pass over the operand and, unless that raised a code
    generation error, records where the result is expected to live:
      - float operand           -> LOC_MMREGISTER when use_vectorfpu says so,
                                   LOC_FPUREGISTER otherwise
      - MMX-able array operand  -> LOC_MMXREGISTER (SUPPORT_MMX builds with
                                   the cs_mmx local switch enabled)
      - anything else           -> handled by the inherited pass_1

    Always returns nil. }
  function tx86unaryminusnode.pass_1 : tnode;
    begin
      result:=nil;
      firstpass(left);
      if not codegenerror then
        begin
          if left.resultdef.typ<>floatdef then
            begin
{$ifdef SUPPORT_MMX}
              if (cs_mmx in current_settings.localswitches) and
                 is_mmx_able_array(left.resultdef) then
                expectloc:=LOC_MMXREGISTER
              else
{$endif SUPPORT_MMX}
                { NOTE(review): the value returned by the inherited pass_1 is
                  dropped here, exactly as before - confirm that the ancestor
                  never returns a replacement node for non-float operands }
                inherited pass_1;
            end
          else if use_vectorfpu(left.resultdef) then
            expectloc:=LOC_MMREGISTER
          else
            expectloc:=LOC_FPUREGISTER;
        end;
    end;
  74. {$ifdef SUPPORT_MMX}
  { Emits the negation of an MMX operand.

    The operand is brought into location.register, a scratch MMX register
    is cleared with PXOR, the operand is subtracted from that zero with the
    packed subtract matching the element type (saturating variants when
    cs_mmx_saturation is active), and the difference is moved back into
    location.register.

    Raises internalerror when the operand location is not an MMX register
    or a memory reference, or when no subtract instruction exists for the
    element type / saturation combination. }
  procedure tx86unaryminusnode.second_mmx;
    var
      op : tasmop;
      hreg : tregister;
    begin
      { neither case statement below covers every mmx type, so start from a
        known value instead of emitting whatever happened to be in op }
      op:=A_NONE;
      secondpass(left);
      location_reset(location,LOC_MMXREGISTER,OS_NO);
      { hreg := 0 }
      hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
      emit_reg_reg(A_PXOR,S_NO,hreg,hreg);
      { load the operand into location.register }
      case left.location.loc of
        LOC_MMXREGISTER:
          begin
            location.register:=left.location.register;
          end;
        LOC_CMMXREGISTER:
          begin
            location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
            emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE:
          begin
            location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
            emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
          end;
        else
          internalerror(200203225);
      end;
      { select the packed subtract for the element type }
      if cs_mmx_saturation in current_settings.localswitches then
        case mmx_type(resultdef) of
          mmxs8bit:
            op:=A_PSUBSB;
          mmxu8bit:
            op:=A_PSUBUSB;
          mmxs16bit,mmxfixed16:
            op:=A_PSUBSW;
          mmxu16bit:
            op:=A_PSUBUSW;
        end
      else
        case mmx_type(resultdef) of
          mmxs8bit,mmxu8bit:
            op:=A_PSUBB;
          mmxs16bit,mmxu16bit,mmxfixed16:
            op:=A_PSUBW;
          mmxs32bit,mmxu32bit:
            op:=A_PSUBD;
        end;
      { NOTE(review): internalerror ids are expected to be unique across the
        compiler - confirm this one is not already in use }
      if op=A_NONE then
        internalerror(201408202);
      { hreg := 0 - operand, then hand the result back in location.register }
      emit_reg_reg(op,S_NO,location.register,hreg);
      emit_reg_reg(A_MOVQ,S_NO,hreg,location.register);
    end;
  126. {$endif SUPPORT_MMX}
  { Emits the negation of a floating point operand.

    x87 path (expectloc is not LOC_MMREGISTER): the operand is loaded to
    ST and negated with FCHS; the result location is NR_ST.

    Vector path (expectloc = LOC_MMREGISTER): a constant holding only the
    sign bit (bit 31 of a single; for a double a zero dword followed by a
    dword with bit 31 set) is written to the typed-constant list, loaded
    into a scratch register and XORed with the operand.  With AVX the
    three operand form writes straight into the result register; without
    it the operand is first copied to the result register.

    Raises internalerror for an unsupported float size or operand
    location. }
  procedure tx86unaryminusnode.second_float;
    var
      signmask : tregister;
      maskref : treference;
      masklabel : tasmlabel;
      fsize : tcgsize;
      rodata : TAsmList;
    begin
      secondpass(left);

      if expectloc<>LOC_MMREGISTER then
        begin
          { x87: bring the operand to the top of the FPU stack, flip its sign }
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          case left.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE:
              cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
                left.location.size,location.size,
                left.location.reference,NR_ST);
            LOC_FPUREGISTER,
            LOC_CFPUREGISTER:
              { "load st,st" is ignored by the code generator }
              cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
                left.location.size,location.size,
                left.location.register,NR_ST);
            else
              internalerror(200312241);
          end;
          location.register:=NR_ST;
          emit_none(A_FCHS,S_NO);
          exit;
        end;

      { vector path }
      fsize:=def_cgsize(resultdef);
      rodata:=current_asmdata.asmlists[al_typedconsts];

      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,fsize);
      { make life of register allocator easier }
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,fsize);

      { emit the sign bit mask as a labelled read-only constant }
      current_asmdata.getdatalabel(masklabel);
      new_section(rodata,sec_rodata_norel,masklabel.name,const_align(sizeof(pint)));
      rodata.concat(Tai_label.Create(masklabel));
      if fsize=OS_F32 then
        rodata.concat(tai_const.create_32bit(longint(1 shl 31)))
      else if fsize=OS_F64 then
        begin
          rodata.concat(tai_const.create_32bit(0));
          rodata.concat(tai_const.create_32bit(-(1 shl 31)));
        end
      else
        internalerror(2004110215);

      { load the mask into a scratch register }
      reference_reset_symbol(maskref,masklabel,0,resultdef.alignment);
      signmask:=cg.getmmregister(current_asmdata.CurrAsmList,fsize);
      cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,fsize,fsize,maskref,signmask,mms_movescalar);

      { result := operand xor mask }
      if not UseAVX then
        begin
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,fsize,fsize,left.location.register,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,signmask,location.register,nil);
        end
      else
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,signmask,left.location.register,location.register,nil);
    end;
  191. {*****************************************************************************
  192. TX86NOTNODE
  193. *****************************************************************************}
  { Emits the boolean "not" of the operand and leaves the result in the
    CPU flags.

    If handle_locjump reports that it already dealt with the node, nothing
    else is generated.  Otherwise the operand is evaluated and, depending
    on where it ended up:
      - LOC_FLAGS: the operand's flags are taken over and inverted
      - memory reference: the operand is compared with 0; when it is wider
        than the ALU its parts are ORed together into a scratch register
        instead
      - constant / register / subset location: the operand is forced into
        a register and tested against itself; when it is wider than the
        ALU its parts are ORed together instead
    In the last two cases the result is F_E, i.e. "operand was zero".

    Raises internalerror for any other operand location. }
  procedure tx86notnode.second_boolean;
    var
      opsize : tcgsize;
{$if defined(cpu32bitalu) or defined(cpu16bitalu)}
      hreg : tregister;
{$endif}
    begin
      opsize:=def_cgsize(resultdef);

      if not handle_locjump then
        begin
          { the second pass could change the location of left }
          { if it is a register variable, so we've to do      }
          { this before the case statement                    }
          secondpass(left);

          { dispatch on where left really is after the second pass; the
            branches below read left.location.*, so selecting by the first
            pass expectation could pick a branch that does not match the
            actual location }
          case left.location.loc of
            LOC_FLAGS :
              begin
                location_reset(location,LOC_FLAGS,OS_NO);
                location.resflags:=left.location.resflags;
                inverse_flags(location.resflags);
              end;
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
{$if defined(cpu32bitalu)}
                if is_64bit(resultdef) then
                  begin
                    { low dword or high dword -> zero flag }
                    hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
                    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                    cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
                    inc(left.location.reference.offset,4);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
                  end
                else
{$elseif defined(cpu16bitalu)}
                if is_64bit(resultdef) then
                  begin
                    { or all four words together -> zero flag }
                    hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
                    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                    cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
                    inc(left.location.reference.offset,2);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                    inc(left.location.reference.offset,2);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                    inc(left.location.reference.offset,2);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                  end
                else if is_32bit(resultdef) then
                  begin
                    { low word or high word -> zero flag }
                    hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
                    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                    cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
                    inc(left.location.reference.offset,2);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                  end
                else
{$endif}
                  emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
                location_reset(location,LOC_FLAGS,OS_NO);
                location.resflags:=F_E;
              end;
            LOC_CONSTANT,
            LOC_REGISTER,
            LOC_CREGISTER,
            LOC_SUBSETREG,
            LOC_CSUBSETREG,
            LOC_SUBSETREF,
            LOC_CSUBSETREF :
              begin
{$if defined(cpu32bitalu)}
                if is_64bit(resultdef) then
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                    emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
                  end
                else
{$elseif defined(cpu16bitalu)}
                if is_64bit(resultdef) then
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                    emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
                    emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
                    emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
                  end
                else if is_32bit(resultdef) then
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                    { word sized OR, matching the 64 bit branch above which
                      combines the same kind of register halves with S_W }
                    emit_reg_reg(A_OR,S_W,GetNextReg(left.location.register),left.location.register);
                  end
                else
{$endif}
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
                    emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                  end;
                location_reset(location,LOC_FLAGS,OS_NO);
                location.resflags:=F_E;
              end;
            else
              internalerror(200203224);
          end;
        end;
    end;
  295. {$ifdef SUPPORT_MMX}
  { Emits the bitwise complement of an MMX operand.

    An integer register is loaded with $ffffffff and moved into a scratch
    MMX register to form a mask for the lower 32 bits.  The operand is
    XORed with that mask, the mask is shifted left by 32 bits, and the
    operand is XORed again so the upper 32 bits are inverted as well.

    Raises internalerror when the operand is neither in an MMX register
    nor a memory reference; previously such a location fell through the
    case statement and the code below used an unset result register. }
  procedure tx86notnode.second_mmx;
    var
      hreg,r : tregister;
    begin
      secondpass(left);
      location_reset(location,LOC_MMXREGISTER,OS_NO);
      r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
      emit_const_reg(A_MOV,S_L,longint($ffffffff),r);
      { load operand }
      case left.location.loc of
        LOC_MMXREGISTER:
          location_copy(location,left.location);
        LOC_CMMXREGISTER:
          begin
            location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
            emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE:
          begin
            location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
            emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
          end;
        else
          { NOTE(review): internalerror ids are expected to be unique across
            the compiler - confirm this one is not already in use }
          internalerror(2019050906);
      end;
      { load mask }
      hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
      emit_reg_reg(A_MOVD,S_NO,r,hreg);
      { lower 32 bit }
      emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
      { shift mask }
      emit_const_reg(A_PSLLQ,S_B,32,hreg);
      { higher 32 bit }
      emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
    end;
  329. {$endif SUPPORT_MMX}
  330. end.