{ nx86mat.pas }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 code for math nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86mat;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ncgmat;
  22. type
  23. tx86unaryminusnode = class(tcgunaryminusnode)
  24. {$ifdef SUPPORT_MMX}
  25. procedure second_mmx;override;
  26. {$endif SUPPORT_MMX}
  27. procedure second_float;override;
  28. function pass_1:tnode;override;
  29. end;
  30. tx86notnode = class(tcgnotnode)
  31. procedure second_boolean;override;
  32. {$ifdef SUPPORT_MMX}
  33. procedure second_mmx;override;
  34. {$endif SUPPORT_MMX}
  35. end;
  36. tx86moddivnode = class(tcgmoddivnode)
  37. procedure pass_generate_code;override;
  38. end;
  39. tx86shlshrnode = class(tcgshlshrnode)
  40. {$ifdef SUPPORT_MMX}
  41. procedure second_mmx;override;
  42. {$endif SUPPORT_MMX}
  43. end;
  44. implementation
  45. uses
  46. globtype,
  47. constexp,
  48. cutils,verbose,globals,
  49. symconst,symdef,
  50. aasmbase,aasmtai,aasmcpu,aasmdata,defutil,
  51. cgbase,pass_1,pass_2,
  52. ncon,
  53. cpubase,cpuinfo,
  54. cga,cgobj,hlcgobj,cgx86,cgutils,
  55. tgobj;
  56. {*****************************************************************************
TX86UNARYMINUSNODE
  58. *****************************************************************************}
  59. function tx86unaryminusnode.pass_1 : tnode;
  60. begin
  61. result:=nil;
  62. firstpass(left);
  63. if codegenerror then
  64. exit;
  65. if (left.resultdef.typ=floatdef) then
  66. begin
  67. if use_vectorfpu(left.resultdef) then
  68. expectloc:=LOC_MMREGISTER
  69. else
  70. expectloc:=LOC_FPUREGISTER;
  71. end
  72. {$ifdef SUPPORT_MMX}
  73. else
  74. if (cs_mmx in current_settings.localswitches) and
  75. is_mmx_able_array(left.resultdef) then
  76. begin
  77. expectloc:=LOC_MMXREGISTER;
  78. end
  79. {$endif SUPPORT_MMX}
  80. else
  81. inherited pass_1;
  82. end;
  83. {$ifdef SUPPORT_MMX}
  84. procedure tx86unaryminusnode.second_mmx;
  85. var
  86. op : tasmop;
  87. hreg : tregister;
  88. begin
  89. op:=A_NONE;
  90. secondpass(left);
  91. location_reset(location,LOC_MMXREGISTER,OS_NO);
  92. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  93. emit_reg_reg(A_PXOR,S_NO,hreg,hreg);
  94. case left.location.loc of
  95. LOC_MMXREGISTER:
  96. begin
  97. location.register:=left.location.register;
  98. end;
  99. LOC_CMMXREGISTER:
  100. begin
  101. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  102. emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
  103. end;
  104. LOC_REFERENCE,
  105. LOC_CREFERENCE:
  106. begin
  107. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  108. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
  109. end;
  110. else
  111. internalerror(200203225);
  112. end;
  113. if cs_mmx_saturation in current_settings.localswitches then
  114. case mmx_type(resultdef) of
  115. mmxs8bit:
  116. op:=A_PSUBSB;
  117. mmxu8bit:
  118. op:=A_PSUBUSB;
  119. mmxs16bit,mmxfixed16:
  120. op:=A_PSUBSW;
  121. mmxu16bit:
  122. op:=A_PSUBUSW;
  123. else
  124. ;
  125. end
  126. else
  127. case mmx_type(resultdef) of
  128. mmxs8bit,mmxu8bit:
  129. op:=A_PSUBB;
  130. mmxs16bit,mmxu16bit,mmxfixed16:
  131. op:=A_PSUBW;
  132. mmxs32bit,mmxu32bit:
  133. op:=A_PSUBD;
  134. else
  135. ;
  136. end;
  137. if op = A_NONE then
  138. internalerror(201408202);
  139. emit_reg_reg(op,S_NO,location.register,hreg);
  140. emit_reg_reg(A_MOVQ,S_NO,hreg,location.register);
  141. end;
  142. {$endif SUPPORT_MMX}
  143. procedure tx86unaryminusnode.second_float;
  144. var
  145. l1: TAsmLabel;
  146. href: treference;
  147. reg: tregister;
  148. begin
  149. secondpass(left);
  150. if expectloc=LOC_MMREGISTER then
  151. begin
  152. if cs_opt_fastmath in current_settings.optimizerswitches then
  153. begin
  154. if not(left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
  155. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  156. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  157. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
  158. cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,location.size,location.register,location.register,nil);
  159. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,OP_SUB,location.size,left.location,location.register,mms_movescalar);
  160. end
  161. else
  162. begin
  163. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  164. current_asmdata.getglobaldatalabel(l1);
  165. new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(16));
  166. current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
  167. case def_cgsize(resultdef) of
  168. OS_F32:
  169. current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(longint(1 shl 31)));
  170. OS_F64:
  171. begin
  172. current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(0));
  173. current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(-(1 shl 31)));
  174. end
  175. else
  176. internalerror(2004110215);
  177. end;
  178. reference_reset_symbol(href,l1,0,resultdef.alignment,[]);
  179. if UseAVX then
  180. begin
  181. if not(left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  182. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  183. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
  184. cg.a_opmm_ref_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,href,left.location.register,location.register,nil)
  185. end
  186. else
  187. begin
  188. if not(left.location.loc=LOC_MMREGISTER) then
  189. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  190. location.register:=left.location.register;
  191. cg.a_opmm_ref_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,href,location.register,mms_movescalar);
  192. end;
  193. end;
  194. end
  195. else
  196. begin
  197. location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
  198. case left.location.loc of
  199. LOC_REFERENCE,
  200. LOC_CREFERENCE:
  201. begin
  202. location.register:=NR_ST;
  203. cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
  204. left.location.size,location.size,
  205. left.location.reference,location.register);
  206. emit_none(A_FCHS,S_NO);
  207. end;
  208. LOC_FPUREGISTER,
  209. LOC_CFPUREGISTER:
  210. begin
  211. { "load st,st" is ignored by the code generator }
  212. cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,NR_ST);
  213. location.register:=NR_ST;
  214. emit_none(A_FCHS,S_NO);
  215. end;
  216. else
  217. internalerror(200312241);
  218. end;
  219. end;
  220. end;
  221. {*****************************************************************************
  222. TX86NOTNODE
  223. *****************************************************************************}
  224. procedure tx86notnode.second_boolean;
  225. var
  226. opsize : tcgsize;
  227. {$if defined(cpu32bitalu) or defined(cpu16bitalu)}
  228. hreg: tregister;
  229. {$endif}
  230. begin
  231. opsize:=def_cgsize(resultdef);
  232. secondpass(left);
  233. if not handle_locjump then
  234. begin
  235. case left.location.loc of
  236. LOC_FLAGS :
  237. begin
  238. location_reset(location,LOC_FLAGS,OS_NO);
  239. location.resflags:=left.location.resflags;
  240. inverse_flags(location.resflags);
  241. end;
  242. LOC_CREFERENCE,
  243. LOC_REFERENCE:
  244. begin
  245. {$if defined(cpu32bitalu)}
  246. if is_64bit(resultdef) then
  247. begin
  248. hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
  249. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  250. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
  251. inc(left.location.reference.offset,4);
  252. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
  253. end
  254. else
  255. {$elseif defined(cpu16bitalu)}
  256. if is_64bit(resultdef) then
  257. begin
  258. hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
  259. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  260. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
  261. inc(left.location.reference.offset,2);
  262. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  263. inc(left.location.reference.offset,2);
  264. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  265. inc(left.location.reference.offset,2);
  266. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  267. end
  268. else if is_32bit(resultdef) then
  269. begin
  270. hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
  271. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  272. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
  273. inc(left.location.reference.offset,2);
  274. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  275. end
  276. else
  277. {$endif}
  278. emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
  279. location_reset(location,LOC_FLAGS,OS_NO);
  280. location.resflags:=F_E;
  281. end;
  282. LOC_CONSTANT,
  283. LOC_REGISTER,
  284. LOC_CREGISTER,
  285. LOC_SUBSETREG,
  286. LOC_CSUBSETREG,
  287. LOC_SUBSETREF,
  288. LOC_CSUBSETREF :
  289. begin
  290. {$if defined(cpu32bitalu)}
  291. if is_64bit(resultdef) then
  292. begin
  293. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  294. emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
  295. end
  296. else
  297. {$elseif defined(cpu16bitalu)}
  298. if is_64bit(resultdef) then
  299. begin
  300. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  301. emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
  302. emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
  303. emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
  304. end
  305. else if is_32bit(resultdef) then
  306. begin
  307. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  308. emit_reg_reg(A_OR,S_L,cg.GetNextReg(left.location.register),left.location.register);
  309. end
  310. else
  311. {$endif}
  312. begin
  313. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
  314. emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
  315. end;
  316. location_reset(location,LOC_FLAGS,OS_NO);
  317. location.resflags:=F_E;
  318. end;
  319. else
  320. internalerror(200203224);
  321. end;
  322. end;
  323. end;
  324. {$ifdef SUPPORT_MMX}
  325. procedure tx86notnode.second_mmx;
  326. var hreg,r:Tregister;
  327. begin
  328. secondpass(left);
  329. location_reset(location,LOC_MMXREGISTER,OS_NO);
  330. r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  331. emit_const_reg(A_MOV,S_L,longint($ffffffff),r);
  332. { load operand }
  333. case left.location.loc of
  334. LOC_MMXREGISTER:
  335. location_copy(location,left.location);
  336. LOC_CMMXREGISTER:
  337. begin
  338. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  339. emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
  340. end;
  341. LOC_REFERENCE,
  342. LOC_CREFERENCE:
  343. begin
  344. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  345. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
  346. end;
  347. else
  348. internalerror(2019050906);
  349. end;
  350. { load mask }
  351. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  352. emit_reg_reg(A_MOVD,S_NO,r,hreg);
  353. { lower 32 bit }
  354. emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
  355. { shift mask }
  356. emit_const_reg(A_PSLLQ,S_B,32,hreg);
  357. { higher 32 bit }
  358. emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
  359. end;
  360. {$endif SUPPORT_MMX}
  361. {*****************************************************************************
  362. TX86MODDIVNODE
  363. *****************************************************************************}
  364. procedure tx86moddivnode.pass_generate_code;
  365. var
  366. hreg1,hreg2,hreg3,rega,regd,tempreg:Tregister;
  367. power:longint;
  368. instr:TAiCpu;
  369. op:Tasmop;
  370. cgsize:TCgSize;
  371. opsize:topsize;
  372. e, sm: aint;
  373. d,m: aword;
  374. m_add, invertsign: boolean;
  375. s: byte;
  376. label
  377. DefaultDiv;
  378. procedure DoUnsignedReciprocalDivision;
  379. var
  380. exp_rega,exp_regd:Tregister;
  381. exp_opsize:topsize;
  382. DoMod: Boolean;
  383. begin
  384. DoMod := (nodetype = modn);
  385. { Extend 32-bit divides to 64-bit registers and 16-bit
  386. divides to 32-bit registers. Because the domain of
  387. the left input is only up to 2^(X/2 - 1) - 1, (i.e.
  388. 2^31 - 1 for 64-bit and 2^15 - 1 for 32-bit), a much
  389. larger error in the reciprocal is permitted. }
  390. if (resultdef.size <= {$ifdef x86_64}4{$else x86_64}2{$endif x86_64}) then
  391. begin
  392. calc_divconst_magic_unsigned(resultdef.size * 2 * 8,d,m,m_add,s);
  393. { Should never have a zero shift and a magic add together }
  394. if (s = 0) and m_add then
  395. InternalError(2021090201);
  396. { Extend the input register (the peephole optimizer should
  397. help clean up unnecessary MOVZX instructions }
  398. hreg3 := hreg1;
  399. case resultdef.size of
  400. {$ifdef x86_64}
  401. 4:
  402. begin
  403. setsubreg(hreg3, R_SUBQ);
  404. { Make sure the upper 32 bits are zero; the peephole
  405. optimizer will remove this instruction via MovAnd2Mov
  406. if it's not needed }
  407. emit_const_reg(A_AND, S_L, $FFFFFFFF, hreg1);
  408. exp_rega := NR_RAX;
  409. exp_regd := NR_RDX;
  410. exp_opsize := S_Q;
  411. if m_add then
  412. { Append 1 to the tail end of the result }
  413. m := (m shr s) or ($8000000000000000 shr (s - 1))
  414. else
  415. m := m shr s;
  416. end;
  417. {$endif x86_64}
  418. 2:
  419. begin
  420. setsubreg(hreg3, R_SUBD);
  421. emit_reg_reg(A_MOVZX, S_WL, hreg1, hreg3);
  422. exp_rega := NR_EAX;
  423. exp_regd := NR_EDX;
  424. exp_opsize := S_L;
  425. if m_add then
  426. { Append 1 to the tail end of the result }
  427. m := (m shr s) or ($80000000 shr (s - 1))
  428. else
  429. m := m shr s;
  430. end;
  431. 1:
  432. begin
  433. setsubreg(hreg3, R_SUBW);
  434. emit_reg_reg(A_MOVZX, S_BW, hreg1, hreg3);
  435. exp_rega := NR_AX;
  436. exp_regd := NR_DX;
  437. regd := NR_DL; { We need to change this from AH }
  438. exp_opsize := S_W;
  439. if m_add then
  440. { Append 1 to the tail end of the result }
  441. m := (m shr s) or ($8000 shr (s - 1))
  442. else
  443. m := m shr s;
  444. end;
  445. else
  446. InternalError(2021090210);
  447. end;
  448. Inc(m);
  449. cg.getcpuregister(current_asmdata.CurrAsmList,exp_rega);
  450. emit_const_reg(A_MOV,exp_opsize,aint(m),exp_rega);
  451. cg.getcpuregister(current_asmdata.CurrAsmList,exp_regd);
  452. emit_reg(A_MUL,exp_opsize,hreg3);
  453. cg.ungetcpuregister(current_asmdata.CurrAsmList,exp_rega);
  454. if DoMod then
  455. begin
  456. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  457. emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
  458. end;
  459. end
  460. else
  461. begin
  462. calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
  463. { Should never have a zero shift and a magic add together }
  464. if (s = 0) and m_add then
  465. InternalError(2021090202);
  466. cg.getcpuregister(current_asmdata.CurrAsmList,rega);
  467. emit_const_reg(A_MOV,opsize,aint(m),rega);
  468. cg.getcpuregister(current_asmdata.CurrAsmList,regd);
  469. emit_reg(A_MUL,opsize,hreg1);
  470. cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
  471. if DoMod then
  472. begin
  473. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  474. emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
  475. end;
  476. if m_add then
  477. begin
  478. { addition can overflow, shift first bit considering carry,
  479. then shift remaining bits in regular way. }
  480. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  481. emit_reg_reg(A_ADD,opsize,hreg1,regd);
  482. emit_const_reg(A_RCR,opsize,1,regd);
  483. cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  484. dec(s);
  485. end;
  486. if s<>0 then
  487. emit_const_reg(A_SHR,opsize,aint(s),regd);
  488. end;
  489. if DoMod then
  490. begin
  491. { Now multiply the quotient by the original denominator and
  492. subtract the product from the original numerator to get
  493. the remainder. }
  494. if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in IMUL }
  495. begin
  496. hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  497. emit_const_reg(A_MOV,opsize,aint(d),hreg3);
  498. emit_reg_reg(A_IMUL,opsize,hreg3,regd);
  499. end
  500. else
  501. emit_const_reg(A_IMUL,opsize,aint(d),regd);
  502. emit_reg_reg(A_SUB,opsize,regd,hreg2);
  503. end;
  504. cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
  505. if not DoMod then
  506. begin
  507. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  508. cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,hreg2);
  509. end;
  510. location.register:=hreg2;
  511. end;
  512. begin
  513. secondpass(left);
  514. if codegenerror then
  515. exit;
  516. secondpass(right);
  517. if codegenerror then
  518. exit;
  519. { put numerator in register }
  520. cgsize:=def_cgsize(resultdef);
  521. opsize:=TCGSize2OpSize[cgsize];
  522. rega:=newreg(R_INTREGISTER,RS_EAX,cgsize2subreg(R_INTREGISTER,cgsize));
  523. if cgsize in [OS_8,OS_S8] then
  524. regd:=NR_AH
  525. else
  526. regd:=newreg(R_INTREGISTER,RS_EDX,cgsize2subreg(R_INTREGISTER,cgsize));
  527. location_reset(location,LOC_REGISTER,cgsize);
  528. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  529. hreg1:=left.location.register;
  530. if (nodetype=divn) and (right.nodetype=ordconstn) then
  531. begin
  532. if isabspowerof2(tordconstnode(right).value,power) then
  533. begin
  534. { for signed numbers, the numerator must be adjusted before the
  535. shift instruction, but not with unsigned numbers! Otherwise,
  536. "Cardinal($ffffffff) div 16" overflows! (JM) }
  537. if is_signed(left.resultdef) Then
  538. begin
  539. invertsign:=tordconstnode(right).value<0;
  540. { use a sequence without jumps, saw this in
  541. comp.compilers (JM) }
  542. { no jumps, but more operations }
  543. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  544. emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
  545. if power=1 then
  546. begin
  547. {If the left value is negative, hreg2=(1 shl power)-1=1, otherwise 0.}
  548. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-1,hreg2);
  549. end
  550. else
  551. begin
  552. {If the left value is negative, hreg2=$ffffffff, otherwise 0.}
  553. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,resultdef.size*8-1,hreg2);
  554. {If negative, hreg2=(1 shl power)-1, otherwise 0.}
  555. { (don't use emit_const_reg, because if value>high(longint)
  556. then it must first be loaded into a register) }
  557. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,(aint(1) shl power)-1,hreg2);
  558. end;
  559. { add to the left value }
  560. emit_reg_reg(A_ADD,opsize,hreg2,hreg1);
  561. { do the shift }
  562. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,power,hreg1);
  563. if invertsign then
  564. emit_reg(A_NEG,opsize,hreg1);
  565. end
  566. else
  567. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,power,hreg1);
  568. location.register:=hreg1;
  569. end
  570. else
  571. begin
  572. if is_signed(left.resultdef) then
  573. begin
  574. e:=tordconstnode(right).value.svalue;
  575. calc_divconst_magic_signed(resultdef.size*8,e,sm,s);
  576. cg.getcpuregister(current_asmdata.CurrAsmList,rega);
  577. emit_const_reg(A_MOV,opsize,sm,rega);
  578. cg.getcpuregister(current_asmdata.CurrAsmList,regd);
  579. emit_reg(A_IMUL,opsize,hreg1);
  580. { only the high half of result is used }
  581. cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
  582. { add or subtract dividend }
  583. if (e>0) and (sm<0) then
  584. emit_reg_reg(A_ADD,opsize,hreg1,regd)
  585. else if (e<0) and (sm>0) then
  586. emit_reg_reg(A_SUB,opsize,hreg1,regd);
  587. { shift if necessary }
  588. if (s<>0) then
  589. emit_const_reg(A_SAR,opsize,s,regd);
  590. { extract and add the sign bit }
  591. if (e<0) then
  592. emit_reg_reg(A_MOV,opsize,regd,hreg1);
  593. { if e>=0, hreg1 still contains dividend }
  594. emit_const_reg(A_SHR,opsize,left.resultdef.size*8-1,hreg1);
  595. emit_reg_reg(A_ADD,opsize,hreg1,regd);
  596. cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
  597. location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  598. cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
  599. end
  600. else
  601. begin
  602. d:=tordconstnode(right).value.uvalue;
  603. if d>=aword(1) shl (left.resultdef.size*8-1) then
  604. begin
  605. location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  606. { Ensure that the whole register is 0, since SETcc only sets the lowest byte }
  607. { If the operands are 64 bits, this XOR routine will be shrunk by the
  608. peephole optimizer. [Kit] }
  609. emit_reg_reg(A_XOR,opsize,location.register,location.register);
  610. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  611. if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
  612. begin
  613. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  614. emit_const_reg(A_MOV,opsize,aint(d),hreg2);
  615. emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
  616. end
  617. else
  618. emit_const_reg(A_CMP,opsize,aint(d),hreg1);
  619. { NOTE: SBB and SETAE are both 3 bytes long without the REX prefix,
  620. both use an ALU for their execution and take a single cycle to
  621. run. The only difference is that SETAE does not modify the flags,
  622. allowing for some possible reuse. [Kit] }
  623. { Emit a SETcc instruction that depends on the carry bit being zero,
  624. that is, the numerator is greater than or equal to the denominator. }
  625. tempreg:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,OS_8);
  626. instr:=TAiCpu.op_reg(A_SETcc,S_B,tempreg);
  627. instr.condition:=C_AE;
  628. current_asmdata.CurrAsmList.concat(instr);
  629. cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  630. end
  631. else
  632. DoUnsignedReciprocalDivision;
  633. end;
  634. end;
  635. end
  636. else if (nodetype=modn) and (right.nodetype=ordconstn) and not(is_signed(left.resultdef)) then
  637. begin
  638. { unsigned modulus by a (+/-)power-of-2 constant? }
  639. if isabspowerof2(tordconstnode(right).value,power) then
  640. begin
  641. emit_const_reg(A_AND,opsize,(aint(1) shl power)-1,hreg1);
  642. location.register:=hreg1;
  643. end
  644. else
  645. begin
  646. d:=tordconstnode(right).value.uvalue;
  647. if d>=aword(1) shl (left.resultdef.size*8-1) then
  648. begin
  649. if not (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
  650. goto DefaultDiv;
  651. location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  652. hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  653. m := aword(-aint(d)); { Two's complement of d }
  654. if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
  655. begin
  656. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  657. emit_const_reg(A_MOV,opsize,aint(d),hreg2);
  658. emit_const_reg(A_MOV,opsize,aint(m),hreg3);
  659. emit_reg_reg(A_XOR,opsize,location.register,location.register);
  660. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  661. emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
  662. end
  663. else
  664. begin
  665. emit_const_reg(A_MOV,opsize,aint(m),hreg3);
  666. emit_reg_reg(A_XOR,opsize,location.register,location.register);
  667. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  668. emit_const_reg(A_CMP,opsize,aint(d),hreg1);
  669. end;
  670. { Emit conditional move that depends on the carry flag being zero,
  671. that is, the comparison result is above or equal }
  672. instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register);
  673. instr.condition := C_AE;
  674. current_asmdata.CurrAsmList.concat(instr);
  675. cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  676. emit_reg_reg(A_ADD,opsize,hreg1,location.register);
  677. end
  678. else
  679. { Convert the division to a multiplication }
  680. DoUnsignedReciprocalDivision;
  681. end;
  682. end
  683. else if (nodetype=modn) and (right.nodetype=ordconstn) and (is_signed(left.resultdef)) and isabspowerof2(tordconstnode(right).value,power) then
  684. begin
  685. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  686. if power=1 then
  687. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-power,hreg1,hreg2)
  688. else
  689. begin
  690. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,resultdef.size*8-1,hreg1,hreg2);
  691. cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-power,hreg2,hreg2);
  692. end;
  693. emit_reg_reg(A_ADD,opsize,hreg1,hreg2);
  694. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,not((aint(1) shl power)-1),hreg2);
  695. emit_reg_reg(A_SUB,opsize,hreg2,hreg1);
  696. location.register:=hreg1;
  697. end
  698. else
  699. begin
  700. DefaultDiv:
  701. {Bring denominator to a register.}
  702. cg.getcpuregister(current_asmdata.CurrAsmList,rega);
  703. emit_reg_reg(A_MOV,opsize,hreg1,rega);
  704. cg.getcpuregister(current_asmdata.CurrAsmList,regd);
  705. {Sign extension depends on the left type.}
  706. if is_signed(left.resultdef) then
  707. case left.resultdef.size of
  708. {$ifdef x86_64}
  709. 8:
  710. emit_none(A_CQO,S_NO);
  711. {$endif x86_64}
  712. 4:
  713. emit_none(A_CDQ,S_NO);
  714. else
  715. internalerror(2013102704);
  716. end
  717. else
  718. emit_reg_reg(A_XOR,opsize,regd,regd);
  719. { Division depends on the result type }
  720. if is_signed(resultdef) then
  721. op:=A_IDIV
  722. else
  723. op:=A_DIV;
  724. if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
  725. emit_ref(op,opsize,right.location.reference)
  726. else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
  727. emit_reg(op,opsize,right.location.register)
  728. else
  729. begin
  730. hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
  731. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,right.resultdef,right.location,hreg1);
  732. emit_reg(op,opsize,hreg1);
  733. end;
  734. { Copy the result into a new register. Release R/EAX & R/EDX.}
  735. cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
  736. cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
  737. location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  738. if nodetype=divn then
  739. cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,rega,location.register)
  740. else
  741. cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register);
  742. end;
  743. end;
  744. {$ifdef SUPPORT_MMX}
  745. procedure tx86shlshrnode.second_mmx;
  746. var
  747. op : TAsmOp;
  748. mmxbase : tmmxtype;
  749. hregister : tregister;
  750. begin
  751. secondpass(left);
  752. if codegenerror then
  753. exit;
  754. secondpass(right);
  755. if codegenerror then
  756. exit;
  757. op:=A_NOP;
  758. mmxbase:=mmx_type(left.resultdef);
  759. location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
  760. case nodetype of
  761. shrn :
  762. case mmxbase of
  763. mmxs16bit,mmxu16bit,mmxfixed16:
  764. op:=A_PSRLW;
  765. mmxs32bit,mmxu32bit:
  766. op:=A_PSRLD;
  767. mmxs64bit,mmxu64bit:
  768. op:=A_PSRLQ;
  769. else
  770. Internalerror(2018022504);
  771. end;
  772. shln :
  773. case mmxbase of
  774. mmxs16bit,mmxu16bit,mmxfixed16:
  775. op:=A_PSLLW;
  776. mmxs32bit,mmxu32bit:
  777. op:=A_PSLLD;
  778. mmxs64bit,mmxu64bit:
  779. op:=A_PSLLD;
  780. else
  781. Internalerror(2018022503);
  782. end;
  783. else
  784. internalerror(2018022502);
  785. end;
  786. { left and right no register? }
  787. { then one must be demanded }
  788. if (left.location.loc<>LOC_MMXREGISTER) then
  789. begin
  790. { register variable ? }
  791. if (left.location.loc=LOC_CMMXREGISTER) then
  792. begin
  793. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  794. emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
  795. end
  796. else
  797. begin
  798. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  799. internalerror(2018022505);
  800. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  801. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  802. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
  803. end;
  804. location_reset(left.location,LOC_MMXREGISTER,OS_NO);
  805. left.location.register:=hregister;
  806. end;
  807. { at this point, left.location.loc should be LOC_MMXREGISTER }
  808. case right.location.loc of
  809. LOC_MMXREGISTER,LOC_CMMXREGISTER:
  810. begin
  811. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  812. location.register:=left.location.register;
  813. end;
  814. LOC_CONSTANT:
  815. emit_const_reg(op,S_NO,right.location.value,left.location.register);
  816. LOC_REFERENCE,LOC_CREFERENCE:
  817. begin
  818. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  819. emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
  820. end;
  821. else
  822. internalerror(2018022506);
  823. end;
  824. location.register:=left.location.register;
  825. location_freetemp(current_asmdata.CurrAsmList,right.location);
  826. end;
  827. {$endif SUPPORT_MMX}
  828. end.