nx86mat.pas 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 code for math nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86mat;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ncgmat;
  type
    { Unary minus on x86: overrides the first pass plus the float and
      (optionally) MMX code generation of the generic node. }
    tx86unaryminusnode = class(tcgunaryminusnode)
      function pass_1:tnode;override;
      procedure second_float;override;
{$ifdef SUPPORT_MMX}
      procedure second_mmx;override;
{$endif SUPPORT_MMX}
    end;

    { Logical/bitwise NOT on x86: overrides the boolean and (optionally)
      MMX code generation of the generic node. }
    tx86notnode = class(tcgnotnode)
{$ifdef SUPPORT_MMX}
      procedure second_mmx;override;
{$endif SUPPORT_MMX}
      procedure second_boolean;override;
    end;

    { Integer div/mod on x86: replaces the whole code generation pass. }
    tx86moddivnode = class(tcgmoddivnode)
      procedure pass_generate_code;override;
    end;

    { Shift left/right on x86: only the MMX variant is overridden. }
    tx86shlshrnode = class(tcgshlshrnode)
{$ifdef SUPPORT_MMX}
      procedure second_mmx;override;
{$endif SUPPORT_MMX}
    end;
  44. implementation
  45. uses
  46. globtype,
  47. constexp,
  48. cutils,verbose,globals,
  49. symconst,symdef,
  50. aasmbase,aasmtai,aasmcpu,aasmdata,defutil,
  51. cgbase,pass_1,pass_2,
  52. ncon,
  53. cpubase,cpuinfo,
  54. cga,cgobj,hlcgobj,cgx86,cgutils,
  55. tgobj;
  56. {*****************************************************************************
  57. TX86UNARYMINUSNODE
  58. *****************************************************************************}
  { First pass of the unary minus node.

    Runs the first pass on the operand and then selects the expected result
    location:
      - float operand: LOC_MMREGISTER when use_vectorfpu() reports that the
        vector FPU is used for this type, LOC_FPUREGISTER otherwise;
      - MMX-able array with the MMX local switch on (SUPPORT_MMX builds only):
        LOC_MMXREGISTER;
      - everything else: delegated to the inherited implementation.

    Returns nil when this node is kept, or whatever the inherited pass_1
    returns for the delegated case. }
  function tx86unaryminusnode.pass_1 : tnode;
    begin
      result:=nil;
      firstpass(left);
      if codegenerror then
        exit;

      if (left.resultdef.typ=floatdef) then
        begin
          if use_vectorfpu(left.resultdef) then
            expectloc:=LOC_MMREGISTER
          else
            expectloc:=LOC_FPUREGISTER;
        end
{$ifdef SUPPORT_MMX}
      else
        if (cs_mmx in current_settings.localswitches) and
          is_mmx_able_array(left.resultdef) then
          begin
            expectloc:=LOC_MMXREGISTER;
          end
{$endif SUPPORT_MMX}
      else
        { Do not drop the ancestor's result: pass_1 returns a tnode, and a
          non-nil value would otherwise be silently lost here. }
        result:=inherited pass_1;
    end;
  {$ifdef SUPPORT_MMX}
  { Negates an MMX vector by computing 0 - operand.

    A scratch MMX register is cleared with PXOR, the operand is brought into
    location.register, the operand is subtracted from the cleared register
    with the PSUB* variant matching the element type (saturating variants
    when the MMX saturation switch is on), and the difference is copied back
    into location.register. }
  procedure tx86unaryminusnode.second_mmx;
    var
      subop : tasmop;
      zeroreg : tregister;
      saturate : boolean;
    begin
      subop:=A_NONE;
      secondpass(left);
      location_reset(location,LOC_MMXREGISTER,OS_NO);

      { zeroreg := 0 }
      zeroreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
      emit_reg_reg(A_PXOR,S_NO,zeroreg,zeroreg);

      { make sure the operand lives in an MMX register we may overwrite }
      if left.location.loc=LOC_MMXREGISTER then
        location.register:=left.location.register
      else if left.location.loc=LOC_CMMXREGISTER then
        begin
          location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
          emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
        end
      else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        begin
          location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
          emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
        end
      else
        internalerror(200203225);

      { pick the subtraction opcode; combinations without an opcode leave
        subop at A_NONE and are rejected below }
      saturate:=cs_mmx_saturation in current_settings.localswitches;
      case mmx_type(resultdef) of
        mmxs8bit:
          begin
            if saturate then
              subop:=A_PSUBSB
            else
              subop:=A_PSUBB;
          end;
        mmxu8bit:
          begin
            if saturate then
              subop:=A_PSUBUSB
            else
              subop:=A_PSUBB;
          end;
        mmxs16bit,mmxfixed16:
          begin
            if saturate then
              subop:=A_PSUBSW
            else
              subop:=A_PSUBW;
          end;
        mmxu16bit:
          begin
            if saturate then
              subop:=A_PSUBUSW
            else
              subop:=A_PSUBW;
          end;
        mmxs32bit,mmxu32bit:
          begin
            { only the non-saturating 32 bit form is selected here }
            if not saturate then
              subop:=A_PSUBD;
          end;
        else
          ;
      end;
      if subop=A_NONE then
        internalerror(201408202);

      { zeroreg := 0 - operand, then hand the result back }
      emit_reg_reg(subop,S_NO,location.register,zeroreg);
      emit_reg_reg(A_MOVQ,S_NO,zeroreg,location.register);
    end;
  {$endif SUPPORT_MMX}
  { Negates a floating point value.

    When the first pass selected LOC_MMREGISTER, a sign-bit mask constant is
    emitted into the typed constants list and XORed onto the operand
    (three-operand form when UseAVX, load/move/XOR otherwise).
    In every other case the operand is loaded onto the x87 stack top and
    FCHS is emitted. }
  procedure tx86unaryminusnode.second_float;
    var
      maskreg : tregister;
      maskref : treference;
      masklabel : tasmlabel;
      fsize : tcgsize;
    begin
      secondpass(left);

      if expectloc<>LOC_MMREGISTER then
        begin
          { x87: bring the operand to ST and flip its sign }
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          case left.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE:
              begin
                location.register:=NR_ST;
                cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
                  left.location.size,location.size,
                  left.location.reference,location.register);
              end;
            LOC_FPUREGISTER,
            LOC_CFPUREGISTER:
              begin
                { "load st,st" is ignored by the code generator }
                cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,NR_ST);
                location.register:=NR_ST;
              end;
            else
              internalerror(200312241);
          end;
          emit_none(A_FCHS,S_NO);
          exit;
        end;

      fsize:=def_cgsize(resultdef);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,fsize);

      { a separate result register makes life of the register allocator easier }
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,fsize);

      { emit the sign mask: only the top bit of the value is set }
      current_asmdata.getglobaldatalabel(masklabel);
      new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,masklabel.name,const_align(sizeof(pint)));
      current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(masklabel));
      if fsize=OS_F32 then
        current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(longint(1 shl 31)))
      else if fsize=OS_F64 then
        begin
          { low dword first, then the dword carrying the sign bit }
          current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(0));
          current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(-(1 shl 31)));
        end
      else
        internalerror(2004110215);

      reference_reset_symbol(maskref,masklabel,0,resultdef.alignment,[]);

      if UseAVX then
        cg.a_opmm_ref_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,maskref,left.location.register,location.register,nil)
      else
        begin
          { load the mask, copy the operand, then XOR the mask onto the copy }
          maskreg:=cg.getmmregister(current_asmdata.CurrAsmList,fsize);
          cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,fsize,fsize,maskref,maskreg,mms_movescalar);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,fsize,fsize,left.location.register,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,maskreg,location.register,nil);
        end;
    end;
  207. {*****************************************************************************
  208. TX86NOTNODE
  209. *****************************************************************************}
  { Generates code for a boolean NOT whose result is delivered in the flags.

    Unless handle_locjump already dealt with the operand:
      - LOC_FLAGS operand: the operand's flags are reused, inverted;
      - memory operand: compared against 0 (values wider than the ALU are
        tested by ORing their parts into a scratch register), result F_E;
      - constant/register/subset operand: forced into a register and tested
        against itself (wide values: their parts are ORed together),
        result F_E.
    Any other operand location is an internal error. }
  procedure tx86notnode.second_boolean;
    var
      opsize : tcgsize;
{$if defined(cpu32bitalu) or defined(cpu16bitalu)}
      hreg: tregister;
{$endif}
    begin
      opsize:=def_cgsize(resultdef);

      secondpass(left);
      if not handle_locjump then
        begin
          case left.location.loc of
            LOC_FLAGS :
              begin
                location_reset(location,LOC_FLAGS,OS_NO);
                location.resflags:=left.location.resflags;
                inverse_flags(location.resflags);
              end;
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
{$if defined(cpu32bitalu)}
                if is_64bit(resultdef) then
                  begin
                    { OR both 32 bit halves; ZF is set iff the value is 0 }
                    hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
                    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                    cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
                    inc(left.location.reference.offset,4);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
                  end
                else
{$elseif defined(cpu16bitalu)}
                if is_64bit(resultdef) then
                  begin
                    { OR all four 16 bit words together }
                    hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
                    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                    cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
                    inc(left.location.reference.offset,2);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                    inc(left.location.reference.offset,2);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                    inc(left.location.reference.offset,2);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                  end
                else if is_32bit(resultdef) then
                  begin
                    { OR both 16 bit words together }
                    hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
                    tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                    cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
                    inc(left.location.reference.offset,2);
                    cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
                  end
                else
{$endif}
                  emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
                location_reset(location,LOC_FLAGS,OS_NO);
                location.resflags:=F_E;
              end;
            LOC_CONSTANT,
            LOC_REGISTER,
            LOC_CREGISTER,
            LOC_SUBSETREG,
            LOC_CSUBSETREG,
            LOC_SUBSETREF,
            LOC_CSUBSETREF :
              begin
{$if defined(cpu32bitalu)}
                if is_64bit(resultdef) then
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                    emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
                  end
                else
{$elseif defined(cpu16bitalu)}
                if is_64bit(resultdef) then
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                    emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
                    emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
                    emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
                  end
                else if is_32bit(resultdef) then
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
                    { both operands are 16 bit register halves, so the
                      operand size is S_W like in the 64 bit case above
                      (was S_L) }
                    emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register),left.location.register);
                  end
                else
{$endif}
                  begin
                    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
                    emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                  end;
                location_reset(location,LOC_FLAGS,OS_NO);
                location.resflags:=F_E;
              end;
            else
              internalerror(200203224);
          end;
        end;
    end;
  {$ifdef SUPPORT_MMX}
  { Bitwise NOT of an MMX value.

    A 32 bit all-ones pattern is loaded into an integer register and moved
    into an MMX mask register; the operand is XORed with that mask once for
    the lower 32 bits and, after shifting the mask left by 32, once more
    for the higher 32 bits. }
  procedure tx86notnode.second_mmx;
    var
      onesreg,maskreg : Tregister;
    begin
      secondpass(left);
      location_reset(location,LOC_MMXREGISTER,OS_NO);

      { all-ones pattern in an integer register }
      onesreg:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
      emit_const_reg(A_MOV,S_L,longint($ffffffff),onesreg);

      { load operand }
      if left.location.loc=LOC_MMXREGISTER then
        location_copy(location,left.location)
      else if left.location.loc=LOC_CMMXREGISTER then
        begin
          location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
          emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
        end
      else if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        begin
          location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
          emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
        end
      else
        internalerror(2019050906);

      { load mask }
      maskreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
      emit_reg_reg(A_MOVD,S_NO,onesreg,maskreg);

      { lower 32 bit }
      emit_reg_reg(A_PXOR,S_NO,maskreg,location.register);

      { shift mask }
      emit_const_reg(A_PSLLQ,S_B,32,maskreg);

      { higher 32 bit }
      emit_reg_reg(A_PXOR,S_NO,maskreg,location.register);
    end;
  {$endif SUPPORT_MMX}
  347. {*****************************************************************************
  348. TX86MODDIVNODE
  349. *****************************************************************************}
  { Generates code for integer DIV and MOD (32 and 64 bit results only;
    any other result size is an internal error).

    Strategies, selected in this order:
      - DIV by a constant (+/-)power of two: shift, with a jump-free
        rounding correction for signed numerators;
      - DIV by any other constant: multiplication by a "magic" constant
        (calc_divconst_magic_signed/unsigned); an unsigned divisor with its
        top bit set is reduced to a single compare + SETAE;
      - unsigned MOD by a constant: AND mask for powers of two, compare +
        CMOV when the divisor has its top bit set (falls back to the generic
        path without CMOV support), otherwise magic multiplication followed
        by numerator - quotient*divisor;
      - signed MOD by a constant (+/-)power of two: shift/mask sequence;
      - everything else: plain DIV/IDIV using R/EAX and R/EDX.

    The result ends up in location.register (LOC_REGISTER). }
  procedure tx86moddivnode.pass_generate_code;
    var
      hreg1,hreg2,hreg3,rega,regd,tempreg:Tregister;
      power:longint;
      instr:TAiCpu;
      op:Tasmop;
      cgsize:TCgSize;
      opsize:topsize;
      e, sm: aint;
      d,m: aword;
      m_add, invertsign: boolean;
      s: byte;
    label
      DefaultDiv;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      secondpass(right);
      if codegenerror then
        exit;

      { put numerator in register }
      cgsize:=def_cgsize(resultdef);
      opsize:=TCGSize2OpSize[cgsize];
      if not (cgsize in [OS_32,OS_S32,OS_64,OS_S64]) then
        InternalError(2013102702);
      rega:=newreg(R_INTREGISTER,RS_EAX,cgsize2subreg(R_INTREGISTER,cgsize));
      regd:=newreg(R_INTREGISTER,RS_EDX,cgsize2subreg(R_INTREGISTER,cgsize));

      location_reset(location,LOC_REGISTER,cgsize);
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
      hreg1:=left.location.register;

      if (nodetype=divn) and (right.nodetype=ordconstn) then
        begin
          if isabspowerof2(tordconstnode(right).value,power) then
            begin
              { for signed numbers, the numerator must be adjusted before the
                shift instruction, but not with unsigned numbers! Otherwise,
                "Cardinal($ffffffff) div 16" overflows! (JM) }
              if is_signed(left.resultdef) Then
                begin
                  invertsign:=tordconstnode(right).value<0;
                  { use a sequence without jumps, saw this in
                    comp.compilers (JM) }
                  { no jumps, but more operations }
                  hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                  emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
                  if power=1 then
                    begin
                      {If the left value is negative, hreg2=(1 shl power)-1=1, otherwise 0.}
                      cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-1,hreg2);
                    end
                  else
                    begin
                      {If the left value is negative, hreg2=$ffffffff, otherwise 0.}
                      cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,resultdef.size*8-1,hreg2);
                      {If negative, hreg2=(1 shl power)-1, otherwise 0.}
                      { (don't use emit_const_reg, because if value>high(longint)
                         then it must first be loaded into a register) }
                      cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,(aint(1) shl power)-1,hreg2);
                    end;
                  { add to the left value }
                  emit_reg_reg(A_ADD,opsize,hreg2,hreg1);
                  { do the shift }
                  cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,power,hreg1);
                  if invertsign then
                    emit_reg(A_NEG,opsize,hreg1);
                end
              else
                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,power,hreg1);
              location.register:=hreg1;
            end
          else
            begin
              if is_signed(left.resultdef) then
                begin
                  e:=tordconstnode(right).value.svalue;
                  calc_divconst_magic_signed(resultdef.size*8,e,sm,s);
                  cg.getcpuregister(current_asmdata.CurrAsmList,rega);
                  emit_const_reg(A_MOV,opsize,sm,rega);
                  cg.getcpuregister(current_asmdata.CurrAsmList,regd);
                  emit_reg(A_IMUL,opsize,hreg1);
                  { only the high half of result is used }
                  cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
                  { add or subtract dividend }
                  if (e>0) and (sm<0) then
                    emit_reg_reg(A_ADD,opsize,hreg1,regd)
                  else if (e<0) and (sm>0) then
                    emit_reg_reg(A_SUB,opsize,hreg1,regd);
                  { shift if necessary }
                  if (s<>0) then
                    emit_const_reg(A_SAR,opsize,s,regd);
                  { extract and add the sign bit }
                  if (e<0) then
                    emit_reg_reg(A_MOV,opsize,regd,hreg1);
                  { if e>=0, hreg1 still contains dividend }
                  emit_const_reg(A_SHR,opsize,left.resultdef.size*8-1,hreg1);
                  emit_reg_reg(A_ADD,opsize,hreg1,regd);
                  cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
                  location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                  cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
                end
              else
                begin
                  d:=tordconstnode(right).value.svalue;
                  if d>=aword(1) shl (left.resultdef.size*8-1) then
                    begin
                      { divisor has its top bit set: the quotient can only
                        be 0 or 1, so a compare is enough }
                      location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                      { Ensure that the whole register is 0, since SETcc only sets the lowest byte }
                      { If the operands are 64 bits, this XOR routine will be shrunk by the
                        peephole optimizer. [Kit] }
                      emit_reg_reg(A_XOR,opsize,location.register,location.register);
                      cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                      if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
                        begin
                          hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                          emit_const_reg(A_MOV,opsize,aint(d),hreg2);
                          emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
                        end
                      else
                        emit_const_reg(A_CMP,opsize,aint(d),hreg1);
                      { NOTE: SBB and SETAE are both 3 bytes long without the REX prefix,
                        both use an ALU for their execution and take a single cycle to
                        run. The only difference is that SETAE does not modify the flags,
                        allowing for some possible reuse. [Kit] }
                      { Emit a SETcc instruction that depends on the carry bit being zero,
                        that is, the numerator is greater than or equal to the denominator. }
                      tempreg:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,OS_8);
                      instr:=TAiCpu.op_reg(A_SETcc,S_B,tempreg);
                      instr.condition:=C_AE;
                      current_asmdata.CurrAsmList.concat(instr);
                      cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                    end
                  else
                    begin
                      calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
                      cg.getcpuregister(current_asmdata.CurrAsmList,rega);
                      emit_const_reg(A_MOV,opsize,aint(m),rega);
                      cg.getcpuregister(current_asmdata.CurrAsmList,regd);
                      emit_reg(A_MUL,opsize,hreg1);
                      cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
                      if m_add then
                        begin
                          { addition can overflow, shift first bit considering carry,
                            then shift remaining bits in regular way. }
                          { RCR consumes the carry produced by ADD, so the
                            flags are marked as allocated between the two,
                            exactly as the unsigned MOD path below does }
                          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                          emit_reg_reg(A_ADD,opsize,hreg1,regd);
                          emit_const_reg(A_RCR,opsize,1,regd);
                          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                          dec(s);
                        end;
                      if s<>0 then
                        emit_const_reg(A_SHR,opsize,aint(s),regd);
                      cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
                      location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                      cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
                    end;
                end;
            end;
        end
      else if (nodetype=modn) and (right.nodetype=ordconstn) and not(is_signed(left.resultdef)) then
        begin
          { unsigned modulus by a (+/-)power-of-2 constant? }
          if isabspowerof2(tordconstnode(right).value,power) then
            begin
              emit_const_reg(A_AND,opsize,(aint(1) shl power)-1,hreg1);
              location.register:=hreg1;
            end
          else
            begin
              d:=tordconstnode(right).value.svalue;
              if d>=aword(1) shl (left.resultdef.size*8-1) then
                begin
                  { divisor has its top bit set: the remainder is either the
                    numerator itself or numerator-d; needs CMOV }
                  if not (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
                    goto DefaultDiv;

                  location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                  hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);

                  m := aword(-aint(d)); { Two's complement of d }

                  if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
                    begin
                      hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                      emit_const_reg(A_MOV,opsize,aint(d),hreg2);
                      emit_const_reg(A_MOV,opsize,aint(m),hreg3);
                      emit_reg_reg(A_XOR,opsize,location.register,location.register);

                      cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                      emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
                    end
                  else
                    begin
                      emit_const_reg(A_MOV,opsize,aint(m),hreg3);
                      emit_reg_reg(A_XOR,opsize,location.register,location.register);

                      cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                      emit_const_reg(A_CMP,opsize,aint(d),hreg1);
                    end;

                  { Emit conditional move that depends on the carry flag being zero,
                    that is, the comparison result is above or equal }
                  instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register);
                  instr.condition := C_AE;
                  current_asmdata.CurrAsmList.concat(instr);
                  cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);

                  emit_reg_reg(A_ADD,opsize,hreg1,location.register);
                end
              else
                begin
                  { Convert the division to a multiplication }
                  calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
                  cg.getcpuregister(current_asmdata.CurrAsmList,rega);
                  emit_const_reg(A_MOV,opsize,aint(m),rega);
                  cg.getcpuregister(current_asmdata.CurrAsmList,regd);
                  emit_reg(A_MUL,opsize,hreg1);
                  cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
                  { keep a copy of the numerator for the final subtraction }
                  hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                  emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
                  if m_add then
                    begin
                      { addition can overflow, shift first bit considering carry,
                        then shift remaining bits in regular way. }
                      cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                      emit_reg_reg(A_ADD,opsize,hreg1,regd);
                      emit_const_reg(A_RCR,opsize,1,regd);
                      cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                      dec(s);
                    end;
                  if s<>0 then
                    emit_const_reg(A_SHR,opsize,aint(s),regd); { R/EDX now contains the quotient }

                  { Now multiply the quotient by the original denominator and
                    subtract the product from the original numerator to get
                    the remainder. }
                  if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in IMUL }
                    begin
                      hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                      emit_const_reg(A_MOV,opsize,aint(d),hreg3);
                      emit_reg_reg(A_IMUL,opsize,hreg3,regd);
                    end
                  else
                    emit_const_reg(A_IMUL,opsize,aint(d),regd);

                  emit_reg_reg(A_SUB,opsize,regd,hreg2);
                  cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
                  location.register:=hreg2;
                end;
            end;
        end
      else if (nodetype=modn) and (right.nodetype=ordconstn) and (is_signed(left.resultdef)) and isabspowerof2(tordconstnode(right).value,power) then
        begin
          { signed modulus by a (+/-)power-of-2 constant, without jumps }
          hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
          if power=1 then
            cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-power,hreg1,hreg2)
          else
            begin
              cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,resultdef.size*8-1,hreg1,hreg2);
              cg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-power,hreg2,hreg2);
            end;
          emit_reg_reg(A_ADD,opsize,hreg1,hreg2);
          emit_const_reg(A_AND,opsize,not((aint(1) shl power)-1),hreg2);
          emit_reg_reg(A_SUB,opsize,hreg2,hreg1);
          location.register:=hreg1;
        end
      else
        begin
DefaultDiv:
          {Bring numerator (hreg1) into R/EAX.}
          cg.getcpuregister(current_asmdata.CurrAsmList,rega);
          emit_reg_reg(A_MOV,opsize,hreg1,rega);
          cg.getcpuregister(current_asmdata.CurrAsmList,regd);
          {Sign extension depends on the left type.}
          if is_signed(left.resultdef) then
            case left.resultdef.size of
{$ifdef x86_64}
              8:
                emit_none(A_CQO,S_NO);
{$endif x86_64}
              4:
                emit_none(A_CDQ,S_NO);
              else
                internalerror(2013102701);
            end
          else
            emit_reg_reg(A_XOR,opsize,regd,regd);

          { Division depends on the result type }
          if is_signed(resultdef) then
            op:=A_IDIV
          else
            op:=A_DIV;

          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            emit_ref(op,opsize,right.location.reference)
          else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
            emit_reg(op,opsize,right.location.register)
          else
            begin
              { any other divisor location is loaded into a fresh register }
              hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,right.resultdef,right.location,hreg1);
              emit_reg(op,opsize,hreg1);
            end;

          { Copy the result into a new register. Release R/EAX & R/EDX.}
          cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
          { quotient is taken from R/EAX, remainder from R/EDX }
          if nodetype=divn then
            cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,rega,location.register)
          else
            cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register);
        end;
    end;
  {$ifdef SUPPORT_MMX}
  { Generates an MMX shift (shl/shr) of the left operand by the right one.

    The opcode is chosen from the node type and the MMX element type of the
    left operand (16, 32 or 64 bit elements; anything else is an internal
    error). The left operand is brought into an MMX register, which then
    also becomes the result register; the shift count may be an MMX
    register, a constant or a memory reference. }
  procedure tx86shlshrnode.second_mmx;
    var
      op : TAsmOp;
      mmxbase : tmmxtype;
      hregister : tregister;
    begin
      secondpass(left);
      if codegenerror then
        exit;
      secondpass(right);
      if codegenerror then
        exit;

      op:=A_NOP;

      mmxbase:=mmx_type(left.resultdef);

      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        shrn :
          case mmxbase of
            mmxs16bit,mmxu16bit,mmxfixed16:
              op:=A_PSRLW;
            mmxs32bit,mmxu32bit:
              op:=A_PSRLD;
            mmxs64bit,mmxu64bit:
              op:=A_PSRLQ;
            else
              Internalerror(2018022504);
          end;
        shln :
          case mmxbase of
            mmxs16bit,mmxu16bit,mmxfixed16:
              op:=A_PSLLW;
            mmxs32bit,mmxu32bit:
              op:=A_PSLLD;
            mmxs64bit,mmxu64bit:
              { 64 bit elements need the quadword shift (was A_PSLLD, which
                shifts the two 32 bit lanes independently) }
              op:=A_PSLLQ;
            else
              Internalerror(2018022503);
          end;
        else
          internalerror(2018022502);
      end;

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          { register variable ? }
          if (left.location.loc=LOC_CMMXREGISTER) then
            begin
              hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
            end
          else
            begin
              if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                internalerror(2018022505);

              hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
            end;

          location_reset(left.location,LOC_MMXREGISTER,OS_NO);
          left.location.register:=hregister;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      case right.location.loc of
        LOC_MMXREGISTER,LOC_CMMXREGISTER:
          emit_reg_reg(op,S_NO,right.location.register,left.location.register);
        LOC_CONSTANT:
          emit_const_reg(op,S_NO,right.location.value,left.location.register);
        LOC_REFERENCE,LOC_CREFERENCE:
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
          end;
        else
          internalerror(2018022506);
      end;

      { the shifted left operand register is the result for every count kind }
      location.register:=left.location.register;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  {$endif SUPPORT_MMX}
  734. end.