{ nx86mat.pas }
{
    Copyright (c) 1998-2002 by Florian Klaempfl

    Generate x86 code for math nodes

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
  17. unit nx86mat;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ncgmat;
  22. type
  23. tx86unaryminusnode = class(tcgunaryminusnode)
  24. {$ifdef SUPPORT_MMX}
  25. procedure second_mmx;override;
  26. {$endif SUPPORT_MMX}
  27. procedure second_float;override;
  28. function pass_1:tnode;override;
  29. end;
  30. tx86notnode = class(tcgnotnode)
  31. procedure second_boolean;override;
  32. {$ifdef SUPPORT_MMX}
  33. procedure second_mmx;override;
  34. {$endif SUPPORT_MMX}
  35. end;
  36. tx86moddivnode = class(tcgmoddivnode)
  37. procedure pass_generate_code;override;
  38. end;
  39. implementation
  40. uses
  41. globtype,
  42. constexp,
  43. cutils,verbose,globals,
  44. symconst,symdef,
  45. aasmbase,aasmtai,aasmcpu,aasmdata,defutil,
  46. cgbase,pass_1,pass_2,
  47. ncon,
  48. cpubase,cpuinfo,
  49. cga,cgobj,hlcgobj,cgx86,cgutils;
  50. {*****************************************************************************
  51. TI386UNARYMINUSNODE
  52. *****************************************************************************}
  53. function tx86unaryminusnode.pass_1 : tnode;
  54. begin
  55. result:=nil;
  56. firstpass(left);
  57. if codegenerror then
  58. exit;
  59. if (left.resultdef.typ=floatdef) then
  60. begin
  61. if use_vectorfpu(left.resultdef) then
  62. expectloc:=LOC_MMREGISTER
  63. else
  64. expectloc:=LOC_FPUREGISTER;
  65. end
  66. {$ifdef SUPPORT_MMX}
  67. else
  68. if (cs_mmx in current_settings.localswitches) and
  69. is_mmx_able_array(left.resultdef) then
  70. begin
  71. expectloc:=LOC_MMXREGISTER;
  72. end
  73. {$endif SUPPORT_MMX}
  74. else
  75. inherited pass_1;
  76. end;
  77. {$ifdef SUPPORT_MMX}
  78. procedure tx86unaryminusnode.second_mmx;
  79. var
  80. op : tasmop;
  81. hreg : tregister;
  82. begin
  83. op:=A_NONE;
  84. secondpass(left);
  85. location_reset(location,LOC_MMXREGISTER,OS_NO);
  86. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  87. emit_reg_reg(A_PXOR,S_NO,hreg,hreg);
  88. case left.location.loc of
  89. LOC_MMXREGISTER:
  90. begin
  91. location.register:=left.location.register;
  92. end;
  93. LOC_CMMXREGISTER:
  94. begin
  95. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  96. emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
  97. end;
  98. LOC_REFERENCE,
  99. LOC_CREFERENCE:
  100. begin
  101. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  102. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
  103. end;
  104. else
  105. internalerror(200203225);
  106. end;
  107. if cs_mmx_saturation in current_settings.localswitches then
  108. case mmx_type(resultdef) of
  109. mmxs8bit:
  110. op:=A_PSUBSB;
  111. mmxu8bit:
  112. op:=A_PSUBUSB;
  113. mmxs16bit,mmxfixed16:
  114. op:=A_PSUBSW;
  115. mmxu16bit:
  116. op:=A_PSUBUSW;
  117. end
  118. else
  119. case mmx_type(resultdef) of
  120. mmxs8bit,mmxu8bit:
  121. op:=A_PSUBB;
  122. mmxs16bit,mmxu16bit,mmxfixed16:
  123. op:=A_PSUBW;
  124. mmxs32bit,mmxu32bit:
  125. op:=A_PSUBD;
  126. end;
  127. if op = A_NONE then
  128. internalerror(201408202);
  129. emit_reg_reg(op,S_NO,location.register,hreg);
  130. emit_reg_reg(A_MOVQ,S_NO,hreg,location.register);
  131. end;
  132. {$endif SUPPORT_MMX}
  133. procedure tx86unaryminusnode.second_float;
  134. var
  135. reg : tregister;
  136. href : treference;
  137. l1 : tasmlabel;
  138. begin
  139. secondpass(left);
  140. if expectloc=LOC_MMREGISTER then
  141. begin
  142. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  143. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  144. { make life of register allocator easier }
  145. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
  146. current_asmdata.getglobaldatalabel(l1);
  147. new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint)));
  148. current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
  149. case def_cgsize(resultdef) of
  150. OS_F32:
  151. current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(longint(1 shl 31)));
  152. OS_F64:
  153. begin
  154. current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(0));
  155. current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(-(1 shl 31)));
  156. end
  157. else
  158. internalerror(2004110215);
  159. end;
  160. reference_reset_symbol(href,l1,0,resultdef.alignment,[]);
  161. if UseAVX then
  162. cg.a_opmm_ref_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,href,left.location.register,location.register,nil)
  163. else
  164. begin
  165. reg:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
  166. cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),href,reg,mms_movescalar);
  167. cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),left.location.register,location.register,mms_movescalar);
  168. cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,location.register,nil);
  169. end;
  170. end
  171. else
  172. begin
  173. location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
  174. case left.location.loc of
  175. LOC_REFERENCE,
  176. LOC_CREFERENCE:
  177. begin
  178. location.register:=NR_ST;
  179. cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
  180. left.location.size,location.size,
  181. left.location.reference,location.register);
  182. emit_none(A_FCHS,S_NO);
  183. end;
  184. LOC_FPUREGISTER,
  185. LOC_CFPUREGISTER:
  186. begin
  187. { "load st,st" is ignored by the code generator }
  188. cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,NR_ST);
  189. location.register:=NR_ST;
  190. emit_none(A_FCHS,S_NO);
  191. end;
  192. else
  193. internalerror(200312241);
  194. end;
  195. end;
  196. end;
  197. {*****************************************************************************
  198. TX86NOTNODE
  199. *****************************************************************************}
  200. procedure tx86notnode.second_boolean;
  201. var
  202. opsize : tcgsize;
  203. {$if defined(cpu32bitalu) or defined(cpu16bitalu)}
  204. hreg: tregister;
  205. {$endif}
  206. begin
  207. opsize:=def_cgsize(resultdef);
  208. if not handle_locjump then
  209. begin
  210. { the second pass could change the location of left }
  211. { if it is a register variable, so we've to do }
  212. { this before the case statement }
  213. secondpass(left);
  214. case left.location.loc of
  215. LOC_FLAGS :
  216. begin
  217. location_reset(location,LOC_FLAGS,OS_NO);
  218. location.resflags:=left.location.resflags;
  219. inverse_flags(location.resflags);
  220. end;
  221. LOC_CREFERENCE,
  222. LOC_REFERENCE:
  223. begin
  224. {$if defined(cpu32bitalu)}
  225. if is_64bit(resultdef) then
  226. begin
  227. hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
  228. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  229. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
  230. inc(left.location.reference.offset,4);
  231. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
  232. end
  233. else
  234. {$elseif defined(cpu16bitalu)}
  235. if is_64bit(resultdef) then
  236. begin
  237. hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
  238. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  239. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
  240. inc(left.location.reference.offset,2);
  241. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  242. inc(left.location.reference.offset,2);
  243. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  244. inc(left.location.reference.offset,2);
  245. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  246. end
  247. else if is_32bit(resultdef) then
  248. begin
  249. hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
  250. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  251. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
  252. inc(left.location.reference.offset,2);
  253. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  254. end
  255. else
  256. {$endif}
  257. emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
  258. location_reset(location,LOC_FLAGS,OS_NO);
  259. location.resflags:=F_E;
  260. end;
  261. LOC_CONSTANT,
  262. LOC_REGISTER,
  263. LOC_CREGISTER,
  264. LOC_SUBSETREG,
  265. LOC_CSUBSETREG,
  266. LOC_SUBSETREF,
  267. LOC_CSUBSETREF :
  268. begin
  269. {$if defined(cpu32bitalu)}
  270. if is_64bit(resultdef) then
  271. begin
  272. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  273. emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
  274. end
  275. else
  276. {$elseif defined(cpu16bitalu)}
  277. if is_64bit(resultdef) then
  278. begin
  279. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  280. emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
  281. emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
  282. emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
  283. end
  284. else if is_32bit(resultdef) then
  285. begin
  286. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  287. emit_reg_reg(A_OR,S_L,cg.GetNextReg(left.location.register),left.location.register);
  288. end
  289. else
  290. {$endif}
  291. begin
  292. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
  293. emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
  294. end;
  295. location_reset(location,LOC_FLAGS,OS_NO);
  296. location.resflags:=F_E;
  297. end;
  298. else
  299. internalerror(200203224);
  300. end;
  301. end;
  302. end;
  303. {$ifdef SUPPORT_MMX}
  304. procedure tx86notnode.second_mmx;
  305. var hreg,r:Tregister;
  306. begin
  307. secondpass(left);
  308. location_reset(location,LOC_MMXREGISTER,OS_NO);
  309. r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  310. emit_const_reg(A_MOV,S_L,longint($ffffffff),r);
  311. { load operand }
  312. case left.location.loc of
  313. LOC_MMXREGISTER:
  314. location_copy(location,left.location);
  315. LOC_CMMXREGISTER:
  316. begin
  317. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  318. emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
  319. end;
  320. LOC_REFERENCE,
  321. LOC_CREFERENCE:
  322. begin
  323. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  324. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
  325. end;
  326. end;
  327. { load mask }
  328. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  329. emit_reg_reg(A_MOVD,S_NO,r,hreg);
  330. { lower 32 bit }
  331. emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
  332. { shift mask }
  333. emit_const_reg(A_PSLLQ,S_B,32,hreg);
  334. { higher 32 bit }
  335. emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
  336. end;
  337. {$endif SUPPORT_MMX}
  338. {*****************************************************************************
  339. TX86MODDIVNODE
  340. *****************************************************************************}
  341. procedure tx86moddivnode.pass_generate_code;
  342. var
  343. hreg1,hreg2,hreg3,rega,regd,tempreg:Tregister;
  344. power:longint;
  345. instr:TAiCpu;
  346. op:Tasmop;
  347. cgsize:TCgSize;
  348. opsize:topsize;
  349. e, sm: aint;
  350. d,m: aword;
  351. m_add, invertsign: boolean;
  352. s: byte;
  353. label
  354. DefaultDiv;
  355. begin
  356. secondpass(left);
  357. if codegenerror then
  358. exit;
  359. secondpass(right);
  360. if codegenerror then
  361. exit;
  362. { put numerator in register }
  363. cgsize:=def_cgsize(resultdef);
  364. opsize:=TCGSize2OpSize[cgsize];
  365. if not (cgsize in [OS_32,OS_S32,OS_64,OS_S64]) then
  366. InternalError(2013102702);
  367. rega:=newreg(R_INTREGISTER,RS_EAX,cgsize2subreg(R_INTREGISTER,cgsize));
  368. regd:=newreg(R_INTREGISTER,RS_EDX,cgsize2subreg(R_INTREGISTER,cgsize));
  369. location_reset(location,LOC_REGISTER,cgsize);
  370. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  371. hreg1:=left.location.register;
  372. if (nodetype=divn) and (right.nodetype=ordconstn) then
  373. begin
  374. if isabspowerof2(tordconstnode(right).value,power) then
  375. begin
  376. { for signed numbers, the numerator must be adjusted before the
  377. shift instruction, but not with unsigned numbers! Otherwise,
  378. "Cardinal($ffffffff) div 16" overflows! (JM) }
  379. if is_signed(left.resultdef) Then
  380. begin
  381. invertsign:=tordconstnode(right).value<0;
  382. { use a sequence without jumps, saw this in
  383. comp.compilers (JM) }
  384. { no jumps, but more operations }
  385. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  386. emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
  387. if power=1 then
  388. begin
  389. {If the left value is negative, hreg2=(1 shl power)-1=1, otherwise 0.}
  390. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-1,hreg2);
  391. end
  392. else
  393. begin
  394. {If the left value is negative, hreg2=$ffffffff, otherwise 0.}
  395. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,resultdef.size*8-1,hreg2);
  396. {If negative, hreg2=(1 shl power)-1, otherwise 0.}
  397. { (don't use emit_const_reg, because if value>high(longint)
  398. then it must first be loaded into a register) }
  399. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,(aint(1) shl power)-1,hreg2);
  400. end;
  401. { add to the left value }
  402. emit_reg_reg(A_ADD,opsize,hreg2,hreg1);
  403. { do the shift }
  404. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,power,hreg1);
  405. if invertsign then
  406. emit_reg(A_NEG,opsize,hreg1);
  407. end
  408. else
  409. cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,power,hreg1);
  410. location.register:=hreg1;
  411. end
  412. else
  413. begin
  414. if is_signed(left.resultdef) then
  415. begin
  416. e:=tordconstnode(right).value.svalue;
  417. calc_divconst_magic_signed(resultdef.size*8,e,sm,s);
  418. cg.getcpuregister(current_asmdata.CurrAsmList,rega);
  419. emit_const_reg(A_MOV,opsize,sm,rega);
  420. cg.getcpuregister(current_asmdata.CurrAsmList,regd);
  421. emit_reg(A_IMUL,opsize,hreg1);
  422. { only the high half of result is used }
  423. cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
  424. { add or subtract dividend }
  425. if (e>0) and (sm<0) then
  426. emit_reg_reg(A_ADD,opsize,hreg1,regd)
  427. else if (e<0) and (sm>0) then
  428. emit_reg_reg(A_SUB,opsize,hreg1,regd);
  429. { shift if necessary }
  430. if (s<>0) then
  431. emit_const_reg(A_SAR,opsize,s,regd);
  432. { extract and add the sign bit }
  433. if (e<0) then
  434. emit_reg_reg(A_MOV,opsize,regd,hreg1);
  435. { if e>=0, hreg1 still contains dividend }
  436. emit_const_reg(A_SHR,opsize,left.resultdef.size*8-1,hreg1);
  437. emit_reg_reg(A_ADD,opsize,hreg1,regd);
  438. cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
  439. location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  440. cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
  441. end
  442. else
  443. begin
  444. d:=tordconstnode(right).value.svalue;
  445. if d>=aword(1) shl (left.resultdef.size*8-1) then
  446. begin
  447. location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  448. { Ensure that the whole register is 0, since SETcc only sets the lowest byte }
  449. if opsize = S_Q then
  450. begin
  451. { Emit an XOR instruction that only operates on the lower 32 bits,
  452. since we want to initialise this register to zero, the upper 32
  453. bits will be set to zero regardless, and the resultant machine code
  454. will usually be smaller due to the lack of a REX prefix. [Kit] }
  455. tempreg := location.register;
  456. setsubreg(tempreg, R_SUBD);
  457. emit_reg_reg(A_XOR, S_L, tempreg, tempreg);
  458. end
  459. else
  460. emit_reg_reg(A_XOR,opsize,location.register,location.register);
  461. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  462. if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
  463. begin
  464. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  465. emit_const_reg(A_MOV,opsize,aint(d),hreg2);
  466. emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
  467. end
  468. else
  469. emit_const_reg(A_CMP,opsize,aint(d),hreg1);
  470. { NOTE: SBB and SETAE are both 3 bytes long without the REX prefix,
  471. both use an ALU for their execution and take a single cycle to
  472. run. The only difference is that SETAE does not modify the flags,
  473. allowing for some possible reuse. [Kit] }
  474. {$ifdef x86_64}
  475. { Emit a SETcc instruction that depends on the carry bit being zero,
  476. that is, the numerator is greater than or equal to the denominator. }
  477. tempreg := location.register;
  478. setsubreg(tempreg, R_SUBL);
  479. { On x86-64, all registers can have their lower 8 bits represented }
  480. instr:=TAiCpu.op_reg(A_SETcc,S_B,tempreg);
  481. instr.condition := C_AE;
  482. current_asmdata.CurrAsmList.concat(instr);
  483. {$else}
  484. case getsupreg(location.register) of
  485. { On x86, only these four registers can have their lower 8 bits represented }
  486. RS_EAX, RS_ECX, RS_EDX, RS_EBX:
  487. begin
  488. { Emit a SETcc instruction that depends on the carry bit being zero,
  489. that is, the numerator is greater than or equal to the denominator. }
  490. tempreg := location.register;
  491. setsubreg(tempreg, R_SUBL);
  492. instr:=TAiCpu.op_reg(A_SETcc,S_B,tempreg);
  493. instr.condition := C_AE;
  494. current_asmdata.CurrAsmList.concat(instr);
  495. end;
  496. else
  497. { It will likely emit SBB anyway because location.register is
  498. usually imaginary. [Kit] }
  499. emit_const_reg(A_SBB,opsize,-1,location.register);
  500. end;
  501. {$endif}
  502. cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  503. end
  504. else
  505. begin
  506. calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
  507. cg.getcpuregister(current_asmdata.CurrAsmList,rega);
  508. emit_const_reg(A_MOV,opsize,aint(m),rega);
  509. cg.getcpuregister(current_asmdata.CurrAsmList,regd);
  510. emit_reg(A_MUL,opsize,hreg1);
  511. cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
  512. if m_add then
  513. begin
  514. { addition can overflow, shift first bit considering carry,
  515. then shift remaining bits in regular way. }
  516. emit_reg_reg(A_ADD,opsize,hreg1,regd);
  517. emit_const_reg(A_RCR,opsize,1,regd);
  518. dec(s);
  519. end;
  520. if s<>0 then
  521. emit_const_reg(A_SHR,opsize,aint(s),regd);
  522. cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
  523. location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  524. cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
  525. end;
  526. end;
  527. end;
  528. end
  529. else if (nodetype=modn) and (right.nodetype=ordconstn) and not(is_signed(left.resultdef)) then
  530. begin
  531. { unsigned modulus by a (+/-)power-of-2 constant? }
  532. if isabspowerof2(tordconstnode(right).value,power) then
  533. begin
  534. emit_const_reg(A_AND,opsize,(aint(1) shl power)-1,hreg1);
  535. location.register:=hreg1;
  536. end
  537. else
  538. begin
  539. d:=tordconstnode(right).value.svalue;
  540. if d>=aword(1) shl (left.resultdef.size*8-1) then
  541. begin
  542. if not (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
  543. goto DefaultDiv;
  544. location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  545. hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  546. m := aword(-aint(d)); { Two's complement of d }
  547. if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
  548. begin
  549. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  550. emit_const_reg(A_MOV,opsize,aint(d),hreg2);
  551. emit_const_reg(A_MOV,opsize,aint(m),hreg3);
  552. emit_reg_reg(A_XOR,opsize,location.register,location.register);
  553. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  554. emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
  555. end
  556. else
  557. begin
  558. emit_const_reg(A_MOV,opsize,aint(m),hreg3);
  559. emit_reg_reg(A_XOR,opsize,location.register,location.register);
  560. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  561. emit_const_reg(A_CMP,opsize,aint(d),hreg1);
  562. end;
  563. { Emit conditional move that depends on the carry flag being zero,
  564. that is, the comparison result is above or equal }
  565. instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register);
  566. instr.condition := C_AE;
  567. current_asmdata.CurrAsmList.concat(instr);
  568. cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  569. emit_reg_reg(A_ADD,opsize,hreg1,location.register);
  570. end
  571. else
  572. begin
  573. { Convert the division to a multiplication }
  574. calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
  575. cg.getcpuregister(current_asmdata.CurrAsmList,rega);
  576. emit_const_reg(A_MOV,opsize,aint(m),rega);
  577. cg.getcpuregister(current_asmdata.CurrAsmList,regd);
  578. emit_reg(A_MUL,opsize,hreg1);
  579. cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
  580. hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  581. emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
  582. if m_add then
  583. begin
  584. { addition can overflow, shift first bit considering carry,
  585. then shift remaining bits in regular way. }
  586. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  587. emit_reg_reg(A_ADD,opsize,hreg1,regd);
  588. emit_const_reg(A_RCR,opsize,1,regd);
  589. cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  590. dec(s);
  591. end;
  592. if s<>0 then
  593. emit_const_reg(A_SHR,opsize,aint(s),regd); { R/EDX now contains the quotient }
  594. { Now multiply the quotient by the original denominator and
  595. subtract the product from the original numerator to get
  596. the remainder. }
  597. if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in IMUL }
  598. begin
  599. hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  600. emit_const_reg(A_MOV,opsize,aint(d),hreg3);
  601. emit_reg_reg(A_IMUL,opsize,hreg3,regd);
  602. end
  603. else
  604. emit_const_reg(A_IMUL,opsize,aint(d),regd);
  605. emit_reg_reg(A_SUB,opsize,regd,hreg2);
  606. cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
  607. location.register:=hreg2;
  608. end;
  609. end;
  610. end
  611. else
  612. begin
  613. DefaultDiv:
  614. {Bring denominator to a register.}
  615. cg.getcpuregister(current_asmdata.CurrAsmList,rega);
  616. emit_reg_reg(A_MOV,opsize,hreg1,rega);
  617. cg.getcpuregister(current_asmdata.CurrAsmList,regd);
  618. {Sign extension depends on the left type.}
  619. if is_signed(left.resultdef) then
  620. case left.resultdef.size of
  621. {$ifdef x86_64}
  622. 8:
  623. emit_none(A_CQO,S_NO);
  624. {$endif x86_64}
  625. 4:
  626. emit_none(A_CDQ,S_NO);
  627. else
  628. internalerror(2013102701);
  629. end
  630. else
  631. emit_reg_reg(A_XOR,opsize,regd,regd);
  632. { Division depends on the result type }
  633. if is_signed(resultdef) then
  634. op:=A_IDIV
  635. else
  636. op:=A_DIV;
  637. if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
  638. emit_ref(op,opsize,right.location.reference)
  639. else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
  640. emit_reg(op,opsize,right.location.register)
  641. else
  642. begin
  643. hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
  644. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,right.resultdef,right.location,hreg1);
  645. emit_reg(op,opsize,hreg1);
  646. end;
  647. { Copy the result into a new register. Release R/EAX & R/EDX.}
  648. cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
  649. cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
  650. location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
  651. if nodetype=divn then
  652. cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,rega,location.register)
  653. else
  654. cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register);
  655. end;
  656. end;
  657. end.