nx86mat.pas
{
    Copyright (c) 1998-2002 by Florian Klaempfl

    Generate x86 code for math nodes

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 ****************************************************************************
}
unit nx86mat;

{$i fpcdefs.inc}

interface

  uses
    node,ncgmat;

  type
    { x86 unary minus: overrides the generic node to negate floats either
      in an mm register (SSE/AVX) or on the x87 stack, and optionally to
      negate MMX vectors }
    tx86unaryminusnode = class(tcgunaryminusnode)
{$ifdef SUPPORT_MMX}
       procedure second_mmx;override;
{$endif SUPPORT_MMX}
       procedure second_float;override;
       function pass_1:tnode;override;
    end;

    { x86 "not": boolean negation via inverted flags/compares, plus an
      MMX bitwise-not implementation when SUPPORT_MMX is enabled }
    tx86notnode = class(tcgnotnode)
       procedure second_boolean;override;
{$ifdef SUPPORT_MMX}
       procedure second_mmx;override;
{$endif SUPPORT_MMX}
    end;

    { x86 div/mod: strength-reduces constant divisors (shifts, masks,
      magic-number multiplication) and falls back to DIV/IDIV }
    tx86moddivnode = class(tcgmoddivnode)
       procedure pass_generate_code;override;
    end;

implementation

  uses
    globtype,
    constexp,
    cutils,verbose,globals,
    symconst,symdef,
    aasmbase,aasmtai,aasmcpu,aasmdata,defutil,
    cgbase,pass_1,pass_2,
    ncon,
    cpubase,cpuinfo,
    cga,cgobj,hlcgobj,cgx86,cgutils;
  50. {*****************************************************************************
  51. TI386UNARYMINUSNODE
  52. *****************************************************************************}
  53. function tx86unaryminusnode.pass_1 : tnode;
  54. begin
  55. result:=nil;
  56. firstpass(left);
  57. if codegenerror then
  58. exit;
  59. if (left.resultdef.typ=floatdef) then
  60. begin
  61. if use_vectorfpu(left.resultdef) then
  62. expectloc:=LOC_MMREGISTER
  63. else
  64. expectloc:=LOC_FPUREGISTER;
  65. end
  66. {$ifdef SUPPORT_MMX}
  67. else
  68. if (cs_mmx in current_settings.localswitches) and
  69. is_mmx_able_array(left.resultdef) then
  70. begin
  71. expectloc:=LOC_MMXREGISTER;
  72. end
  73. {$endif SUPPORT_MMX}
  74. else
  75. inherited pass_1;
  76. end;
  77. {$ifdef SUPPORT_MMX}
{ Negate an MMX vector: computes 0 - left element-wise.
  A scratch register is zeroed with PXOR, the operand is subtracted from
  it with the PSUB* variant matching the element type, and the result is
  moved back into the result register. }
procedure tx86unaryminusnode.second_mmx;
  var
    op : tasmop;
    hreg : tregister;
  begin
    op:=A_NONE;
    secondpass(left);
    location_reset(location,LOC_MMXREGISTER,OS_NO);
    { hreg := 0; it will receive the negated value }
    hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
    emit_reg_reg(A_PXOR,S_NO,hreg,hreg);
    { bring the operand into an mmx register we are allowed to clobber }
    case left.location.loc of
      LOC_MMXREGISTER:
        begin
          location.register:=left.location.register;
        end;
      LOC_CMMXREGISTER:
        begin
          location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
          emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
        end;
      LOC_REFERENCE,
      LOC_CREFERENCE:
        begin
          location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
          emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
        end;
      else
        internalerror(200203225);
    end;
    { select the element-wise subtraction for the element type; with
      saturation enabled there is no saturated 32-bit subtract in MMX,
      so op stays A_NONE for mmx*32bit and we bail out below }
    if cs_mmx_saturation in current_settings.localswitches then
      case mmx_type(resultdef) of
        mmxs8bit:
          op:=A_PSUBSB;
        mmxu8bit:
          op:=A_PSUBUSB;
        mmxs16bit,mmxfixed16:
          op:=A_PSUBSW;
        mmxu16bit:
          op:=A_PSUBUSW;
      end
    else
      case mmx_type(resultdef) of
        mmxs8bit,mmxu8bit:
          op:=A_PSUBB;
        mmxs16bit,mmxu16bit,mmxfixed16:
          op:=A_PSUBW;
        mmxs32bit,mmxu32bit:
          op:=A_PSUBD;
      end;
    if op = A_NONE then
      internalerror(201408202);
    { hreg := 0 - operand, then move the result into the result register }
    emit_reg_reg(op,S_NO,location.register,hreg);
    emit_reg_reg(A_MOVQ,S_NO,hreg,location.register);
  end;
  132. {$endif SUPPORT_MMX}
{ Negate a floating point value.
  SSE/AVX path (expectloc=LOC_MMREGISTER): XOR the value with a
  sign-bit mask constant emitted into the rodata section.
  x87 path: load onto the fpu stack and emit FCHS. }
procedure tx86unaryminusnode.second_float;
  var
    reg : tregister;
    href : treference;
    l1 : tasmlabel;
  begin
    secondpass(left);
    if expectloc=LOC_MMREGISTER then
      begin
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
        location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
        { make life of register allocator easier }
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        { emit the sign-bit mask constant into rodata }
        current_asmdata.getglobaldatalabel(l1);
        new_section(current_asmdata.asmlists[al_typedconsts],sec_rodata_norel,l1.name,const_align(sizeof(pint)));
        current_asmdata.asmlists[al_typedconsts].concat(Tai_label.Create(l1));
        case def_cgsize(resultdef) of
          OS_F32:
            { $80000000: sign bit of a single }
            current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(longint(1 shl 31)));
          OS_F64:
            { $8000000000000000: sign bit of a double, low dword first }
            begin
              current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(0));
              current_asmdata.asmlists[al_typedconsts].concat(tai_const.create_32bit(-(1 shl 31)));
            end
          else
            internalerror(2004110215);
        end;
        reference_reset_symbol(href,l1,0,resultdef.alignment,[]);
        if UseAVX then
          { AVX: three-operand XOR, result := left xor mask in one instruction }
          cg.a_opmm_ref_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,href,left.location.register,location.register,nil)
        else
          begin
            { SSE: load the mask, copy the operand, then XOR in place }
            reg:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
            cg.a_loadmm_ref_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),href,reg,mms_movescalar);
            cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,def_cgsize(resultdef),def_cgsize(resultdef),left.location.register,location.register,mms_movescalar);
            cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_XOR,left.location.size,reg,location.register,nil);
          end;
      end
    else
      begin
        { x87 path: result ends up in st0, negated via FCHS }
        location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
        case left.location.loc of
          LOC_REFERENCE,
          LOC_CREFERENCE:
            begin
              location.register:=NR_ST;
              cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
                left.location.size,location.size,
                left.location.reference,location.register);
              emit_none(A_FCHS,S_NO);
            end;
          LOC_FPUREGISTER,
          LOC_CFPUREGISTER:
            begin
              { "load st,st" is ignored by the code generator }
              cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,left.location.size,location.size,left.location.register,NR_ST);
              location.register:=NR_ST;
              emit_none(A_FCHS,S_NO);
            end;
          else
            internalerror(200312241);
        end;
      end;
  end;
  197. {*****************************************************************************
  198. TX86NOTNODE
  199. *****************************************************************************}
  200. procedure tx86notnode.second_boolean;
  201. var
  202. opsize : tcgsize;
  203. {$if defined(cpu32bitalu) or defined(cpu16bitalu)}
  204. hreg: tregister;
  205. {$endif}
  206. begin
  207. opsize:=def_cgsize(resultdef);
  208. if not handle_locjump then
  209. begin
  210. { the second pass could change the location of left }
  211. { if it is a register variable, so we've to do }
  212. { this before the case statement }
  213. secondpass(left);
  214. case left.location.loc of
  215. LOC_FLAGS :
  216. begin
  217. location_reset(location,LOC_FLAGS,OS_NO);
  218. location.resflags:=left.location.resflags;
  219. inverse_flags(location.resflags);
  220. end;
  221. LOC_CREFERENCE,
  222. LOC_REFERENCE:
  223. begin
  224. {$if defined(cpu32bitalu)}
  225. if is_64bit(resultdef) then
  226. begin
  227. hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_32);
  228. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  229. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_32,OS_32,left.location.reference,hreg);
  230. inc(left.location.reference.offset,4);
  231. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_32,left.location.reference,hreg);
  232. end
  233. else
  234. {$elseif defined(cpu16bitalu)}
  235. if is_64bit(resultdef) then
  236. begin
  237. hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
  238. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  239. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
  240. inc(left.location.reference.offset,2);
  241. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  242. inc(left.location.reference.offset,2);
  243. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  244. inc(left.location.reference.offset,2);
  245. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  246. end
  247. else if is_32bit(resultdef) then
  248. begin
  249. hreg:=cg.GetIntRegister(current_asmdata.CurrAsmList,OS_16);
  250. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  251. cg.a_load_ref_reg(current_asmdata.CurrAsmList,OS_16,OS_16,left.location.reference,hreg);
  252. inc(left.location.reference.offset,2);
  253. cg.a_op_ref_reg(current_asmdata.CurrAsmList,OP_OR,OS_16,left.location.reference,hreg);
  254. end
  255. else
  256. {$endif}
  257. emit_const_ref(A_CMP, TCGSize2Opsize[opsize], 0, left.location.reference);
  258. location_reset(location,LOC_FLAGS,OS_NO);
  259. location.resflags:=F_E;
  260. end;
  261. LOC_CONSTANT,
  262. LOC_REGISTER,
  263. LOC_CREGISTER,
  264. LOC_SUBSETREG,
  265. LOC_CSUBSETREG,
  266. LOC_SUBSETREF,
  267. LOC_CSUBSETREF :
  268. begin
  269. {$if defined(cpu32bitalu)}
  270. if is_64bit(resultdef) then
  271. begin
  272. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  273. emit_reg_reg(A_OR,S_L,left.location.register64.reghi,left.location.register64.reglo);
  274. end
  275. else
  276. {$elseif defined(cpu16bitalu)}
  277. if is_64bit(resultdef) then
  278. begin
  279. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  280. emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reghi),left.location.register64.reghi);
  281. emit_reg_reg(A_OR,S_W,cg.GetNextReg(left.location.register64.reglo),left.location.register64.reglo);
  282. emit_reg_reg(A_OR,S_W,left.location.register64.reghi,left.location.register64.reglo);
  283. end
  284. else if is_32bit(resultdef) then
  285. begin
  286. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
  287. emit_reg_reg(A_OR,S_L,cg.GetNextReg(left.location.register),left.location.register);
  288. end
  289. else
  290. {$endif}
  291. begin
  292. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,true);
  293. emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
  294. end;
  295. location_reset(location,LOC_FLAGS,OS_NO);
  296. location.resflags:=F_E;
  297. end;
  298. else
  299. internalerror(200203224);
  300. end;
  301. end;
  302. end;
  303. {$ifdef SUPPORT_MMX}
  304. procedure tx86notnode.second_mmx;
  305. var hreg,r:Tregister;
  306. begin
  307. secondpass(left);
  308. location_reset(location,LOC_MMXREGISTER,OS_NO);
  309. r:=cg.getintregister(current_asmdata.CurrAsmList,OS_INT);
  310. emit_const_reg(A_MOV,S_L,longint($ffffffff),r);
  311. { load operand }
  312. case left.location.loc of
  313. LOC_MMXREGISTER:
  314. location_copy(location,left.location);
  315. LOC_CMMXREGISTER:
  316. begin
  317. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  318. emit_reg_reg(A_MOVQ,S_NO,left.location.register,location.register);
  319. end;
  320. LOC_REFERENCE,
  321. LOC_CREFERENCE:
  322. begin
  323. location.register:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  324. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,location.register);
  325. end;
  326. end;
  327. { load mask }
  328. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  329. emit_reg_reg(A_MOVD,S_NO,r,hreg);
  330. { lower 32 bit }
  331. emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
  332. { shift mask }
  333. emit_const_reg(A_PSLLQ,S_B,32,hreg);
  334. { higher 32 bit }
  335. emit_reg_reg(A_PXOR,S_NO,hreg,location.register);
  336. end;
  337. {$endif SUPPORT_MMX}
  338. {*****************************************************************************
  339. TX86MODDIVNODE
  340. *****************************************************************************}
{ Code generation for div/mod (nodetype divn/modn) on 32/64-bit ints.
  Constant divisors are strength-reduced:
    - div by (+/-)2^n: arithmetic/logical shift (with numerator
      adjustment and optional negation for signed operands);
    - div by other constants: magic-number multiplication via
      calc_divconst_magic_signed/unsigned;
    - unsigned mod by 2^n: AND mask;
    - unsigned mod by other constants: magic multiplication to get the
      quotient, then multiply back and subtract, or a CMOV-based
      compare-and-subtract when the divisor has its top bit set.
  Everything else falls through to DefaultDiv: plain DIV/IDIV with the
  dividend in R/EAX and the remainder produced in R/EDX. }
procedure tx86moddivnode.pass_generate_code;
  var
    hreg1,hreg2,hreg3,rega,regd:Tregister;
    power:longint;              { exponent when divisor is (+/-)2^power }
    instr:TAiCpu;               { hand-built CMOVcc instruction }
    op:Tasmop;
    cgsize:TCgSize;
    opsize:topsize;
    e, sm: aint;                { signed divisor and its magic multiplier }
    d,m: aword;                 { unsigned divisor and its magic multiplier }
    m_add, invertsign: boolean;
    s: byte;                    { post-multiplication shift count }
  label
    DefaultDiv;
  begin
    secondpass(left);
    if codegenerror then
      exit;
    secondpass(right);
    if codegenerror then
      exit;
    { put numerator in register }
    cgsize:=def_cgsize(resultdef);
    opsize:=TCGSize2OpSize[cgsize];
    if not (cgsize in [OS_32,OS_S32,OS_64,OS_S64]) then
      InternalError(2013102702);
    { R/EAX and R/EDX at the operation width, needed by MUL/IMUL/DIV/IDIV }
    rega:=newreg(R_INTREGISTER,RS_EAX,cgsize2subreg(R_INTREGISTER,cgsize));
    regd:=newreg(R_INTREGISTER,RS_EDX,cgsize2subreg(R_INTREGISTER,cgsize));
    location_reset(location,LOC_REGISTER,cgsize);
    hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,resultdef,false);
    hreg1:=left.location.register;
    if (nodetype=divn) and (right.nodetype=ordconstn) then
      begin
        if isabspowerof2(int64(tordconstnode(right).value),power) then
          begin
            { for signed numbers, the numerator must be adjusted before the
              shift instruction, but not wih unsigned numbers! Otherwise,
              "Cardinal($ffffffff) div 16" overflows! (JM) }
            if is_signed(left.resultdef) Then
              begin
                invertsign:=tordconstnode(right).value<0;
                { use a sequence without jumps, saw this in
                  comp.compilers (JM) }
                { no jumps, but more operations }
                hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
                if power=1 then
                  begin
                    {If the left value is negative, hreg2=(1 shl power)-1=1, otherwise 0.}
                    cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,resultdef.size*8-1,hreg2);
                  end
                else
                  begin
                    {If the left value is negative, hreg2=$ffffffff, otherwise 0.}
                    cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,resultdef.size*8-1,hreg2);
                    {If negative, hreg2=(1 shl power)-1, otherwise 0.}
                    { (don't use emit_const_reg, because if value>high(longint)
                       then it must first be loaded into a register) }
                    cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_AND,cgsize,(aint(1) shl power)-1,hreg2);
                  end;
                { add to the left value }
                emit_reg_reg(A_ADD,opsize,hreg2,hreg1);
                { do the shift }
                cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,cgsize,power,hreg1);
                { negative divisor: negate the quotient }
                if invertsign then
                  emit_reg(A_NEG,opsize,hreg1);
              end
            else
              cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SHR,cgsize,power,hreg1);
            location.register:=hreg1;
          end
        else
          begin
            if is_signed(left.resultdef) then
              begin
                { signed magic-number division: quotient is derived from
                  the high half of dividend*sm, with sign corrections }
                e:=tordconstnode(right).value.svalue;
                calc_divconst_magic_signed(resultdef.size*8,e,sm,s);
                cg.getcpuregister(current_asmdata.CurrAsmList,rega);
                emit_const_reg(A_MOV,opsize,sm,rega);
                cg.getcpuregister(current_asmdata.CurrAsmList,regd);
                emit_reg(A_IMUL,opsize,hreg1);
                { only the high half of result is used }
                cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
                { add or subtract dividend }
                if (e>0) and (sm<0) then
                  emit_reg_reg(A_ADD,opsize,hreg1,regd)
                else if (e<0) and (sm>0) then
                  emit_reg_reg(A_SUB,opsize,hreg1,regd);
                { shift if necessary }
                if (s<>0) then
                  emit_const_reg(A_SAR,opsize,s,regd);
                { extract and add the sign bit }
                if (e<0) then
                  emit_reg_reg(A_MOV,opsize,regd,hreg1);
                { if e>=0, hreg1 still contains dividend }
                emit_const_reg(A_SHR,opsize,left.resultdef.size*8-1,hreg1);
                emit_reg_reg(A_ADD,opsize,hreg1,regd);
                cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
                location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
              end
            else
              begin
                d:=tordconstnode(right).value.svalue;
                if d>=aword(1) shl (left.resultdef.size*8-1) then
                  begin
                    { divisor has its top bit set: quotient is 0 or 1.
                      CMP sets CF when dividend<d; MOV 0 / SBB -1 then
                      yields 1-CF, i.e. 1 iff dividend>=d }
                    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                    if (cgsize in [OS_64,OS_S64]) then
                      begin
                        hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                        emit_const_reg(A_MOV,opsize,aint(d),hreg2);
                        emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
                      end
                    else
                      emit_const_reg(A_CMP,opsize,aint(d),hreg1);
                    location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                    emit_const_reg(A_MOV,opsize,0,location.register);
                    emit_const_reg(A_SBB,opsize,-1,location.register);
                    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                  end
                else
                  begin
                    { unsigned magic-number division: quotient comes from
                      the high half of dividend*m, shifted right by s }
                    calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
                    cg.getcpuregister(current_asmdata.CurrAsmList,rega);
                    emit_const_reg(A_MOV,opsize,aint(m),rega);
                    cg.getcpuregister(current_asmdata.CurrAsmList,regd);
                    emit_reg(A_MUL,opsize,hreg1);
                    cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
                    if m_add then
                      begin
                        { addition can overflow, shift first bit considering carry,
                          then shift remaining bits in regular way. }
                        emit_reg_reg(A_ADD,opsize,hreg1,regd);
                        emit_const_reg(A_RCR,opsize,1,regd);
                        dec(s);
                      end;
                    if s<>0 then
                      emit_const_reg(A_SHR,opsize,aint(s),regd);
                    cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
                    location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                    cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register)
                  end;
              end;
          end;
      end
    { unsigned modulus by a (+/-)power-of-2 constant? }
    else if (nodetype=modn) and (right.nodetype=ordconstn) and not(is_signed(left.resultdef)) then
      begin
        if isabspowerof2(tordconstnode(right).value,power) then
          begin
            { x mod 2^power = x and (2^power-1) }
            emit_const_reg(A_AND,opsize,(aint(1) shl power)-1,hreg1);
            location.register:=hreg1;
          end
        else
          begin
            d:=tordconstnode(right).value.svalue;
            if d>=aword(1) shl (left.resultdef.size*8-1) then
              begin
                { divisor top bit set: at most one subtraction of d is
                  needed, done branch-free via CMOVAE of -d, then ADD }
                if not (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
                  goto DefaultDiv;
                location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                m := aword(-aint(d)); { Two's complement of d }
                if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in CMP }
                  begin
                    hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                    emit_const_reg(A_MOV,opsize,aint(d),hreg2);
                    emit_const_reg(A_MOV,opsize,aint(m),hreg3);
                    emit_reg_reg(A_XOR,opsize,location.register,location.register);
                    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                    emit_reg_reg(A_CMP,opsize,hreg2,hreg1);
                    { Emit conditional move that depends on the carry flag }
                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register);
                    instr.condition := C_AE;
                    current_asmdata.CurrAsmList.concat(instr);
                    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                  end
                else
                  begin
                    emit_const_reg(A_MOV,opsize,aint(m),hreg3);
                    emit_reg_reg(A_XOR,opsize,location.register,location.register);
                    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                    emit_const_reg(A_CMP,opsize,aint(d),hreg1);
                    { Emit conditional move that depends on the carry flag }
                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,hreg3,location.register);
                    instr.condition := C_AE;
                    current_asmdata.CurrAsmList.concat(instr);
                    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                  end;
                { result := dividend + (dividend>=d ? -d : 0) }
                emit_reg_reg(A_ADD,opsize,hreg1,location.register);
              end
            else
              begin
                { Convert the division to a multiplication }
                calc_divconst_magic_unsigned(resultdef.size*8,d,m,m_add,s);
                cg.getcpuregister(current_asmdata.CurrAsmList,rega);
                emit_const_reg(A_MOV,opsize,aint(m),rega);
                cg.getcpuregister(current_asmdata.CurrAsmList,regd);
                emit_reg(A_MUL,opsize,hreg1);
                cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
                { keep a copy of the dividend for the final subtraction }
                hreg2:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                emit_reg_reg(A_MOV,opsize,hreg1,hreg2);
                if m_add then
                  begin
                    { addition can overflow, shift first bit considering carry,
                      then shift remaining bits in regular way. }
                    cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                    emit_reg_reg(A_ADD,opsize,hreg1,regd);
                    emit_const_reg(A_RCR,opsize,1,regd);
                    cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
                    dec(s);
                  end;
                if s<>0 then
                  emit_const_reg(A_SHR,opsize,aint(s),regd);
                { regd now holds the quotient; remainder := dividend - quotient*d }
                if (cgsize in [OS_64,OS_S64]) then { Cannot use 64-bit constants in IMUL }
                  begin
                    hreg3:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                    emit_const_reg(A_MOV,opsize,aint(d),hreg3);
                    emit_reg_reg(A_IMUL,opsize,hreg3,regd);
                  end
                else
                  emit_const_reg(A_IMUL,opsize,aint(d),regd);
                emit_reg_reg(A_SUB,opsize,regd,hreg2);
                cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
                location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
                cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,hreg2,location.register)
              end;
          end;
      end
    else
      begin
DefaultDiv:
        {Bring denominator to a register.}
        cg.getcpuregister(current_asmdata.CurrAsmList,rega);
        emit_reg_reg(A_MOV,opsize,hreg1,rega);
        cg.getcpuregister(current_asmdata.CurrAsmList,regd);
        {Sign extension depends on the left type.}
        if is_signed(left.resultdef) then
          case left.resultdef.size of
{$ifdef x86_64}
            8:
              emit_none(A_CQO,S_NO);
{$endif x86_64}
            4:
              emit_none(A_CDQ,S_NO);
            else
              internalerror(2013102701);
          end
        else
          { unsigned: zero-extend into R/EDX }
          emit_reg_reg(A_XOR,opsize,regd,regd);
        { Division depends on the result type }
        if is_signed(resultdef) then
          op:=A_IDIV
        else
          op:=A_DIV;
        if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
          emit_ref(op,opsize,right.location.reference)
        else if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
          emit_reg(op,opsize,right.location.register)
        else
          begin
            hreg1:=cg.getintregister(current_asmdata.CurrAsmList,right.location.size);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,right.resultdef,right.location,hreg1);
            emit_reg(op,opsize,hreg1);
          end;
        { Copy the result into a new register. Release R/EAX & R/EDX.}
        cg.ungetcpuregister(current_asmdata.CurrAsmList,regd);
        cg.ungetcpuregister(current_asmdata.CurrAsmList,rega);
        location.register:=cg.getintregister(current_asmdata.CurrAsmList,cgsize);
        if nodetype=divn then
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,rega,location.register)
        else
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,cgsize,cgsize,regd,location.register);
      end;
  end;
  616. end.