n386add.pas 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Code generation for add nodes on the i386
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit n386add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,nadd,cpubase,nx86add;
  22. type
  { i386 flavour of the x86 add node.  It overrides the second-pass code
    generators for 64-bit arithmetic, 64-bit comparisons and multiplication
    and, when SUPPORT_MMX is defined, for MMX arithmetic and MMX sets. }
  ti386addnode = class(tx86addnode)
{$ifdef SUPPORT_MMX}
    procedure second_addmmxset;override;
    procedure second_addmmx;override;
{$endif SUPPORT_MMX}
    procedure second_add64bit;override;
    procedure second_cmp64bit;override;
    procedure second_mul;override;
  end;
  32. implementation
  33. uses
  34. globtype,systems,
  35. cutils,verbose,globals,
  36. symconst,symdef,paramgr,
  37. aasmbase,aasmtai,aasmcpu,
  38. cgbase,
  39. ncon,nset,cgutils,tgobj,
  40. cga,ncgutil,cgobj,cg64f32;
  41. {*****************************************************************************
  42. addmmxset
  43. *****************************************************************************}
  44. {$ifdef SUPPORT_MMX}
  { Second pass for set operations carried out on MMX operands.

    The node type selects a packed opcode and, for comparisons, marks the
    result as a compare.  Note that the call which would emit the selected
    opcode (emit_generic_code) is commented out below, so at present only
    the operand set-up and the result location handling are performed. }
  procedure ti386addnode.second_addmmxset;
    var
      op        : TAsmOp;
      opsize    : TCGSize;
      fpupushed,
      iscompare,
      noswap    : boolean;
    begin
      pass_left_and_right(fpupushed);

      opsize:=OS_32;
      iscompare:=false;
      noswap:=false;

      case nodetype of
        addn:
          begin
            { a plain union is a packed OR; adding a single set element
              has no active code path at the moment }
            if right.nodetype<>setelementn then
              op:=A_POR
            else
              begin
                { adding elements is not commutative }
                { if nf_swaped in flags then
                    swapleftright; }
                { bts requires both elements to be registers }
                { location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
                  location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],true);
                  op:=A_BTS;
                  noswap:=true; }
              end;
          end;
        symdifn,xorn:
          op:=A_PXOR;
        muln,andn:
          op:=A_PAND;
        orn:
          op:=A_POR;
        subn:
          op:=A_PANDN;
        equaln,unequaln:
          begin
            op:=A_PCMPEQD;
            iscompare:=true;
          end;
        lten,gten:
          begin
            { swap for "<=" when not yet swapped and for ">=" when already
              swapped; as nodetype is either lten or gten here this is the
              same as testing whether both conditions agree }
            if (nf_swaped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(exprasmlist,left.location,opsize,true);
            emit_op_right_left(A_AND,TCGSize2Opsize[opsize]);
            op:=A_PCMPEQD;
            iscompare:=true;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opsize,noswap);
      { emit_generic_code(op,opsize,true,extra_not,false); }

      { the right operand is no longer needed; for a comparison neither is
        the left one }
      location_freetemp(exprasmlist,right.location);
      if iscompare then
        location_freetemp(exprasmlist,left.location);
      set_result_location(iscompare,true);
    end;
  115. {$endif SUPPORT_MMX}
  116. {*****************************************************************************
  117. Add64bit
  118. *****************************************************************************}
  { Second pass for 64-bit add, sub, xor, or and and.

    The operation is carried out on a low/high register pair held in
    left.location, which is copied to the node's own location at the end.
    For add and sub an overflow check is emitted when cs_check_overflow is
    active; internalerror(200109051) is raised for any other node type. }
  procedure ti386addnode.second_add64bit;
    var
      cgop          : TOpCG;
      lowop,
      highop        : TAsmOp;
      size          : TOpSize;
      lowreg,
      highreg,
      scratch       : tregister;
      nooverflow    : tasmlabel;
      checkoverflow,
      swappedsub,
      isunsigned    : boolean;
    begin
      firstcomplex(self);
      pass_left_right;

      lowop:=A_NONE;
      highop:=A_NONE;
      size:=S_L;
      checkoverflow:=false;

      { the operation is unsigned as soon as one operand is an u64bit ordinal }
      isunsigned:=((left.resulttype.def.deftype=orddef) and
                   (torddef(left.resulttype.def).typ=u64bit)) or
                  ((right.resulttype.def.deftype=orddef) and
                   (torddef(right.resulttype.def).typ=u64bit));

      case nodetype of
        addn:
          begin
            cgop:=OP_ADD;
            checkoverflow:=true;
          end;
        subn:
          begin
            cgop:=OP_SUB;
            { only the swapped subtraction below is emitted by hand and
              therefore needs the raw opcodes }
            lowop:=A_SUB;
            highop:=A_SBB;
            checkoverflow:=true;
          end;
        xorn:
          cgop:=OP_XOR;
        orn:
          cgop:=OP_OR;
        andn:
          cgop:=OP_AND;
        else
          begin
            { everything should be handled in pass_1 (JM) }
            internalerror(200109051);
          end;
      end;

      { make sure the left operand lives in a register pair: either take
        over the right operand when that one already is in registers, or
        load the left operand into two fresh registers }
      if left.location.loc<>LOC_REGISTER then
        begin
          if right.location.loc=LOC_REGISTER then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              lowreg:=cg.getintregister(exprasmlist,OS_INT);
              highreg:=cg.getintregister(exprasmlist,OS_INT);
              cg64.a_load64_loc_reg(exprasmlist,left.location,joinreg64(lowreg,highreg));
              location_reset(left.location,LOC_REGISTER,OS_64);
              left.location.register64.reglo:=lowreg;
              left.location.register64.reghi:=highreg;
            end;
        end;

      { at this point, left.location.loc should be LOC_REGISTER }

      { a subtraction with exchanged operands cannot simply be done in place }
      swappedsub:=(nodetype=subn) and (nf_swaped in flags);

      if right.location.loc<>LOC_REGISTER then
        begin
          if swappedsub then
            begin
              { go through a scratch register one half at a time and move
                each partial result back into the left register pair }
              scratch:=cg.getintregister(exprasmlist,OS_INT);
              cg64.a_load64low_loc_reg(exprasmlist,right.location,scratch);
              emit_reg_reg(lowop,size,left.location.register64.reglo,scratch);
              emit_reg_reg(A_MOV,size,scratch,left.location.register64.reglo);
              cg64.a_load64high_loc_reg(exprasmlist,right.location,scratch);
              { the carry flag is still ok }
              emit_reg_reg(highop,size,left.location.register64.reghi,scratch);
              emit_reg_reg(A_MOV,size,scratch,left.location.register64.reghi);
            end
          else
            cg64.a_op64_loc_reg(exprasmlist,cgop,location.size,right.location,
              left.location.register64);
          location_freetemp(exprasmlist,right.location);
        end
      else if swappedsub then
        begin
          { when swapped the result ends up in the other register pair, so
            exchange the locations again afterwards }
          cg64.a_op64_reg_reg(exprasmlist,cgop,location.size,
            left.location.register64,
            right.location.register64);
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
        end
      else
        cg64.a_op64_reg_reg(exprasmlist,cgop,location.size,
          right.location.register64,
          left.location.register64);

      { the overflow code has to be produced right here because the
        signedness of the operation is only known in isunsigned }
      if checkoverflow and (cs_check_overflow in aktlocalswitches) then
        begin
          objectlibrary.getlabel(nooverflow);
          if isunsigned then
            cg.a_jmp_flags(exprasmlist,F_AE,nooverflow)
          else
            cg.a_jmp_flags(exprasmlist,F_NO,nooverflow);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,nooverflow);
        end;

      location_copy(location,left.location);
    end;
  { Second pass for comparisons of 64-bit values.

    The high dwords are compared first and the jumps that can already be
    decided are emitted by firstjmp64bitcmp; afterwards the low dwords are
    compared and secondjmp64bitcmp emits the final jumps.  The result of
    the node is a LOC_JUMP location using truelabel and falselabel. }
  procedure ti386addnode.second_cmp64bit;
    var
      lowreg,
      highreg    : tregister;
      highref    : treference;
      isunsigned : boolean;

    { Jumps emitted after the high dwords have been compared. }
    procedure firstjmp64bitcmp;
      var
        savedtype : tnodetype;
      begin
{$ifdef OLDREGVARS}
        load_all_regvars(exprasmlist);
{$endif OLDREGVARS}
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn:
            begin
              cg.a_jmp_flags(exprasmlist,getresflags(isunsigned),truelabel);
              { cheat a little bit for the negative test: toggle the swap
                flag temporarily before asking for the flags again }
              toggleflag(nf_swaped);
              cg.a_jmp_flags(exprasmlist,getresflags(isunsigned),falselabel);
              toggleflag(nf_swaped);
            end;
          lten,gten:
            begin
              savedtype:=nodetype;
              { the strict form of the comparison jumps to the true label }
              if savedtype=lten then
                nodetype:=ltn
              else
                nodetype:=gtn;
              cg.a_jmp_flags(exprasmlist,getresflags(isunsigned),truelabel);
              { cheat for the negative test: the opposite strict form
                jumps to the false label }
              if savedtype=lten then
                nodetype:=gtn
              else
                nodetype:=ltn;
              cg.a_jmp_flags(exprasmlist,getresflags(isunsigned),falselabel);
              nodetype:=savedtype;
            end;
          equaln:
            cg.a_jmp_flags(exprasmlist,F_NE,falselabel);
          unequaln:
            cg.a_jmp_flags(exprasmlist,F_NE,truelabel);
        end;
      end;

    { Jumps emitted after the low dwords have been compared. }
    procedure secondjmp64bitcmp;
      begin
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn,lten,gten:
            begin
              { the comparison of the low dword always has to be unsigned! }
              cg.a_jmp_flags(exprasmlist,getresflags(true),truelabel);
              cg.a_jmp_always(exprasmlist,falselabel);
            end;
          equaln:
            begin
              cg.a_jmp_flags(exprasmlist,F_NE,falselabel);
              cg.a_jmp_always(exprasmlist,truelabel);
            end;
          unequaln:
            begin
              cg.a_jmp_flags(exprasmlist,F_NE,truelabel);
              cg.a_jmp_always(exprasmlist,falselabel);
            end;
        end;
      end;

    begin
      firstcomplex(self);
      pass_left_right;

      { the comparison is unsigned as soon as one operand is an u64bit ordinal }
      isunsigned:=((left.resulttype.def.deftype=orddef) and
                   (torddef(left.resulttype.def).typ=u64bit)) or
                  ((right.resulttype.def.deftype=orddef) and
                   (torddef(right.resulttype.def).typ=u64bit));

      { make sure the left operand lives in registers: take over the right
        operand when that one is in registers, otherwise load the left one
        unless it is a register variable, which can be compared directly }
      if left.location.loc<>LOC_REGISTER then
        begin
          if right.location.loc=LOC_REGISTER then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else if left.location.loc<>LOC_CREGISTER then
            begin
              lowreg:=cg.getintregister(exprasmlist,OS_INT);
              highreg:=cg.getintregister(exprasmlist,OS_INT);
              cg64.a_load64_loc_reg(exprasmlist,left.location,joinreg64(lowreg,highreg));
              location_reset(left.location,LOC_REGISTER,OS_64);
              left.location.register64.reglo:=lowreg;
              left.location.register64.reghi:=highreg;
            end;
        end;

      { at this point, left.location.loc should be LOC_REGISTER
        (or LOC_CREGISTER when a register variable is reused) }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          begin
            emit_reg_reg(A_CMP,S_L,right.location.register64.reghi,left.location.register64.reghi);
            firstjmp64bitcmp;
            emit_reg_reg(A_CMP,S_L,right.location.register64.reglo,left.location.register64.reglo);
            secondjmp64bitcmp;
          end;
        LOC_CREFERENCE,
        LOC_REFERENCE :
          begin
            { the high dword is located 4 bytes after the low dword }
            highref:=right.location.reference;
            inc(highref.offset,4);
            emit_ref_reg(A_CMP,S_L,highref,left.location.register64.reghi);
            firstjmp64bitcmp;
            emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.register64.reglo);
            secondjmp64bitcmp;
            { NOTE(review): secondjmp64bitcmp ends with an unconditional jump
              for every node type it handles, so this jump looks redundant;
              the other operand kinds do not emit it - confirm before
              removing }
            cg.a_jmp_always(exprasmlist,falselabel);
            location_freetemp(exprasmlist,right.location);
          end;
        LOC_CONSTANT :
          begin
            exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,aint(hi(right.location.value64)),left.location.register64.reghi));
            firstjmp64bitcmp;
            exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,aint(lo(right.location.value64)),left.location.register64.reglo));
            secondjmp64bitcmp;
          end;
        else
          internalerror(200203282);
      end;

      location_freetemp(exprasmlist,left.location);

      { we have LOC_JUMP as result }
      location_reset(location,LOC_JUMP,OS_NO)
    end;
  387. {*****************************************************************************
  388. AddMMX
  389. *****************************************************************************}
  390. {$ifdef SUPPORT_MMX}
  { Second pass for arithmetic and logical operations on MMX operands.

    The packed opcode is selected from the node type, the MMX base type of
    the left operand and, for add/sub, the cs_mmx_saturation switch.  The
    left operand is then forced into an MMX register, the operation is
    emitted with the right operand as source and the result location is
    set up.  internalerror(2003042214) is raised for unsupported node
    types.

    NOTE(review): the inner case statements have no else branch, so a base
    type they do not list (e.g. a 32-bit type with saturation enabled)
    leaves op unassigned. }
  procedure ti386addnode.second_addmmx;
    var
      op        : TAsmOp;
      pushedfpu,
      cmpop     : boolean;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_and_right(pushedfpu);

      cmpop:=false;
      mmxbase:=mmx_type(left.resulttype.def);
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  { signed 16-bit lanes need the word form of the
                    saturating add, not the byte form }
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { signed 16-bit lanes need the word form of the
                    saturating subtract, not the byte form }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded   }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { swapped subtraction: compute right-left in a scratch
                register and move the result back into the left register }
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  hreg:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                  cg.ungetregister(exprasmlist,hreg);
                  emit_reg_reg(A_MOVQ,S_NO,hreg,left.location.register);
                end
              else
                begin
                  { it is the right operand whose reference is read below;
                    the left operand is already an MMX register here }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  hreg:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                  cg.ungetregister(exprasmlist,hreg);
                  emit_reg_reg(A_MOVQ,S_NO,hreg,left.location.register);
                end;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { the result ends up in the right register, so exchange the
                locations again afterwards }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
        end;

      location_freetemp(exprasmlist,right.location);
      if cmpop then
        location_freetemp(exprasmlist,left.location);
      set_result_location(cmpop,true);
    end;
  559. {$endif SUPPORT_MMX}
  560. {*****************************************************************************
  561. x86 MUL
  562. *****************************************************************************}
  { Second pass for a multiplication done with the MUL instruction.

    The left operand is loaded into a temporary register and the right
    operand into EAX; EDX is reserved as well because MUL modifies it.
    With cs_check_overflow active a call to FPC_OVERFLOW is emitted that
    is skipped on F_AE.  The result is copied from EAX into a newly
    allocated register which becomes the node's location. }
  procedure ti386addnode.second_mul;
    var
      leftreg    : Tregister;
      nooverflow : tasmlabel;
    begin
      { the location.register will be filled in later (JM) }
      location_reset(location,LOC_REGISTER,OS_INT);

      { get a temp register and load the left value into it }
      leftreg:=cg.getintregister(exprasmlist,OS_INT);
      cg.a_load_loc_reg(exprasmlist,OS_INT,left.location,leftreg);

      { allocate EAX and load the right value into it }
      cg.getcpuregister(exprasmlist,NR_EAX);
      cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,NR_EAX);

      { also allocate EDX, since it is also modified by a mul (JM) }
      cg.getcpuregister(exprasmlist,NR_EDX);
      emit_reg(A_MUL,S_L,leftreg);

      if cs_check_overflow in aktlocalswitches then
        begin
          objectlibrary.getlabel(nooverflow);
          cg.a_jmp_flags(exprasmlist,F_AE,nooverflow);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,nooverflow);
        end;

      { free EAX,EDX }
      cg.ungetcpuregister(exprasmlist,NR_EDX);
      cg.ungetcpuregister(exprasmlist,NR_EAX);

      { allocate a new register and store the result in EAX in it }
      location.register:=cg.getintregister(exprasmlist,OS_INT);
      cg.a_load_reg_reg(exprasmlist,OS_INT,OS_INT,NR_EAX,location.register);

      location_freetemp(exprasmlist,left.location);
      location_freetemp(exprasmlist,right.location);
    end;
  { unit initialization: make ti386addnode the class assigned to caddnode }
begin
  caddnode:=ti386addnode;
end.