n386add.pas 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193
  1. {
  2. $Id$
  3. Copyright (c) 2000-2002 by Florian Klaempfl
  4. Code generation for add nodes on the i386
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit n386add;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nadd,cpubase,nx86add;
  type
    { Add node for the i386 target. It derives from the shared x86 add node
      (tx86addnode) and overrides the second-pass code generation routines
      implemented in this unit. }
    ti386addnode = class(tx86addnode)
      { boolean operands }
      procedure second_addboolean;override;
      { sets handled as a 32 bit value }
      procedure second_addsmallset;override;
      { sets handled with MMX instructions }
      procedure second_addmmxset;override;
      { 32 bit multiplication using MUL }
      procedure second_mul;override;
{$ifdef SUPPORT_MMX}
      { MMX vector operands; only available when SUPPORT_MMX is defined }
      procedure second_addmmx;override;
{$endif SUPPORT_MMX}
      { 64 bit integer operands }
      procedure second_add64bit;override;
    end;
  34. implementation
  35. uses
  36. globtype,systems,
  37. cutils,verbose,globals,
  38. symconst,symdef,paramgr,
  39. aasmbase,aasmtai,aasmcpu,defutil,htypechk,
  40. cgbase,pass_2,regvars,
  41. ncon,nset,
  42. cga,cgx86,ncgutil,cgobj,cg64f32;
  43. {*****************************************************************************
  44. AddBoolean
  45. *****************************************************************************}
  { Generates code for an add node whose operands are booleans.

    With full boolean evaluation enabled, or for comparison/xor nodes, both
    operands are evaluated, forced into registers where needed and combined
    with a single CMP/XOR/OR/AND instruction.

    Otherwise (short-circuit "and"/"or") the result is LOC_JUMP: the left
    operand is turned into a jump sequence around the evaluation of the
    right operand. }
  procedure ti386addnode.second_addboolean;
    var
      op      : TAsmOp;
      opsize  : TCGSize;
      cmpop,
      isjump  : boolean;
      otl,ofl : tasmlabel;
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      cmpop:=false;
      { the operation size follows the smallest boolean type involved }
      if (torddef(left.resulttype.def).typ=bool8bit) or
         (torddef(right.resulttype.def).typ=bool8bit) then
        opsize:=OS_8
      else
        if (torddef(left.resulttype.def).typ=bool16bit) or
           (torddef(right.resulttype.def).typ=bool16bit) then
          opsize:=OS_16
        else
          opsize:=OS_32;

      if (cs_full_boolean_eval in aktlocalswitches) or
         (nodetype in [unequaln,ltn,lten,gtn,gten,equaln,xorn]) then
        begin
          if left.nodetype in [ordconstn,realconstn] then
            swapleftright;

          { --- left operand --- }
          { an operand that is expected to end in LOC_JUMP gets its own
            true/false labels; the labels of this node are saved and
            restored afterwards }
          isjump:=(left.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(left);
          { A LOC_JUMP result without private labels is an internal error.
            This must be tested before location_force_reg, because that call
            is made exactly for LOC_FLAGS/LOC_JUMP locations and the check
            would never see LOC_JUMP afterwards. }
          if (not isjump) and (left.location.loc=LOC_JUMP) then
            internalerror(200310081);
          if left.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,left.location,opsize,false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end;

          { --- right operand, same scheme --- }
          isjump:=(right.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(right);
          { checks the right operand (the original code tested left here) }
          if (not isjump) and (right.location.loc=LOC_JUMP) then
            internalerror(200310082);
          if right.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,right.location,opsize,false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end;

          { left must be a register }
          left_must_be_reg(opsize,false);

          { select the instruction; comparisons only set the flags }
          case nodetype of
            ltn,lten,gtn,gten,
            equaln,unequaln :
              begin
                op:=A_CMP;
                cmpop:=true;
              end;
            xorn :
              op:=A_XOR;
            orn :
              op:=A_OR;
            andn :
              op:=A_AND;
            else
              internalerror(200203247);
          end;
          emit_op_right_left(op,TCGSize2Opsize[opsize]);

          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          { for a comparison the left register is not the result,
            so it is released as well }
          if cmpop then
            begin
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(cmpop,true);
        end
      else
        begin
          { short-circuit evaluation }
          case nodetype of
            andn,
            orn :
              begin
                location_reset(location,LOC_JUMP,OS_NO);
                case nodetype of
                  andn :
                    begin
                      { temporary true label placed directly in front of
                        the code of the right operand }
                      otl:=truelabel;
                      objectlibrary.getlabel(truelabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,truelabel);
                      truelabel:=otl;
                    end;
                  orn :
                    begin
                      { temporary false label placed directly in front of
                        the code of the right operand }
                      ofl:=falselabel;
                      objectlibrary.getlabel(falselabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,falselabel);
                      falselabel:=ofl;
                    end;
                  else
                    internalerror(2003042212);
                end;
                secondpass(right);
                maketojumpbool(exprasmlist,right,lr_load_regvars);
              end;
            else
              internalerror(2003042213);
          end;
        end;
    end;
  173. {*****************************************************************************
  174. AddSmallSet
  175. *****************************************************************************}
  { Generates code for an add node whose operands are small sets, which are
    processed with 32 bit integer instructions (OS_32). }
  procedure ti386addnode.second_addsmallset;
    var
      setsize    : TCGSize;
      instr      : TAsmOp;
      iscompare,
      pushedfpu,
      needs_not,
      keep_order : boolean;
    begin
      pass_left_and_right(pushedfpu);

      iscompare:=false;
      keep_order:=false;
      needs_not:=false;
      setsize:=OS_32;

      { when a setdef is passed, it has to be a smallset }
      if ((left.resulttype.def.deftype=setdef) and
          (tsetdef(left.resulttype.def).settype<>smallset)) or
         ((right.resulttype.def.deftype=setdef) and
          (tsetdef(right.resulttype.def).settype<>smallset)) then
        internalerror(200203301);

      case nodetype of
        addn :
          begin
            { Original author's note: this is an ugly hack; it could later
              be done with EDI, as for subn, instead of two registers. }
            { adding elements is not commutative }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              instr:=A_OR
            else
              begin
                { adding a single element: no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts requires both elements to be registers }
                location_force_reg(exprasmlist,left.location,setsize,false);
                location_force_reg(exprasmlist,right.location,setsize,true);
                instr:=A_BTS;
                keep_order:=true;
              end;
          end;
        symdifn,
        xorn :
          instr:=A_XOR;
        muln,
        andn :
          instr:=A_AND;
        orn :
          instr:=A_OR;
        subn :
          begin
            { the difference is an AND with the inverted subtrahend; a
              constant subtrahend is inverted right here, otherwise
              emit_generic_code is asked for an extra NOT }
            instr:=A_AND;
            if nf_swaped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  needs_not:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  needs_not:=true;
              end;
          end;
        equaln,
        unequaln :
          begin
            instr:=A_CMP;
            iscompare:=true;
          end;
        lten,gten :
          begin
            { swap for a non-swapped lten or for a swapped gten }
            if (nf_swaped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(exprasmlist,left.location,setsize,true);
            emit_op_right_left(A_AND,TCGSize2Opsize[setsize]);
            instr:=A_CMP;
            iscompare:=true;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(setsize,keep_order);
      emit_generic_code(instr,setsize,true,needs_not,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      { a comparison does not keep its result in the left location }
      if iscompare then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(iscompare,true);
    end;
  276. {*****************************************************************************
  277. addmmxset
  278. *****************************************************************************}
  { Set operations using MMX instructions.

    NOTE(review): this routine looks unfinished. The selected opcode is
    never emitted because the emit_generic_code call is commented out, and
    the lten/gten branch emits an integer AND. The behaviour is left
    unchanged here. }
  procedure ti386addnode.second_addmmxset;
    var
      setsize    : TCGSize;
      instr      : TAsmOp;
      iscompare,
      pushedfpu,
      keep_order : boolean;
    begin
      pass_left_and_right(pushedfpu);

      iscompare:=false;
      keep_order:=false;
      setsize:=OS_32;

      case nodetype of
        addn :
          begin
            { Adding a single set element is disabled. The removed code
              swapped the operands when nf_swaped was set (adding elements
              is not commutative), forced both locations into registers,
              because bts requires registers, and selected A_BTS with
              noswap set. }
            if right.nodetype<>setelementn then
              instr:=A_POR;
          end;
        symdifn,
        xorn :
          instr:=A_PXOR;
        muln,
        andn :
          instr:=A_PAND;
        orn :
          instr:=A_POR;
        subn :
          instr:=A_PANDN;
        equaln,
        unequaln :
          begin
            instr:=A_PCMPEQD;
            iscompare:=true;
          end;
        lten,gten :
          begin
            { swap for a non-swapped lten or for a swapped gten }
            if (nf_swaped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(exprasmlist,left.location,setsize,true);
            emit_op_right_left(A_AND,TCGSize2Opsize[setsize]);
            instr:=A_PCMPEQD;
            iscompare:=true;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(setsize,keep_order);
      { disabled: emit_generic_code(op,opsize,true,extra_not,false); }

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      if iscompare then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(iscompare,true);
    end;
  353. {*****************************************************************************
  354. Add64bit
  355. *****************************************************************************}
  { Generates code for an add node with 64 bit integer operands, which are
    handled as a low/high pair of 32 bit registers.

    Arithmetic and logical operations go through the cg64 helpers; a swapped
    subtraction with a non-register right operand is emitted by hand as
    SUB/SBB. Comparisons compare the high dwords first and then the low
    dwords, and produce a LOC_JUMP result via truelabel/falselabel. }
  procedure ti386addnode.second_add64bit;
    var
      op         : TOpCG;
      op1,op2    : TAsmOp;
      opsize     : TOpSize;
      hregister,
      hregister2 : tregister;
      href       : treference;
      hl4        : tasmlabel;
      pushedfpu,
      mboverflow,
      cmpop,
      unsigned   : boolean;
      r          : Tregister;

    { Emits the conditional jumps that follow the comparison of the high
      dwords. For the ordering comparisons the decision is final when the
      high dwords differ; otherwise execution falls through to the
      comparison of the low dwords. }
    procedure firstjmp64bitcmp;
      var
        oldnodetype : tnodetype;
      begin
{$ifdef OLDREGVARS}
        load_all_regvars(exprasmlist);
{$endif OLDREGVARS}
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn:
            begin
              cg.a_jmp_flags(exprasmlist,getresflags(unsigned),truelabel);
              { cheat a little bit for the negative test }
              toggleflag(nf_swaped);
              cg.a_jmp_flags(exprasmlist,getresflags(unsigned),falselabel);
              toggleflag(nf_swaped);
            end;
          lten,gten:
            begin
              { on the high dwords the strict comparison is used; the
                node type is changed temporarily so that getresflags
                returns the matching flags }
              oldnodetype:=nodetype;
              if nodetype=lten then
                nodetype:=ltn
              else
                nodetype:=gtn;
              cg.a_jmp_flags(exprasmlist,getresflags(unsigned),truelabel);
              { cheat for the negative test }
              if nodetype=ltn then
                nodetype:=gtn
              else
                nodetype:=ltn;
              cg.a_jmp_flags(exprasmlist,getresflags(unsigned),falselabel);
              nodetype:=oldnodetype;
            end;
          equaln:
            cg.a_jmp_flags(exprasmlist,F_NE,falselabel);
          unequaln:
            cg.a_jmp_flags(exprasmlist,F_NE,truelabel);
        end;
      end;

    { Emits the jumps that follow the comparison of the low dwords. Every
      branch ends with an unconditional jump, so no code emitted directly
      after this sequence can be reached by falling through. }
    procedure secondjmp64bitcmp;
      begin
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn,lten,gten:
            begin
              { the comparison of the low dword always has to be }
              { unsigned!                                         }
              cg.a_jmp_flags(exprasmlist,getresflags(true),truelabel);
              cg.a_jmp_always(exprasmlist,falselabel);
            end;
          equaln:
            begin
              cg.a_jmp_flags(exprasmlist,F_NE,falselabel);
              cg.a_jmp_always(exprasmlist,truelabel);
            end;
          unequaln:
            begin
              cg.a_jmp_flags(exprasmlist,F_NE,truelabel);
              cg.a_jmp_always(exprasmlist,falselabel);
            end;
        end;
      end;

    begin
      firstcomplex(self);

      pass_left_and_right(pushedfpu);

      op1:=A_NONE;
      op2:=A_NONE;
      mboverflow:=false;
      cmpop:=false;
      opsize:=S_L;
      { the operation is unsigned as soon as one operand is u64bit }
      unsigned:=((left.resulttype.def.deftype=orddef) and
                 (torddef(left.resulttype.def).typ=u64bit)) or
                ((right.resulttype.def.deftype=orddef) and
                 (torddef(right.resulttype.def).typ=u64bit));
      case nodetype of
        addn :
          begin
            op:=OP_ADD;
            mboverflow:=true;
          end;
        subn :
          begin
            op:=OP_SUB;
            { op1/op2 are only used by the hand-written swapped subtraction }
            op1:=A_SUB;
            op2:=A_SBB;
            mboverflow:=true;
          end;
        ltn,lten,
        gtn,gten,
        equaln,unequaln:
          begin
            op:=OP_NONE;
            cmpop:=true;
          end;
        xorn:
          op:=OP_XOR;
        orn:
          op:=OP_OR;
        andn:
          op:=OP_AND;
        else
          begin
            { everything should be handled in pass_1 (JM) }
            internalerror(200109051);
          end;
      end;

      { left and right no register? }
      { then one must be demanded   }
      if (left.location.loc<>LOC_REGISTER) then
        begin
          if (right.location.loc<>LOC_REGISTER) then
            begin
              { we can reuse a CREGISTER for comparison }
              if not((left.location.loc=LOC_CREGISTER) and cmpop) then
                begin
                  hregister:=cg.getintregister(exprasmlist,OS_INT);
                  hregister2:=cg.getintregister(exprasmlist,OS_INT);
                  cg64.a_load64_loc_reg(exprasmlist,left.location,joinreg64(hregister,hregister2));
                  location_reset(left.location,LOC_REGISTER,OS_64);
                  left.location.registerlow:=hregister;
                  left.location.registerhigh:=hregister2;
                end;
            end
          else
            begin
              { right already is in a register: use it as left operand }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end;
        end;

      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { when swapped another result register }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              cg64.a_op64_reg_reg(exprasmlist,op,
                left.location.register64,
                right.location.register64);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else if cmpop then
            begin
              emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
              firstjmp64bitcmp;
              emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
              secondjmp64bitcmp;
            end
          else
            begin
              cg64.a_op64_reg_reg(exprasmlist,op,
                right.location.register64,
                left.location.register64);
            end;
          location_release(exprasmlist,right.location);
        end
      else
        begin
          { right.location<>LOC_REGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { low dword: computed in r, then moved to left.registerlow }
              r:=cg.getintregister(exprasmlist,OS_INT);
              cg64.a_load64low_loc_reg(exprasmlist,right.location,r);
              emit_reg_reg(op1,opsize,left.location.registerlow,r);
              emit_reg_reg(A_MOV,opsize,r,left.location.registerlow);
              { NOTE(review): the following load must not modify the carry
                flag produced by op1 - TODO confirm this also holds when
                right is a constant }
              cg64.a_load64high_loc_reg(exprasmlist,right.location,r);
              { the carry flag is still ok }
              emit_reg_reg(op2,opsize,left.location.registerhigh,r);
              emit_reg_reg(A_MOV,opsize,r,left.location.registerhigh);
              cg.ungetregister(exprasmlist,r);
              if right.location.loc<>LOC_CREGISTER then
                begin
                  location_freetemp(exprasmlist,right.location);
                  location_release(exprasmlist,right.location);
                end;
            end
          else if cmpop then
            begin
              case right.location.loc of
                LOC_CREGISTER :
                  begin
                    emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
                    firstjmp64bitcmp;
                    emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
                    secondjmp64bitcmp;
                  end;
                LOC_CREFERENCE,
                LOC_REFERENCE :
                  begin
                    { the high dword is located 4 bytes after the low dword }
                    href:=right.location.reference;
                    inc(href.offset,4);
                    emit_ref_reg(A_CMP,S_L,href,left.location.registerhigh);
                    firstjmp64bitcmp;
                    emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.registerlow);
                    { secondjmp64bitcmp already ends with an unconditional
                      jump, so no further jump is emitted here }
                    secondjmp64bitcmp;
                    location_freetemp(exprasmlist,right.location);
                    location_release(exprasmlist,right.location);
                  end;
                LOC_CONSTANT :
                  begin
                    exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,hi(right.location.valueqword),left.location.registerhigh));
                    firstjmp64bitcmp;
                    exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,lo(right.location.valueqword),left.location.registerlow));
                    secondjmp64bitcmp;
                  end;
                else
                  internalerror(200203282);
              end;
            end
          else
            begin
              cg64.a_op64_loc_reg(exprasmlist,op,right.location,
                left.location.register64);
              if (right.location.loc<>LOC_CREGISTER) then
                begin
                  location_freetemp(exprasmlist,right.location);
                  location_release(exprasmlist,right.location);
                end;
            end;
        end;

      { after a comparison the left operand is not needed anymore }
      if (left.location.loc<>LOC_CREGISTER) and cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in aktlocalswitches then
            begin
              { jump over the call to FPC_OVERFLOW if no overflow occurred }
              objectlibrary.getlabel(hl4);
              if unsigned then
                cg.a_jmp_flags(exprasmlist,F_AE,hl4)
              else
                cg.a_jmp_flags(exprasmlist,F_NO,hl4);
              cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
              cg.a_label(exprasmlist,hl4);
            end;
        end;

      { we have LOC_JUMP as result }
      if cmpop then
        location_reset(location,LOC_JUMP,OS_NO)
      else
        location_copy(location,left.location);
    end;
  619. {*****************************************************************************
  620. AddMMX
  621. *****************************************************************************}
  622. {$ifdef SUPPORT_MMX}
  { Generates code for an add node with MMX operands.

    The MMX opcode is selected from the node type, the element type of the
    left operand (mmx_type) and the saturation switch. The left operand is
    then brought into an MMX register and the instruction is emitted with
    the right operand as source.

    An operator/element type combination without a matching opcode is
    rejected with an internal error instead of emitting an undefined opcode. }
  procedure ti386addnode.second_addmmx;
    var
      op        : TAsmOp;
      pushedfpu,
      cmpop     : boolean;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_and_right(pushedfpu);

      cmpop:=false;
      mmxbase:=mmx_type(left.resulttype.def);
      { stays A_NONE when no opcode exists for the combination }
      op:=A_NONE;
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  { signed 16 bit elements need the word form, not PADDSB }
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { signed 16 bit elements need the word form, not PSUBSB }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;
      { no opcode for this operator/element type combination }
      if op=A_NONE then
        internalerror(200401201);

      { left and right no register? }
      { then one must be demanded   }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  { copy it, the register variable must not be modified }
                  hregister:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  location_release(exprasmlist,left.location);

                  hregister:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { swapped subtraction: load right into a scratch register,
                apply the operation with left as source and move the
                result back into the left register }
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  hreg:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                  cg.ungetregister(exprasmlist,hreg);
                  emit_reg_reg(A_MOVQ,S_NO,hreg,left.location.register);
                end
              else
                begin
                  { Validate the right operand: it is the one loaded from
                    memory below. Left is always LOC_MMXREGISTER here, so
                    testing left would always raise the internal error. }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  location_release(exprasmlist,right.location);
                  hreg:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                  cg.ungetregister(exprasmlist,hreg);
                  emit_reg_reg(A_MOVQ,S_NO,hreg,left.location.register);
                end;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                  location_release(exprasmlist,right.location);
                end;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { the result ends up in the right register, so the
                locations are swapped afterwards }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
        end;

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      { cmpop is never set in this routine, so the left location is
        always kept as the result }
      if cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(cmpop,true);
    end;
  798. {$endif SUPPORT_MMX}
  799. {*****************************************************************************
  800. MUL
  801. *****************************************************************************}
  { Generates code for a multiplication using the MUL instruction: the left
    operand is loaded into a temporary register, the right operand into EAX,
    and the result is copied from EAX into a newly allocated register. }
  procedure ti386addnode.second_mul;
    var
      factorreg   : Tregister;
      no_overflow : tasmlabel;
    begin
      { location.register is filled in at the end (JM) }
      location_reset(location,LOC_REGISTER,OS_INT);

      { left operand -> temporary register, then release its location }
      factorreg:=cg.getintregister(exprasmlist,OS_INT);
      cg.a_load_loc_reg(exprasmlist,OS_INT,left.location,factorreg);
      location_release(exprasmlist,left.location);

      { right operand -> EAX, which is allocated explicitly }
      cg.getexplicitregister(exprasmlist,NR_EAX);
      cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,NR_EAX);
      location_release(exprasmlist,right.location);

      { the mul instruction frees the temporary register }
      cg.ungetregister(exprasmlist,factorreg);
      { EDX is allocated too, since it is also modified by a mul (JM) }
      cg.getexplicitregister(exprasmlist,NR_EDX);
      emit_reg(A_MUL,S_L,factorreg);

      if cs_check_overflow in aktlocalswitches then
        begin
          { jump over the call to FPC_OVERFLOW when the F_AE condition holds }
          objectlibrary.getlabel(no_overflow);
          cg.a_jmp_flags(exprasmlist,F_AE,no_overflow);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,no_overflow);
        end;

      { release EDX first, then EAX }
      cg.ungetregister(exprasmlist,NR_EDX);
      cg.ungetregister(exprasmlist,NR_EAX);

      { allocate the result register and copy the result from EAX into it }
      location.register:=cg.getintregister(exprasmlist,OS_INT);
      emit_reg_reg(A_MOV,S_L,NR_EAX,location.register);

      location_freetemp(exprasmlist,left.location);
      location_freetemp(exprasmlist,right.location);
    end;
  begin
    { unit initialization: make ti386addnode the class referenced by caddnode }
    caddnode:=ti386addnode;
  end.
  843. {
  844. $Log$
  845. Revision 1.94 2004-01-20 12:59:37 florian
  846. * common addnode code for x86-64 and i386
  847. Revision 1.93 2004/01/14 17:19:04 peter
  848. * disable addmmxset
  849. Revision 1.92 2003/12/25 01:07:09 florian
  850. + $fputype directive support
  851. + single data type operations with sse unit
  852. * fixed more x86-64 stuff
  853. Revision 1.91 2003/12/24 00:10:02 florian
  854. - delete parameter in cg64 methods removed
  855. Revision 1.90 2003/12/23 22:13:41 peter
  856. * overlfow support in second_mul
  857. Revision 1.89 2003/12/21 11:28:41 daniel
  858. * Some work to allow mmx instructions to be used for 32 byte sets
  859. Revision 1.88 2003/12/06 01:15:23 florian
  860. * reverted Peter's alloctemp patch; hopefully properly
  861. Revision 1.87 2003/12/03 23:13:20 peter
  862. * delayed paraloc allocation, a_param_*() gets extra parameter
  863. if it needs to allocate temp or real paralocation
  864. * optimized/simplified int-real loading
  865. Revision 1.86 2003/10/17 14:38:32 peter
  866. * 64k registers supported
  867. * fixed some memory leaks
  868. Revision 1.85 2003/10/13 09:38:22 florian
  869. * fixed forgotten commit
  870. Revision 1.84 2003/10/13 01:58:03 florian
  871. * some ideas for mm support implemented
  872. Revision 1.83 2003/10/10 17:48:14 peter
  873. * old trgobj moved to x86/rgcpu and renamed to trgx86fpu
  874. * tregisteralloctor renamed to trgobj
  875. * removed rgobj from a lot of units
  876. * moved location_* and reference_* to cgobj
  877. * first things for mmx register allocation
  878. Revision 1.82 2003/10/09 21:31:37 daniel
  879. * Register allocator splitted, ans abstract now
  880. Revision 1.81 2003/10/08 09:13:16 florian
  881. * fixed full bool evalution and bool xor, if the left or right side have LOC_JUMP
  882. Revision 1.80 2003/10/01 20:34:49 peter
  883. * procinfo unit contains tprocinfo
  884. * cginfo renamed to cgbase
  885. * moved cgmessage to verbose
  886. * fixed ppc and sparc compiles
  887. Revision 1.79 2003/09/28 21:48:20 peter
  888. * fix register leaks
  889. Revision 1.78 2003/09/28 13:35:40 peter
  890. * shortstr compare updated for different calling conventions
  891. Revision 1.77 2003/09/10 08:31:48 marco
  892. * Patch from Peter for paraloc
  893. Revision 1.76 2003/09/03 15:55:01 peter
  894. * NEWRA branch merged
  895. Revision 1.75.2.2 2003/08/31 13:50:16 daniel
  896. * Remove sorting and use pregenerated indexes
  897. * Some work on making things compile
  898. Revision 1.75.2.1 2003/08/29 17:29:00 peter
  899. * next batch of updates
  900. Revision 1.75 2003/08/03 20:38:00 daniel
  901. * Made code generator reverse or/add/and/xor/imul instructions when
  902. possible to reduce the slowdown of spills.
  903. Revision 1.74 2003/08/03 20:19:43 daniel
  904. - Removed cmpop from Ti386addnode.second_addstring
  905. Revision 1.73 2003/07/06 15:31:21 daniel
  906. * Fixed register allocator. *Lots* of fixes.
  907. Revision 1.72 2003/06/17 16:51:30 peter
  908. * cycle fixes
  909. Revision 1.71 2003/06/07 18:57:04 jonas
  910. + added freeintparaloc
  911. * ppc get/freeintparaloc now check whether the parameter regs are
  912. properly allocated/deallocated (and get an extra list para)
  913. * ppc a_call_* now internalerrors if pi_do_call is not yet set
  914. * fixed a lot of missing pi_do_call's
  915. Revision 1.70 2003/06/03 13:01:59 daniel
  916. * Register allocator finished
  917. Revision 1.69 2003/05/30 23:49:18 jonas
  918. * a_load_loc_reg now has an extra size parameter for the destination
  919. register (properly fixes what I worked around in revision 1.106 of
  920. ncgutil.pas)
  921. Revision 1.68 2003/05/26 19:38:28 peter
  922. * generic fpc_shortstr_concat
  923. + fpc_shortstr_append_shortstr optimization
  924. Revision 1.67 2003/05/22 21:32:29 peter
  925. * removed some unit dependencies
  926. Revision 1.66 2003/04/26 09:12:55 peter
  927. * add string returns in LOC_REFERENCE
  928. Revision 1.65 2003/04/23 20:16:04 peter
  929. + added currency support based on int64
  930. + is_64bit for use in cg units instead of is_64bitint
  931. * removed cgmessage from n386add, replace with internalerrors
  932. Revision 1.64 2003/04/23 09:51:16 daniel
  933. * Removed usage of edi in a lot of places when new register allocator used
  934. + Added newra versions of g_concatcopy and secondadd_float
  935. Revision 1.63 2003/04/22 23:50:23 peter
  936. * firstpass uses expectloc
  937. * checks if there are differences between the expectloc and
  938. location.loc from secondpass in EXTDEBUG
  939. Revision 1.62 2003/04/22 10:09:35 daniel
  940. + Implemented the actual register allocator
  941. + Scratch registers unavailable when new register allocator used
  942. + maybe_save/maybe_restore unavailable when new register allocator used
  943. Revision 1.61 2003/04/17 10:02:48 daniel
  944. * Tweaked register allocate/deallocate position so that fewer interferences
  945. are generated.
  946. Revision 1.60 2003/03/28 19:16:57 peter
  947. * generic constructor working for i386
  948. * remove fixed self register
  949. * esi added as address register for i386
  950. Revision 1.59 2003/03/13 19:52:23 jonas
  951. * and more new register allocator fixes (in the i386 code generator this
  952. time). At least now the ppc cross compiler can compile the linux
  953. system unit again, but I haven't tested it.
  954. Revision 1.58 2003/03/08 20:36:41 daniel
  955. + Added newra version of Ti386shlshrnode
  956. + Added interference graph construction code
  957. Revision 1.57 2003/03/08 13:59:17 daniel
  958. * Work to handle new register notation in ag386nsm
  959. + Added newra version of Ti386moddivnode
  960. Revision 1.56 2003/03/08 10:53:48 daniel
  961. * Created newra version of secondmul in n386add.pas
  962. Revision 1.55 2003/02/19 22:00:15 daniel
  963. * Code generator converted to new register notation
  964. - Horribly outdated todo.txt removed
  965. Revision 1.54 2003/01/13 18:37:44 daniel
  966. * Work on register conversion
  967. Revision 1.53 2003/01/08 18:43:57 daniel
  968. * Tregister changed into a record
  969. Revision 1.52 2002/11/25 17:43:26 peter
  970. * split defbase into defutil,symutil,defcmp
  971. * merged isconvertable and is_equal into compare_defs(_ext)
  972. * made operator search faster by walking the list only once
  973. Revision 1.51 2002/11/15 01:58:56 peter
  974. * merged changes from 1.0.7 up to 04-11
  975. - -V option for generating bug report tracing
  976. - more tracing for option parsing
  977. - errors for cdecl and high()
  978. - win32 import stabs
  979. - win32 records<=8 are returned in eax:edx (turned off by default)
  980. - heaptrc update
  981. - more info for temp management in .s file with EXTDEBUG
  982. Revision 1.50 2002/10/20 13:11:27 jonas
  983. * re-enabled optimized version of comparisons with the empty string that
  984. I accidentally disabled in revision 1.26
  985. Revision 1.49 2002/08/23 16:14:49 peter
  986. * tempgen cleanup
  987. * tt_noreuse temp type added that will be used in genentrycode
  988. Revision 1.48 2002/08/14 18:41:48 jonas
  989. - remove valuelow/valuehigh fields from tlocation, because they depend
  990. on the endianness of the host operating system -> difficult to get
  991. right. Use lo/hi(location.valueqword) instead (remember to use
  992. valueqword and not value!!)
  993. Revision 1.47 2002/08/11 14:32:29 peter
  994. * renamed current_library to objectlibrary
  995. Revision 1.46 2002/08/11 13:24:16 peter
  996. * saving of asmsymbols in ppu supported
  997. * asmsymbollist global is removed and moved into a new class
  998. tasmlibrarydata that will hold the info of a .a file which
  999. corresponds with a single module. Added librarydata to tmodule
  1000. to keep the library info stored for the module. In the future the
  1001. objectfiles will also be stored to the tasmlibrarydata class
  1002. * all getlabel/newasmsymbol and friends are moved to the new class
  1003. Revision 1.45 2002/07/26 11:17:52 jonas
  1004. * the optimization of converting a multiplication with a power of two to
  1005. a shl is moved from n386add/secondpass to nadd/resulttypepass
  1006. Revision 1.44 2002/07/20 11:58:00 florian
  1007. * types.pas renamed to defbase.pas because D6 contains a types
  1008. unit so this would conflict if D6 programs are compiled
  1009. + Willamette/SSE2 instructions to assembler added
  1010. Revision 1.43 2002/07/11 14:41:32 florian
  1011. * start of the new generic parameter handling
  1012. Revision 1.42 2002/07/07 09:52:33 florian
  1013. * powerpc target fixed, very simple units can be compiled
  1014. * some basic stuff for better callparanode handling, far from being finished
  1015. Revision 1.41 2002/07/01 18:46:31 peter
  1016. * internal linker
  1017. * reorganized aasm layer
  1018. Revision 1.40 2002/07/01 16:23:55 peter
  1019. * cg64 patch
  1020. * basics for currency
  1021. * asnode updates for class and interface (not finished)
  1022. Revision 1.39 2002/05/18 13:34:22 peter
  1023. * readded missing revisions
  1024. Revision 1.38 2002/05/16 19:46:51 carl
  1025. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  1026. + try to fix temp allocation (still in ifdef)
  1027. + generic constructor calls
  1028. + start of tassembler / tmodulebase class cleanup
  1029. Revision 1.36 2002/05/13 19:54:37 peter
  1030. * removed n386ld and n386util units
  1031. * maybe_save/maybe_restore added instead of the old maybe_push
  1032. Revision 1.35 2002/05/12 16:53:17 peter
  1033. * moved entry and exitcode to ncgutil and cgobj
  1034. * foreach gets extra argument for passing local data to the
  1035. iterator function
  1036. * -CR checks also class typecasts at runtime by changing them
  1037. into as
  1038. * fixed compiler to cycle with the -CR option
  1039. * fixed stabs with elf writer, finally the global variables can
  1040. be watched
  1041. * removed a lot of routines from cga unit and replaced them by
  1042. calls to cgobj
  1043. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  1044. u32bit then the other is typecasted also to u32bit without giving
  1045. a rangecheck warning/error.
  1046. * fixed pascal calling method with reversing also the high tree in
  1047. the parast, detected by tcalcst3 test
  1048. Revision 1.34 2002/04/25 20:16:40 peter
  1049. * moved more routines from cga/n386util
  1050. Revision 1.33 2002/04/05 15:09:13 jonas
  1051. * fixed web bug 1915
  1052. Revision 1.32 2002/04/04 19:06:10 peter
  1053. * removed unused units
  1054. * use tlocation.size in cg.a_*loc*() routines
  1055. Revision 1.31 2002/04/02 17:11:35 peter
  1056. * tlocation,treference update
  1057. * LOC_CONSTANT added for better constant handling
  1058. * secondadd split into multiple routines
  1059. * location_force_reg added for loading a location to a register
  1060. of a specified size
  1061. * secondassignment parses now first the right and then the left node
  1062. (this is compatible with Kylix). This saves a lot of push/pop especially
  1063. with string operations
  1064. * adapted some routines to use the new cg methods
  1065. Revision 1.29 2002/03/04 19:10:13 peter
  1066. * removed compiler warnings
  1067. }