n386add.pas 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855
  1. {
  2. $Id$
  3. Copyright (c) 2000-2002 by Florian Klaempfl
  4. Code generation for add nodes on the i386
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit n386add;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nadd,cpubase,nx86add;
  23. type
  { Code generator node for binary operators on the i386; derives from
    tx86addnode. Each second_* method generates code for one class of
    operands (strings, booleans, floats, small sets, ...). }
  24. ti386addnode = class(tx86addnode)
  { overrides the inherited pass_2 (its body is not part of the code shown here) }
  25. procedure pass_2;override;
  26. protected
  { leaves non-trivial shortstring comparisons to the second pass,
    everything else goes to the inherited implementation }
  27. function first_addstring : tnode; override;
  28. private
  { runs secondpass on left and then on right; pushedfpu receives the
    result of maybe_pushfpu (or false) }
  29. procedure pass_left_and_right(var pushedfpu:boolean);
  { maps the comparison node type (taking nf_swaped into account) to a
    signed or unsigned flag condition }
  30. function getresflags(unsigned : boolean) : tresflags;
  { makes sure left.location is a LOC_REGISTER, either by swapping with
    right (unless noswap) or by loading it }
  31. procedure left_must_be_reg(opsize:TOpSize;noswap:boolean);
  { emits "op right,left" where left is a register and right is a
    register, a reference or a constant }
  32. procedure emit_op_right_left(op:TAsmOp;opsize:TOpSize);
  { emits the operation itself plus an optional overflow check }
  33. procedure emit_generic_code(op:TAsmOp;opsize:TOpSize;unsigned,extra_not,mboverflow:boolean);
  { result is LOC_FLAGS for comparisons, otherwise a copy of left.location }
  34. procedure set_result_location(cmpop,unsigned:boolean);
  35. procedure second_addstring;
  36. procedure second_addboolean;
  37. procedure second_addfloat;
  38. procedure second_addsmallset;
  39. procedure second_addmmxset;
  40. procedure second_mul;
  41. {$ifdef SUPPORT_MMX}
  42. procedure second_addmmx;
  43. {$endif SUPPORT_MMX}
  44. procedure second_add64bit;
  45. end;
  46. implementation
  47. uses
  48. globtype,systems,
  49. cutils,verbose,globals,
  50. symconst,symdef,paramgr,
  51. aasmbase,aasmtai,aasmcpu,defutil,htypechk,
  52. cgbase,pass_2,regvars,
  53. ncon,nset,
  54. cga,cgx86,ncgutil,cgobj,cg64f32;
  55. {*****************************************************************************
  56. Helpers
  57. *****************************************************************************}
  58. const
  { translates an assembler operand size in the range S_B..S_L into the
    corresponding code generator size (OS_8, OS_16, OS_32) }
  59. opsize_2_cgsize : array[S_B..S_L] of tcgsize = (OS_8,OS_16,OS_32);
  { Runs the second pass over both operands, left operand first.

    pushedfpu: set to the result of maybe_pushfpu when the left operand
               ended up in an FPU register, otherwise set to false. }
  procedure ti386addnode.pass_left_and_right(var pushedfpu:boolean);
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      { in case of constant put it to the left }
      if (left.nodetype=ordconstn) then
        swapleftright;
      secondpass(left);

      { are too few registers free?
        The decision must look at where the left operand actually is:
        the location of this node itself is only filled in at the end of
        the second_* routines (set_result_location/location_reset), so
        testing it here would read a value that has not been computed yet. }
      if left.location.loc=LOC_FPUREGISTER then
        pushedfpu:=maybe_pushfpu(exprasmlist,right.registersfpu,left.location)
      else
        pushedfpu:=false;
      secondpass(right);
    end;
  { Returns the flag condition that corresponds to the comparison
    performed by this node.

    unsigned: selects the unsigned (F_A/F_B family) instead of the signed
              (F_G/F_L family) conditions.

    When nf_swaped is set the operands were exchanged, so the relation is
    mirrored first. For node types other than the six comparison types no
    result is assigned. }
  function ti386addnode.getresflags(unsigned : boolean) : tresflags;
    var
      relation : tnodetype;
    begin
      if nodetype=equaln then
        result:=F_E
      else if nodetype=unequaln then
        result:=F_NE
      else
        begin
          relation:=nodetype;
          { operands exchanged => use the mirrored relation }
          if nf_swaped in flags then
            case relation of
              ltn  : relation:=gtn;
              lten : relation:=gten;
              gtn  : relation:=ltn;
              gten : relation:=lten;
            end;
          if unsigned then
            case relation of
              ltn  : result:=F_B;
              lten : result:=F_BE;
              gtn  : result:=F_A;
              gten : result:=F_AE;
            end
          else
            case relation of
              ltn  : result:=F_L;
              lten : result:=F_LE;
              gtn  : result:=F_G;
              gten : result:=F_GE;
            end;
        end;
    end;
  { Guarantees that left.location is a LOC_REGISTER afterwards.

    opsize: operand size used when left has to be loaded into a register.
    noswap: when true, left and right are never exchanged. }
  procedure ti386addnode.left_must_be_reg(opsize:TOpSize;noswap:boolean);
    var
      is_comparison : boolean;
    begin
      { already in a register: nothing to do }
      if left.location.loc=LOC_REGISTER then
        exit;

      if noswap or (right.location.loc<>LOC_REGISTER) then
        begin
          { a comparison does not change the register, so a constant
            register may be reused in that case }
          is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
          location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],is_comparison);
        end
      else
        begin
          { right already is a register: simply exchange the locations }
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
        end;
    end;
  { Appends the instruction "op right,left" to exprasmlist.

    The left operand has to be in a register; the right operand may be a
    register, a memory reference or a constant. Any other right location
    triggers internalerror 200203232. }
  procedure ti386addnode.emit_op_right_left(op:TAsmOp;opsize:TOpsize);
    var
      dest : tregister;
    begin
      dest:=left.location.register;
      if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
        exprasmlist.concat(taicpu.op_reg_reg(op,opsize,right.location.register,dest))
      else if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        exprasmlist.concat(taicpu.op_ref_reg(op,opsize,right.location.reference,dest))
      else if right.location.loc=LOC_CONSTANT then
        exprasmlist.concat(taicpu.op_const_reg(op,opsize,right.location.value,dest))
      else
        internalerror(200203232);
    end;
  { Fills in the location of this node.

    cmpop:    true for comparisons; the result then lives in the flags.
    unsigned: forwarded to getresflags to pick the flag condition.

    For non-comparisons the result is wherever the left operand is. }
  procedure ti386addnode.set_result_location(cmpop,unsigned:boolean);
    begin
      if not cmpop then
        begin
          location_copy(location,left.location);
          exit;
        end;
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned);
    end;
  { Emits the instruction(s) for a two-operand integer operation and,
    if requested, the overflow check that follows it.

    op, opsize : instruction and operand size to emit
    unsigned   : selects the flag tested by the overflow check
    extra_not  : one operand is complemented with NOT before the operation
    mboverflow : emit the overflow check (only done when cs_check_overflow
                 is active)

    The caller is expected to have put the left operand into a register. }
  164. procedure ti386addnode.emit_generic_code(op:TAsmOp;opsize:TOpSize;unsigned,extra_not,mboverflow:boolean);
  165. var
  166. power : longint;
  167. hl4 : tasmlabel;
  168. r : Tregister;
  169. begin
  170. { at this point, left.location.loc should be LOC_REGISTER }
  171. if right.location.loc=LOC_REGISTER then
  172. begin
  173. { right.location is a LOC_REGISTER }
  174. { when swapped another result register }
  175. if (nodetype=subn) and (nf_swaped in flags) then
  176. begin
  177. if extra_not then
  178. emit_reg(A_NOT,S_L,left.location.register);
  { the result is computed into the right register, which then becomes "left" }
  179. emit_reg_reg(op,opsize,left.location.register,right.location.register);
  180. { newly swapped also set swapped flag }
  181. location_swap(left.location,right.location);
  182. toggleflag(nf_swaped);
  183. end
  184. else
  185. begin
  186. if extra_not then
  187. emit_reg(A_NOT,S_L,right.location.register);
  { for these opcodes the locations are exchanged before emitting, so the
    result ends up in the register that was the right operand }
  188. if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
  189. location_swap(left.location,right.location);
  190. emit_reg_reg(op,opsize,right.location.register,left.location.register);
  191. end;
  192. end
  193. else
  194. begin
  195. { right.location is not a LOC_REGISTER }
  196. if (nodetype=subn) and (nf_swaped in flags) then
  197. begin
  198. if extra_not then
  199. emit_reg(A_NOT,opsize,left.location.register);
  { load right into a scratch register, apply op there and copy the
    result back into the left register }
  200. r:=cg.getintregister(exprasmlist,OS_INT);
  201. cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,r);
  202. emit_reg_reg(op,opsize,left.location.register,r);
  203. emit_reg_reg(A_MOV,opsize,r,left.location.register);
  204. cg.ungetregister(exprasmlist,r);
  205. end
  206. else
  207. begin
  208. { Optimizations when right.location is a constant value }
  { (in)equality test against 0 => TEST reg,reg }
  209. if (op=A_CMP) and
  210. (nodetype in [equaln,unequaln]) and
  211. (right.location.loc=LOC_CONSTANT) and
  212. (right.location.value=0) then
  213. begin
  214. emit_reg_reg(A_TEST,opsize,left.location.register,left.location.register);
  215. end
  216. else
  { + 1 => INC, only when overflow checking is off }
  217. if (op=A_ADD) and
  218. (right.location.loc=LOC_CONSTANT) and
  219. (right.location.value=1) and
  220. not(cs_check_overflow in aktlocalswitches) then
  221. begin
  222. emit_reg(A_INC,opsize,left.location.register);
  223. end
  224. else
  { - 1 => DEC, only when overflow checking is off }
  225. if (op=A_SUB) and
  226. (right.location.loc=LOC_CONSTANT) and
  227. (right.location.value=1) and
  228. not(cs_check_overflow in aktlocalswitches) then
  229. begin
  230. emit_reg(A_DEC,opsize,left.location.register);
  231. end
  232. else
  { multiplication by a power of two => SHL, only when overflow checking is off }
  233. if (op=A_IMUL) and
  234. (right.location.loc=LOC_CONSTANT) and
  235. (ispowerof2(right.location.value,power)) and
  236. not(cs_check_overflow in aktlocalswitches) then
  237. begin
  238. emit_const_reg(A_SHL,opsize,power,left.location.register);
  239. end
  240. else
  241. begin
  242. if extra_not then
  243. begin
  { note: this path always emits AND with the complemented right operand,
    regardless of the op that was passed in }
  244. r:=cg.getintregister(exprasmlist,OS_INT);
  245. cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,r);
  246. emit_reg(A_NOT,S_L,r);
  247. emit_reg_reg(A_AND,S_L,r,left.location.register);
  248. cg.ungetregister(exprasmlist,r);
  249. end
  250. else
  251. begin
  252. emit_op_right_left(op,opsize);
  253. end;
  254. end;
  255. end;
  256. end;
  257. { only in case of overflow operations }
  258. { produce overflow code }
  259. { we must put it here directly, because sign of operation }
  260. { is in unsigned VAR!! }
  261. if mboverflow then
  262. begin
  263. if cs_check_overflow in aktlocalswitches then
  264. begin
  { jump over the call to FPC_OVERFLOW when the flag condition (F_AE for
    unsigned, F_NO otherwise) holds }
  265. objectlibrary.getlabel(hl4);
  266. if unsigned then
  267. cg.a_jmp_flags(exprasmlist,F_AE,hl4)
  268. else
  269. cg.a_jmp_flags(exprasmlist,F_NO,hl4);
  270. cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
  271. cg.a_label(exprasmlist,hl4);
  272. end;
  273. end;
  274. end;
  275. {*****************************************************************************
  276. Addstring
  277. *****************************************************************************}
  278. { note: if you implement an fpc_shortstr_concat similar to the }
  279. { one in i386.inc, you have to override first_addstring like in }
  280. { ti386addnode.first_addstring and implement the shortstring concat }
  281. { manually! The generic routine is different from the i386 one (JM) }
  { First pass for string operands.

    Comparisons of shortstrings are generated inline in the second pass
    (result in the flags), except when one side is an empty string
    constant. Everything else is delegated to the inherited routine.

    Returns nil when the node is kept for the second pass, otherwise the
    result of the inherited first_addstring. }
  function ti386addnode.first_addstring : tnode;
    var
      has_empty_const : boolean;
    begin
      { can't handle fpc_shortstr_compare with compilerproc because it
        returns its result in the flags instead of in eax (JM) }
      if (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) and
         is_shortstring(left.resulttype.def) then
        begin
          has_empty_const:=
            ((left.nodetype=stringconstn) and (str_length(left)=0)) or
            ((right.nodetype=stringconstn) and (str_length(right)=0));
          if not has_empty_const then
            begin
              expectloc:=LOC_FLAGS;
              calcregisters(self,0,0,0);
              result:=nil;
              exit;
            end;
        end;
      { otherwise, use the generic code }
      result:=inherited first_addstring;
    end;
  { Second pass for string operands.

    Only shortstring comparisons arrive here (see first_addstring): the
    addresses of both strings are passed to FPC_SHORTSTR_COMPARE and the
    result is taken from the flags. Any other string type triggers
    internalerror 200108303. }
  procedure ti386addnode.second_addstring;
    var
      paraloc1,
      paraloc2   : tparalocation;
      hregister1,
      hregister2 : tregister;
    begin
      { string operations are not commutative }
      if nf_swaped in flags then
        swapleftright;
      case tstringdef(left.resulttype.def).string_typ of
        st_shortstring:
          begin
            case nodetype of
              ltn,lten,gtn,gten,equaln,unequaln :
                begin
                  paraloc1:=paramanager.getintparaloc(pocall_default,1);
                  paraloc2:=paramanager.getintparaloc(pocall_default,2);
                  { process parameters }
                  { left operand -> second parameter }
                  secondpass(left);
                  location_release(exprasmlist,left.location);
                  if paraloc2.loc=LOC_REGISTER then
                    begin
                      { keep the address in a temporary register until
                        both operands have been evaluated }
                      hregister2:=cg.getaddressregister(exprasmlist);
                      cg.a_loadaddr_ref_reg(exprasmlist,left.location.reference,hregister2);
                    end
                  else
                    begin
                      paramanager.allocparaloc(exprasmlist,paraloc2);
                      cg.a_paramaddr_ref(exprasmlist,left.location.reference,paraloc2);
                    end;
                  { right operand -> first parameter }
                  secondpass(right);
                  location_release(exprasmlist,right.location);
                  if paraloc1.loc=LOC_REGISTER then
                    begin
                      hregister1:=cg.getaddressregister(exprasmlist);
                      cg.a_loadaddr_ref_reg(exprasmlist,right.location.reference,hregister1);
                    end
                  else
                    begin
                      paramanager.allocparaloc(exprasmlist,paraloc1);
                      cg.a_paramaddr_ref(exprasmlist,right.location.reference,paraloc1);
                    end;
                  { push parameters }
                  { each temporary register is only valid when the matching
                    parameter location is a register, so the guards must
                    test the same paraloc that caused the allocation above }
                  if paraloc2.loc=LOC_REGISTER then
                    begin
                      cg.ungetregister(exprasmlist,hregister2);
                      paramanager.allocparaloc(exprasmlist,paraloc2);
                      cg.a_param_reg(exprasmlist,OS_ADDR,hregister2,paraloc2);
                    end;
                  if paraloc1.loc=LOC_REGISTER then
                    begin
                      cg.ungetregister(exprasmlist,hregister1);
                      paramanager.allocparaloc(exprasmlist,paraloc1);
                      cg.a_param_reg(exprasmlist,OS_ADDR,hregister1,paraloc1);
                    end;
                  paramanager.freeparaloc(exprasmlist,paraloc1);
                  paramanager.freeparaloc(exprasmlist,paraloc2);
                  cg.allocexplicitregisters(exprasmlist,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                  cg.a_call_name(exprasmlist,'FPC_SHORTSTR_COMPARE');
                  cg.deallocexplicitregisters(exprasmlist,R_INTREGISTER,paramanager.get_volatile_registers_int(pocall_default));
                  location_freetemp(exprasmlist,left.location);
                  location_freetemp(exprasmlist,right.location);
                end;
            end;
            { the helper leaves its result in the flags }
            set_result_location(true,true);
          end;
        else
          { rest should be handled in first pass (JM) }
          internalerror(200108303);
      end;
    end;
  372. {*****************************************************************************
  373. AddBoolean
  374. *****************************************************************************}
  { Second pass for boolean operands.

    With full boolean evaluation, and always for comparisons and xor,
    both operands are evaluated into registers and combined with
    CMP/XOR/OR/AND. Otherwise "and"/"or" are generated as short-circuit
    jumps and the result location is LOC_JUMP. }
  procedure ti386addnode.second_addboolean;
    var
      op      : TAsmOp;
      opsize  : TOpsize;
      cmpop,
      isjump  : boolean;
      otl,ofl : tasmlabel;
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      cmpop:=false;
      { the operand size is taken from the smallest boolean type involved }
      if (torddef(left.resulttype.def).typ=bool8bit) or
         (torddef(right.resulttype.def).typ=bool8bit) then
        opsize:=S_B
      else
        if (torddef(left.resulttype.def).typ=bool16bit) or
           (torddef(right.resulttype.def).typ=bool16bit) then
          opsize:=S_W
        else
          opsize:=S_L;

      if (cs_full_boolean_eval in aktlocalswitches) or
         (nodetype in [unequaln,ltn,lten,gtn,gten,equaln,xorn]) then
        begin
          if left.nodetype in [ordconstn,realconstn] then
            swapleftright;

          { left operand: give it private true/false labels when it is
            expected to produce jumps, and restore ours afterwards }
          isjump:=(left.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(left);
          if left.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end
          else if left.location.loc=LOC_JUMP then
            internalerror(200310081);

          { right operand: same procedure }
          isjump:=(right.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(right);
          if right.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end
          { this check belongs to the operand that was just processed,
            i.e. the right one }
          else if right.location.loc=LOC_JUMP then
            internalerror(200310082);

          { left must be a register }
          left_must_be_reg(opsize,false);
          { select the instruction that combines the operands }
          case nodetype of
            ltn,lten,gtn,gten,
            equaln,unequaln :
              begin
                op:=A_CMP;
                cmpop:=true;
              end;
            xorn :
              op:=A_XOR;
            orn :
              op:=A_OR;
            andn :
              op:=A_AND;
            else
              internalerror(200203247);
          end;
          emit_op_right_left(op,opsize);
          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          if cmpop then
            begin
              { a comparison leaves its result in the flags, so left is
                no longer needed either }
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(cmpop,true);
        end
      else
        begin
          case nodetype of
            andn,
            orn :
              begin
                location_reset(location,LOC_JUMP,OS_NO);
                case nodetype of
                  andn :
                    begin
                      { a true left operand continues with the right one }
                      otl:=truelabel;
                      objectlibrary.getlabel(truelabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,truelabel);
                      truelabel:=otl;
                    end;
                  orn :
                    begin
                      { a false left operand continues with the right one }
                      ofl:=falselabel;
                      objectlibrary.getlabel(falselabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,falselabel);
                      falselabel:=ofl;
                    end;
                  else
                    internalerror(2003042212);
                end;
                secondpass(right);
                maketojumpbool(exprasmlist,right,lr_load_regvars);
              end;
            else
              internalerror(2003042213);
          end;
        end;
    end;
  502. {*****************************************************************************
  503. AddFloat
  504. *****************************************************************************}
  { Second pass for floating point operands using the x87 FPU stack.

    Arithmetic leaves the result in NR_ST (LOC_FPUREGISTER); comparisons
    leave it in the CPU flags (LOC_FLAGS). nf_swaped keeps track of the
    order in which the operands ended up on the FPU stack. }
  505. procedure ti386addnode.second_addfloat;
  506. var
  507. op : TAsmOp;
  508. resflags : tresflags;
  509. pushedfpu,
  510. cmpop : boolean;
  511. begin
  512. pass_left_and_right(pushedfpu);
  513. cmpop:=false;
  { select the popping form of the instruction }
  514. case nodetype of
  515. addn :
  516. op:=A_FADDP;
  517. muln :
  518. op:=A_FMULP;
  519. subn :
  520. op:=A_FSUBP;
  521. slashn :
  522. op:=A_FDIVP;
  523. ltn,lten,gtn,gten,
  524. equaln,unequaln :
  525. begin
  526. op:=A_FCOMPP;
  527. cmpop:=true;
  528. end;
  529. else
  530. internalerror(2003042214);
  531. end;
  { bring both operands onto the FPU stack }
  532. if (right.location.loc<>LOC_FPUREGISTER) then
  533. begin
  534. cg.a_loadfpu_loc_reg(exprasmlist,right.location,NR_ST);
  { NOTE(review): this frees the temp of left.location although the
    operand just loaded is the right one - confirm this is intended }
  535. if (right.location.loc <> LOC_CFPUREGISTER) and
  536. pushedfpu then
  537. location_freetemp(exprasmlist,left.location);
  538. if (left.location.loc<>LOC_FPUREGISTER) then
  539. begin
  540. cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
  541. if (left.location.loc <> LOC_CFPUREGISTER) and
  542. pushedfpu then
  543. location_freetemp(exprasmlist,left.location);
  544. end
  545. else
  546. begin
  547. { left was on the stack => swap }
  548. toggleflag(nf_swaped);
  549. end;
  550. { releases the right reference }
  551. location_release(exprasmlist,right.location);
  552. end
  553. { the nominator in st0 }
  554. else if (left.location.loc<>LOC_FPUREGISTER) then
  555. begin
  556. cg.a_loadfpu_loc_reg(exprasmlist,left.location,NR_ST);
  557. if (left.location.loc <> LOC_CFPUREGISTER) and
  558. pushedfpu then
  559. location_freetemp(exprasmlist,left.location);
  560. end
  561. else
  562. begin
  563. { fpu operands are always in the wrong order on the stack }
  564. toggleflag(nf_swaped);
  565. end;
  566. { releases the left reference }
  567. if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
  568. location_release(exprasmlist,left.location);
  569. { if we swaped the tree nodes, then use the reverse operator }
  570. if nf_swaped in flags then
  571. begin
  572. if (nodetype=slashn) then
  573. op:=A_FDIVRP
  574. else if (nodetype=subn) then
  575. op:=A_FSUBRP;
  576. end;
  577. { to avoid the pentium bug
  578. if (op=FDIVP) and (opt_processors=pentium) then
  579. cg.a_call_name(exprasmlist,'EMUL_FDIVP')
  580. else
  581. }
  582. { the Intel assemblers want operands }
  583. if op<>A_FCOMPP then
  584. begin
  { arithmetic: one stack entry less afterwards }
  585. emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
  586. tcgx86(cg).dec_fpu_stack;
  587. end
  588. else
  589. begin
  { FCOMPP is emitted without operands; the stack counter is lowered twice }
  590. emit_none(op,S_NO);
  591. tcgx86(cg).dec_fpu_stack;
  592. tcgx86(cg).dec_fpu_stack;
  593. end;
  594. { on comparison load flags }
  595. if cmpop then
  596. begin
  { FNSTSW AX followed by SAHF transfers the FPU status into the CPU flags }
  597. cg.getexplicitregister(exprasmlist,NR_AX);
  598. emit_reg(A_FNSTSW,S_NO,NR_AX);
  599. emit_none(A_SAHF,S_NO);
  600. cg.ungetregister(exprasmlist,NR_AX);
  { the conditions used are the same ones getresflags uses for unsigned
    comparisons; with swapped operands the relation is mirrored }
  601. if nf_swaped in flags then
  602. begin
  603. case nodetype of
  604. equaln : resflags:=F_E;
  605. unequaln : resflags:=F_NE;
  606. ltn : resflags:=F_A;
  607. lten : resflags:=F_AE;
  608. gtn : resflags:=F_B;
  609. gten : resflags:=F_BE;
  610. end;
  611. end
  612. else
  613. begin
  614. case nodetype of
  615. equaln : resflags:=F_E;
  616. unequaln : resflags:=F_NE;
  617. ltn : resflags:=F_B;
  618. lten : resflags:=F_BE;
  619. gtn : resflags:=F_A;
  620. gten : resflags:=F_AE;
  621. end;
  622. end;
  623. location_reset(location,LOC_FLAGS,OS_NO);
  624. location.resflags:=resflags;
  625. end
  626. else
  627. begin
  628. location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
  629. location.register:=NR_ST;
  630. end;
  631. end;
  632. {*****************************************************************************
  633. AddSmallSet
  634. *****************************************************************************}
  { Second pass for small sets, which are handled as 32 bit values
    (opsize is fixed to S_L).

    The set operation is mapped to a bit instruction (OR/XOR/AND/BTS/CMP)
    that is then emitted through emit_generic_code. Comparisons leave the
    result in the flags, all other operations in the left location. }
  635. procedure ti386addnode.second_addsmallset;
  636. var
  637. opsize : TOpSize;
  638. op : TAsmOp;
  639. cmpop,
  640. pushedfpu,
  641. extra_not,
  642. noswap : boolean;
  643. begin
  { pushedfpu is only needed as an argument here; it is not read afterwards }
  644. pass_left_and_right(pushedfpu);
  645. { when a setdef is passed, it has to be a smallset }
  646. if ((left.resulttype.def.deftype=setdef) and
  647. (tsetdef(left.resulttype.def).settype<>smallset)) or
  648. ((right.resulttype.def.deftype=setdef) and
  649. (tsetdef(right.resulttype.def).settype<>smallset)) then
  650. internalerror(200203301);
  651. cmpop:=false;
  652. noswap:=false;
  653. extra_not:=false;
  654. opsize:=S_L;
  655. case nodetype of
  656. addn :
  657. begin
  658. { this is a really ugly hack!!!!!!!!!! }
  659. { this could be done later using EDI }
  660. { as it is done for subn }
  661. { instead of two registers!!!! }
  662. { adding elements is not commutative }
  663. if (nf_swaped in flags) and (left.nodetype=setelementn) then
  664. swapleftright;
  665. { are we adding set elements ? }
  666. if right.nodetype=setelementn then
  667. begin
  668. { no range support for smallsets! }
  669. if assigned(tsetelementnode(right).right) then
  670. internalerror(43244);
  671. { bts requires both elements to be registers }
  672. location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
  673. location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],true);
  674. op:=A_BTS;
  675. noswap:=true;
  676. end
  677. else
  678. op:=A_OR;
  679. end;
  680. symdifn :
  681. op:=A_XOR;
  682. muln :
  683. op:=A_AND;
  684. subn :
  685. begin
  { set difference = AND with the complement of the subtrahend: a constant
    is complemented right here, otherwise emit_generic_code is asked to
    emit a NOT (extra_not) }
  686. op:=A_AND;
  687. if (not(nf_swaped in flags)) and
  688. (right.location.loc=LOC_CONSTANT) then
  689. right.location.value := not(right.location.value)
  690. else if (nf_swaped in flags) and
  691. (left.location.loc=LOC_CONSTANT) then
  692. left.location.value := not(left.location.value)
  693. else
  694. extra_not:=true;
  695. end;
  696. equaln,
  697. unequaln :
  698. begin
  699. op:=A_CMP;
  700. cmpop:=true;
  701. end;
  702. lten,gten:
  703. begin
  { subset test: AND both sets, then compare the intersection with the
    right operand for equality }
  704. if (not(nf_swaped in flags) and (nodetype = lten)) or
  705. ((nf_swaped in flags) and (nodetype = gten)) then
  706. swapleftright;
  707. location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],true);
  708. emit_op_right_left(A_AND,opsize);
  709. op:=A_CMP;
  710. cmpop:=true;
  711. { warning: ugly hack, we need a JE so change the node to equaln }
  712. nodetype:=equaln;
  713. end;
  714. xorn :
  715. op:=A_XOR;
  716. orn :
  717. op:=A_OR;
  718. andn :
  719. op:=A_AND;
  720. else
  721. internalerror(2003042215);
  722. end;
  723. { left must be a register }
  724. left_must_be_reg(opsize,noswap);
  { sets are treated as unsigned and never overflow-checked }
  725. emit_generic_code(op,opsize,true,extra_not,false);
  726. location_freetemp(exprasmlist,right.location);
  727. location_release(exprasmlist,right.location);
  728. if cmpop then
  729. begin
  730. location_freetemp(exprasmlist,left.location);
  731. location_release(exprasmlist,left.location);
  732. end;
  733. set_result_location(cmpop,true);
  734. end;
  735. {*****************************************************************************
  736. addmmxset
  737. *****************************************************************************}
  { Second pass for sets using MMX instructions.

    NOTE(review): this routine looks unfinished. The MMX opcode is
    selected into op, but the call that would emit it is commented out,
    so apart from the AND emitted for lten/gten no operation code is
    generated here. }
  738. procedure ti386addnode.second_addmmxset;
  739. var opsize : TOpSize;
  740. op : TAsmOp;
  741. cmpop,
  742. pushedfpu,
  743. noswap : boolean;
  744. begin
  745. pass_left_and_right(pushedfpu);
  746. cmpop:=false;
  747. noswap:=false;
  748. opsize:=S_L;
  749. case nodetype of
  750. addn:
  751. begin
  752. { are we adding set elements ? }
  753. if right.nodetype=setelementn then
  754. begin
  { everything in this branch is commented out, so op stays unassigned here }
  755. { adding elements is not commutative }
  756. { if nf_swaped in flags then
  757. swapleftright;}
  758. { bts requires both elements to be registers }
  759. { location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
  760. location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],true);
  761. op:=A_BTS;
  762. noswap:=true;}
  763. end
  764. else
  765. op:=A_POR;
  766. end;
  767. symdifn :
  768. op:=A_PXOR;
  769. muln:
  770. op:=A_PAND;
  771. subn:
  772. op:=A_PANDN;
  773. equaln,
  774. unequaln :
  775. begin
  776. op:=A_PCMPEQD;
  777. cmpop:=true;
  778. end;
  779. lten,gten:
  780. begin
  781. if (not(nf_swaped in flags) and (nodetype = lten)) or
  782. ((nf_swaped in flags) and (nodetype = gten)) then
  783. swapleftright;
  784. location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],true);
  { NOTE(review): uses the integer A_AND with a 32 bit size, same as
    second_addsmallset, not an MMX instruction - confirm }
  785. emit_op_right_left(A_AND,opsize);
  786. op:=A_PCMPEQD;
  787. cmpop:=true;
  788. { warning: ugly hack, we need a JE so change the node to equaln }
  789. nodetype:=equaln;
  790. end;
  791. xorn :
  792. op:=A_PXOR;
  793. orn :
  794. op:=A_POR;
  795. andn :
  796. op:=A_PAND;
  797. else
  { same internal error number as in second_addsmallset }
  798. internalerror(2003042215);
  799. end;
  800. { left must be a register }
  801. left_must_be_reg(opsize,noswap);
  { the instruction selected above is not emitted: the call is disabled }
  802. { emit_generic_code(op,opsize,true,extra_not,false);}
  803. location_freetemp(exprasmlist,right.location);
  804. location_release(exprasmlist,right.location);
  805. if cmpop then
  806. begin
  807. location_freetemp(exprasmlist,left.location);
  808. location_release(exprasmlist,left.location);
  809. end;
  810. set_result_location(cmpop,true);
  811. end;
  812. {*****************************************************************************
  813. Add64bit
  814. *****************************************************************************}
  procedure ti386addnode.second_add64bit;
    { Second-pass code generation for 64 bit operands, handled as a
      low/high pair of 32 bit registers.

      Supported node types: addn, subn, xorn, orn, andn and the six
      comparison operators; anything else raises internalerror(200109051).

      Result:
        - comparisons emit conditional jumps to truelabel/falselabel and
          leave the node location as LOC_JUMP;
        - all other operators leave the result in left.location, which is
          copied to the node location.

      For addn/subn an overflow test is appended when cs_check_overflow is
      active (carry based for unsigned operands, overflow-flag based
      otherwise). }
    var
      op         : TOpCG;
      op1,op2    : TAsmOp;
      opsize     : TOpSize;
      hregister,
      hregister2 : tregister;
      href       : treference;
      hl4        : tasmlabel;
      pushedfpu,
      mboverflow,
      cmpop,
      unsigned,delete:boolean;
      r:Tregister;

    procedure firstjmp64bitcmp;
      { Emits the jumps that follow the comparison of the HIGH dwords.
        When neither jump is taken, execution falls through to the
        comparison of the low dwords. }
      var
        oldnodetype : tnodetype;
      begin
  {$ifdef OLDREGVARS}
        load_all_regvars(exprasmlist);
  {$endif OLDREGVARS}
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn:
            begin
              cg.a_jmp_flags(exprasmlist,getresflags(unsigned),truelabel);
              { cheat a little bit for the negative test: temporarily
                toggle the swapped flag to obtain the opposite condition }
              toggleflag(nf_swaped);
              cg.a_jmp_flags(exprasmlist,getresflags(unsigned),falselabel);
              toggleflag(nf_swaped);
            end;
          lten,gten:
            begin
              { the high dwords are tested with the strict variant of the
                operator; nodetype is restored afterwards }
              oldnodetype:=nodetype;
              if nodetype=lten then
                nodetype:=ltn
              else
                nodetype:=gtn;
              cg.a_jmp_flags(exprasmlist,getresflags(unsigned),truelabel);
              { cheat for the negative test }
              if nodetype=ltn then
                nodetype:=gtn
              else
                nodetype:=ltn;
              cg.a_jmp_flags(exprasmlist,getresflags(unsigned),falselabel);
              nodetype:=oldnodetype;
            end;
          equaln:
            cg.a_jmp_flags(exprasmlist,F_NE,falselabel);
          unequaln:
            cg.a_jmp_flags(exprasmlist,F_NE,truelabel);
        end;
      end;

    procedure secondjmp64bitcmp;
      { Emits the jumps that follow the comparison of the LOW dwords.
        Every branch ends with an unconditional jump, so no code emitted
        directly after this sequence can be reached by fall-through. }
      begin
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn,lten,gten:
            begin
              { the comparison of the low dword has to be }
              { always unsigned!                          }
              cg.a_jmp_flags(exprasmlist,getresflags(true),truelabel);
              cg.a_jmp_always(exprasmlist,falselabel);
            end;
          equaln:
            begin
              cg.a_jmp_flags(exprasmlist,F_NE,falselabel);
              cg.a_jmp_always(exprasmlist,truelabel);
            end;
          unequaln:
            begin
              cg.a_jmp_flags(exprasmlist,F_NE,truelabel);
              cg.a_jmp_always(exprasmlist,falselabel);
            end;
        end;
      end;

    begin
      firstcomplex(self);

      pass_left_and_right(pushedfpu);

      op1:=A_NONE;
      op2:=A_NONE;
      mboverflow:=false;
      cmpop:=false;
      opsize:=S_L;
      { the operation is unsigned as soon as one operand is an u64bit }
      unsigned:=((left.resulttype.def.deftype=orddef) and
                 (torddef(left.resulttype.def).typ=u64bit)) or
                ((right.resulttype.def.deftype=orddef) and
                 (torddef(right.resulttype.def).typ=u64bit));
      case nodetype of
        addn :
          begin
            op:=OP_ADD;
            mboverflow:=true;
          end;
        subn :
          begin
            op:=OP_SUB;
            { op1/op2 are only needed for the hand written swapped
              subtraction further below }
            op1:=A_SUB;
            op2:=A_SBB;
            mboverflow:=true;
          end;
        ltn,lten,
        gtn,gten,
        equaln,unequaln:
          begin
            op:=OP_NONE;
            cmpop:=true;
          end;
        xorn:
          op:=OP_XOR;
        orn:
          op:=OP_OR;
        andn:
          op:=OP_AND;
        else
          begin
            { everything should be handled in pass_1 (JM) }
            internalerror(200109051);
          end;
      end;

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_REGISTER) then
        begin
          if (right.location.loc<>LOC_REGISTER) then
            begin
              { we can reuse a CREGISTER for comparison }
              if not((left.location.loc=LOC_CREGISTER) and cmpop) then
                begin
                  delete:=left.location.loc<>LOC_CREGISTER;
                  hregister:=cg.getintregister(exprasmlist,OS_INT);
                  hregister2:=cg.getintregister(exprasmlist,OS_INT);
                  cg64.a_load64_loc_reg(exprasmlist,left.location,joinreg64(hregister,hregister2),delete);
                  location_reset(left.location,LOC_REGISTER,OS_64);
                  left.location.registerlow:=hregister;
                  left.location.registerhigh:=hregister2;
                end;
            end
          else
            begin
              { right already lives in registers: exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end;
        end;

      { at this point, left.location.loc is LOC_REGISTER, or a }
      { LOC_CREGISTER that is reused for a comparison          }

      if right.location.loc=LOC_REGISTER then
        begin
          { when swapped another result register }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              cg64.a_op64_reg_reg(exprasmlist,op,
                left.location.register64,
                right.location.register64);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else if cmpop then
            begin
              emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
              firstjmp64bitcmp;
              emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
              secondjmp64bitcmp;
            end
          else
            begin
              cg64.a_op64_reg_reg(exprasmlist,op,
                right.location.register64,
                left.location.register64);
            end;
          location_release(exprasmlist,right.location);
        end
      else
        begin
          { right.location<>LOC_REGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { operands are swapped: compute each half in the scratch
                register r and move it back into the left register pair }
              r:=cg.getintregister(exprasmlist,OS_INT);
              cg64.a_load64low_loc_reg(exprasmlist,right.location,r);
              emit_reg_reg(op1,opsize,left.location.registerlow,r);
              emit_reg_reg(A_MOV,opsize,r,left.location.registerlow);
              cg64.a_load64high_loc_reg(exprasmlist,right.location,r);
              { the carry flag is still ok }
              emit_reg_reg(op2,opsize,left.location.registerhigh,r);
              emit_reg_reg(A_MOV,opsize,r,left.location.registerhigh);
              cg.ungetregister(exprasmlist,r);
              if right.location.loc<>LOC_CREGISTER then
                begin
                  location_freetemp(exprasmlist,right.location);
                  location_release(exprasmlist,right.location);
                end;
            end
          else if cmpop then
            begin
              case right.location.loc of
                LOC_CREGISTER :
                  begin
                    emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
                    firstjmp64bitcmp;
                    emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
                    secondjmp64bitcmp;
                  end;
                LOC_CREFERENCE,
                LOC_REFERENCE :
                  begin
                    { the high dword is stored 4 bytes after the low one }
                    href:=right.location.reference;
                    inc(href.offset,4);
                    emit_ref_reg(A_CMP,S_L,href,left.location.registerhigh);
                    firstjmp64bitcmp;
                    emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.registerlow);
                    secondjmp64bitcmp;
                    { secondjmp64bitcmp always ends with an unconditional
                      jump, so no extra jump to falselabel is emitted here
                      (it would be unreachable) }
                    location_freetemp(exprasmlist,right.location);
                    location_release(exprasmlist,right.location);
                  end;
                LOC_CONSTANT :
                  begin
                    exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,hi(right.location.valueqword),left.location.registerhigh));
                    firstjmp64bitcmp;
                    exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,lo(right.location.valueqword),left.location.registerlow));
                    secondjmp64bitcmp;
                  end;
                else
                  internalerror(200203282);
              end;
            end
          else
            begin
              cg64.a_op64_loc_reg(exprasmlist,op,right.location,
                left.location.register64);
              if (right.location.loc<>LOC_CREGISTER) then
                begin
                  location_freetemp(exprasmlist,right.location);
                  location_release(exprasmlist,right.location);
                end;
            end;
        end;

      { a comparison does not keep its left operand, unless it is a }
      { register variable                                           }
      if (left.location.loc<>LOC_CREGISTER) and cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!!                              }
      if mboverflow then
        begin
          if cs_check_overflow in aktlocalswitches then
            begin
              objectlibrary.getlabel(hl4);
              { skip the call to the overflow handler when the flags
                report no overflow }
              if unsigned then
                cg.a_jmp_flags(exprasmlist,F_AE,hl4)
              else
                cg.a_jmp_flags(exprasmlist,F_NO,hl4);
              cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
              cg.a_label(exprasmlist,hl4);
            end;
        end;

      { we have LOC_JUMP as result }
      if cmpop then
        location_reset(location,LOC_JUMP,OS_NO)
      else
        location_copy(location,left.location);
    end;
  1079. {*****************************************************************************
  1080. AddMMX
  1081. *****************************************************************************}
  {$ifdef SUPPORT_MMX}
  procedure ti386addnode.second_addmmx;
    { Second-pass code generation for binary operators on MMX-able arrays.

      The MMX opcode is selected from the node type, the MMX element type
      of the left operand (mmx_type) and, for addn/subn, from the
      cs_mmx_saturation local switch. The left operand is forced into an
      MMX register, which also receives the result.

      Supported node types: addn, muln, subn, xorn, orn, andn; any other
      node type raises internalerror(2003042214).

      NOTE(review): the inner case statements have no else branch, so op
      stays unassigned for element types they do not list (e.g. 32 bit
      elements with saturation enabled) - confirm that pass_1 rejects
      those combinations. }
    var
      op         : TAsmOp;
      pushedfpu,
      cmpop      : boolean;
      mmxbase    : tmmxtype;
      hreg,
      hregister  : tregister;
    begin
      pass_left_and_right(pushedfpu);

      { no comparison operator is handled here, so cmpop stays false }
      cmpop:=false;
      mmxbase:=mmx_type(left.resulttype.def);
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                 case mmxbase of
                    mmxs8bit:
                      op:=A_PADDSB;
                    mmxu8bit:
                      op:=A_PADDUSB;
                    mmxs16bit,mmxfixed16:
                      { signed saturating add of WORDs; the byte variant
                        (PADDSB) would saturate each byte separately }
                      op:=A_PADDSW;
                    mmxu16bit:
                      op:=A_PADDUSW;
                 end;
              end
            else
              begin
                 case mmxbase of
                    mmxs8bit,mmxu8bit:
                      op:=A_PADDB;
                    mmxs16bit,mmxu16bit,mmxfixed16:
                      op:=A_PADDW;
                    mmxs32bit,mmxu32bit:
                      op:=A_PADDD;
                 end;
              end;
          end;
        muln :
          begin
             case mmxbase of
                mmxs16bit,mmxu16bit:
                  op:=A_PMULLW;
                mmxfixed16:
                  op:=A_PMULHW;
             end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                 case mmxbase of
                    mmxs8bit:
                      op:=A_PSUBSB;
                    mmxu8bit:
                      op:=A_PSUBUSB;
                    mmxs16bit,mmxfixed16:
                      { signed saturating subtract of WORDs (not PSUBSB) }
                      op:=A_PSUBSW;
                    mmxu16bit:
                      op:=A_PSUBUSW;
                 end;
              end
            else
              begin
                 case mmxbase of
                    mmxs8bit,mmxu8bit:
                      op:=A_PSUBB;
                    mmxs16bit,mmxu16bit,mmxfixed16:
                      op:=A_PSUBW;
                    mmxs32bit,mmxu32bit:
                      op:=A_PSUBD;
                 end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              { right already is in a register: exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  location_release(exprasmlist,left.location);

                  hregister:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { swapped subtraction: compute in a scratch register and
                move the result back into the left register }
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  hreg:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                  cg.ungetregister(exprasmlist,hreg);
                  emit_reg_reg(A_MOVQ,S_NO,hreg,left.location.register);
                end
              else
                begin
                  { it is the RIGHT operand that is read from memory
                    below; left is already an MMX register here, so
                    testing left would always raise the internal error }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  location_release(exprasmlist,right.location);
                  hreg:=cg.getmmxregister(exprasmlist,OS_M64);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                  cg.ungetregister(exprasmlist,hreg);
                  emit_reg_reg(A_MOVQ,S_NO,hreg,left.location.register);
                end;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                  location_release(exprasmlist,right.location);
                end;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { the result ends up in the right register, so make it the
                new left location }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
        end;

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      if cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(cmpop,true);
    end;
  {$endif SUPPORT_MMX}
  1259. {*****************************************************************************
  1260. MUL
  1261. *****************************************************************************}
  procedure ti386addnode.second_mul;
    { Generates an unsigned 32 bit multiplication with the one-operand
      MUL instruction. pass_2 calls this after both operands have been
      evaluated and only when the selected opcode is A_MUL.

      The left operand is loaded into a temporary register, the right
      operand into EAX; the result is copied from EAX into a newly
      allocated register that becomes location.register.

      pass_2 leaves before its generic overflow handling runs, so the
      overflow test has to be emitted here when cs_check_overflow is
      active. }
    var
      r   : Tregister;
      hl4 : tasmlabel;
    begin
      {The location.register will be filled in later (JM)}
      location_reset(location,LOC_REGISTER,OS_INT);
      {Get a temp register and load the left value into it
       and free the location.}
      r:=cg.getintregister(exprasmlist,OS_INT);
      cg.a_load_loc_reg(exprasmlist,OS_INT,left.location,r);
      location_release(exprasmlist,left.location);
      {Allocate EAX.}
      cg.getexplicitregister(exprasmlist,NR_EAX);
      {Load the right value.}
      cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,NR_EAX);
      location_release(exprasmlist,right.location);
      {The mul instruction frees register r.}
      cg.ungetregister(exprasmlist,r);
      {Also allocate EDX, since it is also modified by a mul (JM).}
      cg.getexplicitregister(exprasmlist,NR_EDX);
      emit_reg(A_MUL,S_L,r);
      {MUL sets the carry flag when the upper half of the product (EDX)
       is non-zero, i.e. when the result does not fit in 32 bit. Skip the
       call to the overflow handler when the carry is clear; this is the
       same sequence second_add64bit uses for unsigned operands.}
      if cs_check_overflow in aktlocalswitches then
        begin
          objectlibrary.getlabel(hl4);
          cg.a_jmp_flags(exprasmlist,F_AE,hl4);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,hl4);
        end;
      {Free EDX}
      cg.ungetregister(exprasmlist,NR_EDX);
      {Free EAX}
      cg.ungetregister(exprasmlist,NR_EAX);
      {Allocate a new register and store the result in EAX in it.}
      location.register:=cg.getintregister(exprasmlist,OS_INT);
      emit_reg_reg(A_MOV,S_L,NR_EAX,location.register);
      location_freetemp(exprasmlist,left.location);
      location_freetemp(exprasmlist,right.location);
    end;
  1292. {*****************************************************************************
  1293. pass_2
  1294. *****************************************************************************}
  procedure ti386addnode.pass_2;
    { Code generation entry point of the i386 add node. Besides addition
      it also serves xor, mul, sub, or, and and the comparison operators.

      Operand types with a dedicated generator (booleans, 64 bit values,
      strings, sets, MMX-able arrays, floats) are forwarded to it. What
      remains is handled here in two groups:
        - 32 bit wide operands (pointers, classes/interfaces, class
          references, procvars, 4 byte enums, s32bit/u32bit ordinals);
        - 8/16 bit enums, char and widechar, which only support
          comparisons.
      Any other combination raises internalerror(2003042211). }
    var
      fpu_pushed,
      check_overflow,
      is_compare,
      { true, if unsigned types are compared }
      is_unsigned,
      { true, if for set subtractions the extra not should be generated }
      extra_not      : boolean;
      asm_op         : tasmop;
      operand_size   : topsize;
    begin
      { to make it more readable, string and set have their own procedures }
      case left.resulttype.def.deftype of
        orddef :
          { boolean expressions and 64 bit operations; other ordinals
            continue below }
          if is_boolean(left.resulttype.def) and
             is_boolean(right.resulttype.def) then
            begin
              second_addboolean;
              exit;
            end
          else if is_64bit(left.resulttype.def) then
            begin
              second_add64bit;
              exit;
            end;
        stringdef :
          begin
            second_addstring;
            exit;
          end;
        setdef :
          begin
            { normal sets are already handled in pass1 if mmx
              should not be used }
            if tsetdef(left.resulttype.def).settype=smallset then
              second_addsmallset
            else if cs_mmx in aktlocalswitches then
              second_addmmxset
            else
              internalerror(200109041);
            exit;
          end;
        arraydef :
          begin
  {$ifdef SUPPORT_MMX}
            if is_mmx_able_array(left.resulttype.def) then
              begin
                second_addmmx;
                exit;
              end;
  {$endif SUPPORT_MMX}
          end;
        floatdef :
          begin
            second_addfloat;
            exit;
          end;
      end;

      { defaults }
      is_compare:=false;
      check_overflow:=false;
      extra_not:=false;
      is_unsigned:=not(is_signed(left.resulttype.def)) or
                   not(is_signed(right.resulttype.def));
      operand_size:=def_opsize(left.resulttype.def);

      pass_left_and_right(fpu_pushed);

      { ---- 32 bit wide operands ---- }
      if (left.resulttype.def.deftype=pointerdef) or
         (right.resulttype.def.deftype=pointerdef) or

         (is_class_or_interface(right.resulttype.def) and is_class_or_interface(left.resulttype.def)) or

         (left.resulttype.def.deftype=classrefdef) or

         (left.resulttype.def.deftype=procvardef) or

         ((left.resulttype.def.deftype=enumdef) and
          (left.resulttype.def.size=4)) or

         ((left.resulttype.def.deftype=orddef) and
          (torddef(left.resulttype.def).typ in [s32bit,u32bit])) or
         ((right.resulttype.def.deftype=orddef) and
          (torddef(right.resulttype.def).typ in [s32bit,u32bit])) then
        begin
          case nodetype of
            addn :
              begin
                asm_op:=A_ADD;
                check_overflow:=true;
              end;
            subn :
              begin
                asm_op:=A_SUB;
                check_overflow:=true;
              end;
            muln :
              begin
                { an unsigned multiplication needs MUL, which requires
                  special handling }
                if is_unsigned then
                  begin
                    second_mul;
                    exit;
                  end;
                asm_op:=A_IMUL;
                check_overflow:=true;
              end;
            ltn,lten,
            gtn,gten,
            equaln,unequaln :
              begin
                asm_op:=A_CMP;
                is_compare:=true;
              end;
            andn :
              asm_op:=A_AND;
            orn :
              asm_op:=A_OR;
            xorn :
              asm_op:=A_XOR;
            else
              internalerror(200304229);
          end;

          { Convert flags to register first }
          if left.location.loc=LOC_FLAGS then
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[operand_size],false);
          if right.location.loc=LOC_FLAGS then
            location_force_reg(exprasmlist,right.location,opsize_2_cgsize[operand_size],false);

          left_must_be_reg(operand_size,false);
          emit_generic_code(asm_op,operand_size,is_unsigned,extra_not,check_overflow);

          { the right operand is done with }
          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          { after a comparison the left operand is not needed either,
            except when it is a register variable }
          if is_compare and
             (left.location.loc<>LOC_CREGISTER) then
            begin
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(is_compare,is_unsigned);
          exit;
        end;

      { ---- 8/16 bit enum,char,wchar types ---- }
      if ((left.resulttype.def.deftype=orddef) and
          (torddef(left.resulttype.def).typ in [uchar,uwidechar])) or
         ((left.resulttype.def.deftype=enumdef) and
          ((left.resulttype.def.size=1) or
           (left.resulttype.def.size=2))) then
        begin
          { only comparisons are possible on these types }
          if not(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) then
            internalerror(2003042210);

          left_must_be_reg(operand_size,false);
          emit_op_right_left(A_CMP,operand_size);

          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          if left.location.loc<>LOC_CREGISTER then
            begin
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(true,true);
        end
      else
        internalerror(2003042211);
    end;
  { Unit initialization: stores the ti386addnode class in the caddnode
    class-reference variable.
    NOTE(review): presumably this makes the generic node code create
    ti386addnode instances for add nodes on this target - confirm against
    the unit that declares caddnode. }
  1473. begin
  1474. caddnode:=ti386addnode;
  1475. end.
  1476. {
  1477. $Log$
  1478. Revision 1.89 2003-12-21 11:28:41 daniel
  1479. * Some work to allow mmx instructions to be used for 32 byte sets
  1480. Revision 1.88 2003/12/06 01:15:23 florian
  1481. * reverted Peter's alloctemp patch; hopefully properly
  1482. Revision 1.87 2003/12/03 23:13:20 peter
  1483. * delayed paraloc allocation, a_param_*() gets extra parameter
  1484. if it needs to allocate temp or real paralocation
  1485. * optimized/simplified int-real loading
  1486. Revision 1.86 2003/10/17 14:38:32 peter
  1487. * 64k registers supported
  1488. * fixed some memory leaks
  1489. Revision 1.85 2003/10/13 09:38:22 florian
  1490. * fixed forgotten commit
  1491. Revision 1.84 2003/10/13 01:58:03 florian
  1492. * some ideas for mm support implemented
  1493. Revision 1.83 2003/10/10 17:48:14 peter
  1494. * old trgobj moved to x86/rgcpu and renamed to trgx86fpu
  1495. * tregisteralloctor renamed to trgobj
  1496. * removed rgobj from a lot of units
  1497. * moved location_* and reference_* to cgobj
  1498. * first things for mmx register allocation
  1499. Revision 1.82 2003/10/09 21:31:37 daniel
  1500. * Register allocator splitted, ans abstract now
  1501. Revision 1.81 2003/10/08 09:13:16 florian
  1502. * fixed full bool evalution and bool xor, if the left or right side have LOC_JUMP
  1503. Revision 1.80 2003/10/01 20:34:49 peter
  1504. * procinfo unit contains tprocinfo
  1505. * cginfo renamed to cgbase
  1506. * moved cgmessage to verbose
  1507. * fixed ppc and sparc compiles
  1508. Revision 1.79 2003/09/28 21:48:20 peter
  1509. * fix register leaks
  1510. Revision 1.78 2003/09/28 13:35:40 peter
  1511. * shortstr compare updated for different calling conventions
  1512. Revision 1.77 2003/09/10 08:31:48 marco
  1513. * Patch from Peter for paraloc
  1514. Revision 1.76 2003/09/03 15:55:01 peter
  1515. * NEWRA branch merged
  1516. Revision 1.75.2.2 2003/08/31 13:50:16 daniel
  1517. * Remove sorting and use pregenerated indexes
  1518. * Some work on making things compile
  1519. Revision 1.75.2.1 2003/08/29 17:29:00 peter
  1520. * next batch of updates
  1521. Revision 1.75 2003/08/03 20:38:00 daniel
  1522. * Made code generator reverse or/add/and/xor/imul instructions when
  1523. possible to reduce the slowdown of spills.
  1524. Revision 1.74 2003/08/03 20:19:43 daniel
  1525. - Removed cmpop from Ti386addnode.second_addstring
  1526. Revision 1.73 2003/07/06 15:31:21 daniel
  1527. * Fixed register allocator. *Lots* of fixes.
  1528. Revision 1.72 2003/06/17 16:51:30 peter
  1529. * cycle fixes
  1530. Revision 1.71 2003/06/07 18:57:04 jonas
  1531. + added freeintparaloc
  1532. * ppc get/freeintparaloc now check whether the parameter regs are
  1533. properly allocated/deallocated (and get an extra list para)
  1534. * ppc a_call_* now internalerrors if pi_do_call is not yet set
  1535. * fixed lot of missing pi_do_call's
  1536. Revision 1.70 2003/06/03 13:01:59 daniel
  1537. * Register allocator finished
  1538. Revision 1.69 2003/05/30 23:49:18 jonas
  1539. * a_load_loc_reg now has an extra size parameter for the destination
  1540. register (properly fixes what I worked around in revision 1.106 of
  1541. ncgutil.pas)
  1542. Revision 1.68 2003/05/26 19:38:28 peter
  1543. * generic fpc_shorstr_concat
  1544. + fpc_shortstr_append_shortstr optimization
  1545. Revision 1.67 2003/05/22 21:32:29 peter
  1546. * removed some unit dependencies
  1547. Revision 1.66 2003/04/26 09:12:55 peter
  1548. * add string returns in LOC_REFERENCE
  1549. Revision 1.65 2003/04/23 20:16:04 peter
  1550. + added currency support based on int64
  1551. + is_64bit for use in cg units instead of is_64bitint
  1552. * removed cgmessage from n386add, replace with internalerrors
  1553. Revision 1.64 2003/04/23 09:51:16 daniel
  1554. * Removed usage of edi in a lot of places when new register allocator used
  1555. + Added newra versions of g_concatcopy and secondadd_float
  1556. Revision 1.63 2003/04/22 23:50:23 peter
  1557. * firstpass uses expectloc
  1558. * checks if there are differences between the expectloc and
  1559. location.loc from secondpass in EXTDEBUG
  1560. Revision 1.62 2003/04/22 10:09:35 daniel
  1561. + Implemented the actual register allocator
  1562. + Scratch registers unavailable when new register allocator used
  1563. + maybe_save/maybe_restore unavailable when new register allocator used
  1564. Revision 1.61 2003/04/17 10:02:48 daniel
  1565. * Tweaked register allocate/deallocate positition to less interferences
  1566. are generated.
  1567. Revision 1.60 2003/03/28 19:16:57 peter
  1568. * generic constructor working for i386
  1569. * remove fixed self register
  1570. * esi added as address register for i386
  1571. Revision 1.59 2003/03/13 19:52:23 jonas
  1572. * and more new register allocator fixes (in the i386 code generator this
  1573. time). At least now the ppc cross compiler can compile the linux
  1574. system unit again, but I haven't tested it.
  1575. Revision 1.58 2003/03/08 20:36:41 daniel
  1576. + Added newra version of Ti386shlshrnode
  1577. + Added interference graph construction code
  1578. Revision 1.57 2003/03/08 13:59:17 daniel
  1579. * Work to handle new register notation in ag386nsm
  1580. + Added newra version of Ti386moddivnode
  1581. Revision 1.56 2003/03/08 10:53:48 daniel
  1582. * Created newra version of secondmul in n386add.pas
  1583. Revision 1.55 2003/02/19 22:00:15 daniel
  1584. * Code generator converted to new register notation
  1585. - Horribily outdated todo.txt removed
  1586. Revision 1.54 2003/01/13 18:37:44 daniel
  1587. * Work on register conversion
  1588. Revision 1.53 2003/01/08 18:43:57 daniel
  1589. * Tregister changed into a record
  1590. Revision 1.52 2002/11/25 17:43:26 peter
  1591. * splitted defbase in defutil,symutil,defcmp
  1592. * merged isconvertable and is_equal into compare_defs(_ext)
  1593. * made operator search faster by walking the list only once
  1594. Revision 1.51 2002/11/15 01:58:56 peter
  1595. * merged changes from 1.0.7 up to 04-11
  1596. - -V option for generating bug report tracing
  1597. - more tracing for option parsing
  1598. - errors for cdecl and high()
  1599. - win32 import stabs
  1600. - win32 records<=8 are returned in eax:edx (turned off by default)
  1601. - heaptrc update
  1602. - more info for temp management in .s file with EXTDEBUG
  1603. Revision 1.50 2002/10/20 13:11:27 jonas
  1604. * re-enabled optimized version of comparisons with the empty string that
  1605. I accidentally disabled in revision 1.26
  1606. Revision 1.49 2002/08/23 16:14:49 peter
  1607. * tempgen cleanup
  1608. * tt_noreuse temp type added that will be used in genentrycode
  1609. Revision 1.48 2002/08/14 18:41:48 jonas
  1610. - remove valuelow/valuehigh fields from tlocation, because they depend
  1611. on the endianess of the host operating system -> difficult to get
  1612. right. Use lo/hi(location.valueqword) instead (remember to use
  1613. valueqword and not value!!)
  1614. Revision 1.47 2002/08/11 14:32:29 peter
  1615. * renamed current_library to objectlibrary
  1616. Revision 1.46 2002/08/11 13:24:16 peter
  1617. * saving of asmsymbols in ppu supported
  1618. * asmsymbollist global is removed and moved into a new class
  1619. tasmlibrarydata that will hold the info of a .a file which
  1620. corresponds with a single module. Added librarydata to tmodule
  1621. to keep the library info stored for the module. In the future the
  1622. objectfiles will also be stored to the tasmlibrarydata class
  1623. * all getlabel/newasmsymbol and friends are moved to the new class
  1624. Revision 1.45 2002/07/26 11:17:52 jonas
  1625. * the optimization of converting a multiplication with a power of two to
  1626. a shl is moved from n386add/secondpass to nadd/resulttypepass
  1627. Revision 1.44 2002/07/20 11:58:00 florian
  1628. * types.pas renamed to defbase.pas because D6 contains a types
  1629. unit so this would conflicts if D6 programms are compiled
  1630. + Willamette/SSE2 instructions to assembler added
  1631. Revision 1.43 2002/07/11 14:41:32 florian
  1632. * start of the new generic parameter handling
  1633. Revision 1.42 2002/07/07 09:52:33 florian
  1634. * powerpc target fixed, very simple units can be compiled
  1635. * some basic stuff for better callparanode handling, far from being finished
  1636. Revision 1.41 2002/07/01 18:46:31 peter
  1637. * internal linker
  1638. * reorganized aasm layer
  1639. Revision 1.40 2002/07/01 16:23:55 peter
  1640. * cg64 patch
  1641. * basics for currency
  1642. * asnode updates for class and interface (not finished)
  1643. Revision 1.39 2002/05/18 13:34:22 peter
  1644. * readded missing revisions
  1645. Revision 1.38 2002/05/16 19:46:51 carl
  1646. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  1647. + try to fix temp allocation (still in ifdef)
  1648. + generic constructor calls
  1649. + start of tassembler / tmodulebase class cleanup
  1650. Revision 1.36 2002/05/13 19:54:37 peter
  1651. * removed n386ld and n386util units
  1652. * maybe_save/maybe_restore added instead of the old maybe_push
  1653. Revision 1.35 2002/05/12 16:53:17 peter
  1654. * moved entry and exitcode to ncgutil and cgobj
  1655. * foreach gets extra argument for passing local data to the
  1656. iterator function
  1657. * -CR checks also class typecasts at runtime by changing them
  1658. into as
  1659. * fixed compiler to cycle with the -CR option
  1660. * fixed stabs with elf writer, finally the global variables can
  1661. be watched
  1662. * removed a lot of routines from cga unit and replaced them by
  1663. calls to cgobj
  1664. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  1665. u32bit then the other is typecasted also to u32bit without giving
  1666. a rangecheck warning/error.
  1667. * fixed pascal calling method with reversing also the high tree in
  1668. the parast, detected by tcalcst3 test
  1669. Revision 1.34 2002/04/25 20:16:40 peter
  1670. * moved more routines from cga/n386util
  1671. Revision 1.33 2002/04/05 15:09:13 jonas
  1672. * fixed web bug 1915
  1673. Revision 1.32 2002/04/04 19:06:10 peter
  1674. * removed unused units
  1675. * use tlocation.size in cg.a_*loc*() routines
  1676. Revision 1.31 2002/04/02 17:11:35 peter
  1677. * tlocation,treference update
  1678. * LOC_CONSTANT added for better constant handling
  1679. * secondadd splitted in multiple routines
  1680. * location_force_reg added for loading a location to a register
  1681. of a specified size
  1682. * secondassignment parses now first the right and then the left node
  1683. (this is compatible with Kylix). This saves a lot of push/pop especially
  1684. with string operations
  1685. * adapted some routines to use the new cg methods
  1686. Revision 1.29 2002/03/04 19:10:13 peter
  1687. * removed compiler warnings
  1688. }