n386add.pas 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841
  1. {
  2. $Id$
  3. Copyright (c) 2000-2002 by Florian Klaempfl
  4. Code generation for add nodes on the i386
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit n386add;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nadd,cpubase,cginfo;
  23. type
  24. ti386addnode = class(taddnode) { add node code generator for the i386, derived from taddnode }
  25. procedure pass_2;override; { overrides the inherited pass_2; its implementation is outside this view }
  26. protected
  27. function first_addstring : tnode; override; { returns nil for the shortstring cases generated in pass 2, else the inherited result }
  28. private
  29. procedure pass_left_and_right(var pushedfpu:boolean); { runs secondpass on left, then on right }
  30. function getresflags(unsigned : boolean) : tresflags; { picks the result flag from nodetype, signedness and nf_swaped }
  31. procedure left_must_be_reg(opsize:TOpSize;noswap:boolean); { swaps the operand locations or forces left into a register }
  32. procedure emit_op_right_left(op:TAsmOp;opsize:TOpSize); { emits op with right.location as source and left.location.register as destination }
  33. procedure emit_generic_code(op:TAsmOp;opsize:TOpSize;unsigned,extra_not,mboverflow:boolean); { emits the operation and, on request, the FPC_OVERFLOW check }
  34. procedure set_result_location(cmpop,unsigned:boolean); { LOC_FLAGS for comparisons, otherwise a copy of left.location }
  35. procedure second_addstring; { shortstring concatenation and comparison }
  36. procedure second_addboolean; { boolean operators and comparisons }
  37. procedure second_addfloat; { floating point arithmetic and comparisons on the FPU stack }
  38. procedure second_addsmallset; { small set operators and comparisons }
  39. procedure second_mul; { implementation is outside this view }
  40. {$ifdef SUPPORT_MMX}
  41. procedure second_addmmx;
  42. {$endif SUPPORT_MMX}
  43. procedure second_add64bit;
  44. end;
  45. implementation
  46. uses
  47. globtype,systems,
  48. cutils,verbose,globals,
  49. symconst,symdef,paramgr,
  50. aasmbase,aasmtai,aasmcpu,defutil,htypechk,
  51. cgbase,pass_2,regvars,
  52. cpupara,
  53. ncon,nset,
  54. cga,ncgutil,tgobj,rgobj,rgcpu,cgobj,cg64f32;
  55. {*****************************************************************************
  56. Helpers
  57. *****************************************************************************}
  58. const
  59. opsize_2_cgsize : array[S_B..S_L] of tcgsize = (OS_8,OS_16,OS_32); { lookup from a TOpSize in S_B..S_L to the tcgsize handed to location_force_reg }
  procedure ti386addnode.pass_left_and_right(var pushedfpu:boolean);
    { Runs the second pass over both operands, left first.

      pushedfpu: set to the result of maybe_pushfpu when this node's own
                 location is LOC_FPUREGISTER, and to false otherwise. }
    var
      savedregs : tmaybesave;
    begin
      { let firstcomplex decide which operand is evaluated first }
      firstcomplex(self);

      { an ordinal constant found on the left makes the operands trade places }
      if left.nodetype=ordconstn then
        swapleftright;

      secondpass(left);

{$ifndef newra}
      { old register allocator only: save what left occupies if right
        needs more integer registers than are available }
      maybe_save(exprasmlist,right.registers32,left.location,savedregs);
{$endif newra}

      { NOTE(review): this tests the node's own location, not left.location;
        confirm that this is the intended condition }
      pushedfpu:=false;
      if location.loc=LOC_FPUREGISTER then
        pushedfpu:=maybe_pushfpu(exprasmlist,right.registersfpu,left.location);

      secondpass(right);

{$ifndef newra}
      maybe_restore(exprasmlist,left.location,savedregs);
{$endif}
    end;
  function ti386addnode.getresflags(unsigned : boolean) : tresflags;
    { Selects the flag that represents the outcome of the comparison in
      nodetype.

      unsigned: true selects the below/above family, false the
                less/greater family.

      When nf_swaped is set the operands were exchanged, so the mirrored
      flag is returned.  The result is left unassigned for node types
      other than the six comparison operators. }
    const
      { first index: unsigned, second index: operands swapped }
      res_lt  : array[boolean,boolean] of tresflags = ((F_L ,F_G ),(F_B ,F_A ));
      res_lte : array[boolean,boolean] of tresflags = ((F_LE,F_GE),(F_BE,F_AE));
      res_gt  : array[boolean,boolean] of tresflags = ((F_G ,F_L ),(F_A ,F_B ));
      res_gte : array[boolean,boolean] of tresflags = ((F_GE,F_LE),(F_AE,F_BE));
    var
      swapped : boolean;
    begin
      swapped:=nf_swaped in flags;
      case nodetype of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          getresflags:=res_lt[unsigned,swapped];
        lten :
          getresflags:=res_lte[unsigned,swapped];
        gtn :
          getresflags:=res_gt[unsigned,swapped];
        gten :
          getresflags:=res_gte[unsigned,swapped];
      end;
    end;
  procedure ti386addnode.left_must_be_reg(opsize:TOpSize;noswap:boolean);
    { Ensures left.location is a LOC_REGISTER.

      opsize: operand size, translated through opsize_2_cgsize when left
              has to be loaded.
      noswap: when true, the locations are never exchanged, even if right
              already is a LOC_REGISTER. }
    begin
      { already in a register: nothing to do }
      if left.location.loc=LOC_REGISTER then
        exit;

      if (not noswap) and (right.location.loc=LOC_REGISTER) then
        begin
          { right is in a register, so exchanging the locations is enough }
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
        end
      else
        begin
          { the last argument is true for comparisons, which do not
            change the value held in the register }
          location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],
            (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
        end;
    end;
  procedure ti386addnode.emit_op_right_left(op:TAsmOp;opsize:TOpsize);
    { Appends one instruction "op right,left" to exprasmlist.  The
      destination is always left.location.register; the source form
      (register, memory reference or constant) follows right.location.loc.
      Any other right location raises internalerror 200203232. }
    begin
      if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
        exprasmlist.concat(taicpu.op_reg_reg(op,opsize,right.location.register,left.location.register))
      else if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        exprasmlist.concat(taicpu.op_ref_reg(op,opsize,right.location.reference,left.location.register))
      else if right.location.loc=LOC_CONSTANT then
        exprasmlist.concat(taicpu.op_const_reg(op,opsize,right.location.value,left.location.register))
      else
        internalerror(200203232);
    end;
  procedure ti386addnode.set_result_location(cmpop,unsigned:boolean);
    { Fills in this node's location once the instruction has been emitted.

      cmpop:    true for comparisons; the result then lives in the flags.
      unsigned: forwarded to getresflags to choose the flag family. }
    begin
      if not cmpop then
        begin
          { arithmetic result: it sits wherever left is }
          location_copy(location,left.location);
          exit;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned);
    end;
  procedure ti386addnode.emit_generic_code(op:TAsmOp;opsize:TOpSize;unsigned,extra_not,mboverflow:boolean);
    { Emits the instruction(s) for a two-operand integer operation.
      On entry left.location is expected to be a LOC_REGISTER.

      op:         instruction to emit
      opsize:     operand size of that instruction
      unsigned:   selects the condition used by the overflow check
                  (C_NB when true, C_NO when false)
      extra_not:  the source operand is inverted with NOT first
      mboverflow: when true and cs_check_overflow is active, a jump around
                  a call to FPC_OVERFLOW is appended }
    var
      shiftcount     : longint;
      nooverflow     : tasmlabel;
      tmpreg         : Tregister;
      swapped_sub,
      right_is_const,
      no_ovf_check   : boolean;
    begin
      swapped_sub:=(nodetype=subn) and (nf_swaped in flags);
      right_is_const:=(right.location.loc=LOC_CONSTANT);
      no_ovf_check:=not(cs_check_overflow in aktlocalswitches);

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { swapped subtraction: the result ends up in right's register }
              if extra_not then
                emit_reg(A_NOT,S_L,left.location.register);
              emit_reg_reg(op,opsize,left.location.register,right.location.register);
              { exchange the locations back and record that in the flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,S_L,right.location.register);
              emit_reg_reg(op,opsize,right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not in a register: load it into a scratch register,
            operate there and move the result into left's register }
          if extra_not then
            emit_reg(A_NOT,opsize,left.location.register);
{$ifdef newra}
          tmpreg:=rg.getregisterint(exprasmlist,OS_INT);
{$else}
          tmpreg.enum:=R_INTREGISTER;
          tmpreg.number:=NR_EDI;
          rg.getexplicitregisterint(exprasmlist,NR_EDI);
{$endif}
          cg.a_load_loc_reg(exprasmlist,right.location,tmpreg);
          emit_reg_reg(op,opsize,left.location.register,tmpreg);
          emit_reg_reg(A_MOV,opsize,tmpreg,left.location.register);
          rg.ungetregisterint(exprasmlist,tmpreg);
        end
      { shortcuts for constant right operands }
      else if (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              right_is_const and
              (right.location.value=0) then
        { (in)equality against zero: TEST reg,reg }
        emit_reg_reg(A_TEST,opsize,left.location.register,left.location.register)
      else if (op=A_ADD) and
              right_is_const and
              (right.location.value=1) and
              no_ovf_check then
        emit_reg(A_INC,opsize,left.location.register)
      else if (op=A_SUB) and
              right_is_const and
              (right.location.value=1) and
              no_ovf_check then
        emit_reg(A_DEC,opsize,left.location.register)
      else if (op=A_IMUL) and
              right_is_const and
              (ispowerof2(right.location.value,shiftcount)) and
              no_ovf_check then
        { multiplication by a power of two becomes a left shift }
        emit_const_reg(A_SHL,opsize,shiftcount,left.location.register)
      else if extra_not then
        begin
          { AND left with the complement of right, built in a scratch register }
{$ifdef newra}
          tmpreg:=rg.getregisterint(exprasmlist,OS_INT);
{$else}
          rg.getexplicitregisterint(exprasmlist,NR_EDI);
          tmpreg.enum:=R_INTREGISTER;
          tmpreg.number:=NR_EDI;
{$endif}
          cg.a_load_loc_reg(exprasmlist,right.location,tmpreg);
          emit_reg(A_NOT,S_L,tmpreg);
          emit_reg_reg(A_AND,S_L,tmpreg,left.location.register);
          rg.ungetregisterint(exprasmlist,tmpreg);
        end
      else
        emit_op_right_left(op,opsize);

      { The overflow check is generated here because the signedness of
        the operation is only known through the unsigned parameter. }
      if mboverflow and (cs_check_overflow in aktlocalswitches) then
        begin
          objectlibrary.getlabel(nooverflow);
          if unsigned then
            emitjmp(C_NB,nooverflow)
          else
            emitjmp(C_NO,nooverflow);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,nooverflow);
        end;
    end;
  293. {*****************************************************************************
  294. Addstring
  295. *****************************************************************************}
  296. { note: if you implement an fpc_shortstr_concat similar to the }
  297. { one in i386.inc, you have to override first_addstring like in }
  298. { ti386addnode.first_addstring and implement the shortstring concat }
  299. { manually! The generic routine is different from the i386 one (JM) }
  function ti386addnode.first_addstring : tnode;
    { First-pass hook for string operands.

      Two shortstring cases are kept for this unit's pass 2 instead of
      being handed to the generic code:
        - concatenation (addn) with a shortstring result,
        - comparisons of shortstrings where neither operand is an empty
          string constant (the compare helper reports its result in the
          flags, not in eax).
      For those the function sets expectloc, calls calcregisters and
      returns nil.  Every other case returns the inherited result. }
    begin
      if (nodetype=addn) and
         is_shortstring(resulttype.def) then
        begin
          expectloc:=LOC_CREFERENCE;
          calcregisters(self,0,0,0);
          result:=nil;
        end
      else if (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) and
              is_shortstring(left.resulttype.def) and
              not((left.nodetype=stringconstn) and (str_length(left)=0)) and
              not((right.nodetype=stringconstn) and (str_length(right)=0)) then
        begin
          expectloc:=LOC_FLAGS;
          calcregisters(self,0,0,0);
          result:=nil;
        end
      else
        { everything else goes through the generic implementation }
        result:=inherited first_addstring;
    end;
  procedure ti386addnode.second_addstring;
    { Pass 2 for shortstring concatenation and comparison.  Both are
      done by calling a helper (FPC_SHORTSTR_CONCAT or
      FPC_SHORTSTR_COMPARE) with the addresses of the two operands.
      Any other string type raises internalerror 200108303. }
    var
      tmpref    : treference;
      cmpop     : boolean;
      savedregs : Tpushedsavedint;
      pushset   : Tsupregset;
    begin
      { string operations are not commutative, so undo an earlier swap }
      if nf_swaped in flags then
        swapleftright;

      { only shortstrings reach this point; the rest is dealt with in pass 1 }
      if tstringdef(left.resulttype.def).string_typ<>st_shortstring then
        begin
          internalerror(200108303);
          exit;
        end;

      case nodetype of
        addn:
          begin
            cmpop:=false;
            secondpass(left);
            { Copy left into a fresh 256 byte temp unless it already is
              such a temp (it may come from a type conversion or a
              function result) or nf_use_strconcat is set (s:=s+...). }
            if not(tg.istemp(left.location.reference) and
                   (tg.SizeOfTemp(exprasmlist,left.location.reference) = 256)) and
               not(nf_use_strconcat in flags) then
              begin
                tg.GetTemp(exprasmlist,256,tt_normal,tmpref);
                cg.g_copyshortstring(exprasmlist,left.location.reference,tmpref,255,true,false);
                { g_copyshortstring has released the location already }
                location_freetemp(exprasmlist,left.location);
                location_reset(left.location,LOC_CREFERENCE,def_cgsize(resulttype.def));
                left.location.reference:=tmpref;
              end;

            secondpass(right);

            { Registers used by right are not needed after the call.
              They are left out of the save set instead of being released
              early, so the release lands in the right place. }
            pushset:=all_intregisters;
            remove_non_regvars_from_loc(right.location,pushset);
            rg.saveusedintregisters(exprasmlist,savedregs,pushset);

            { parameter 2: address of the destination (left) }
            cg.a_paramaddr_ref(exprasmlist,left.location.reference,paramanager.getintparaloc(2));
            { release right before its address is passed: a deallocation
              that comes too early is easier for the optimizer to place
              than one that comes too late }
            location_release(exprasmlist,right.location);
            { parameter 1: address of the string being appended (right) }
            cg.a_paramaddr_ref(exprasmlist,right.location.reference,paramanager.getintparaloc(1));
            rg.saveintregvars(exprasmlist,pushset);
            cg.a_call_name(exprasmlist,'FPC_SHORTSTR_CONCAT');
            tg.ungetiftemp(exprasmlist,right.location.reference);
            rg.restoreusedintregisters(exprasmlist,savedregs);
          end;

        ltn,lten,gtn,gten,equaln,unequaln :
          begin
            cmpop:=true;
            rg.saveusedintregisters(exprasmlist,savedregs,all_intregisters);

            secondpass(left);
            location_release(exprasmlist,left.location);
            cg.a_paramaddr_ref(exprasmlist,left.location.reference,paramanager.getintparaloc(2));

            secondpass(right);
            location_release(exprasmlist,right.location);
            cg.a_paramaddr_ref(exprasmlist,right.location.reference,paramanager.getintparaloc(1));

            rg.saveintregvars(exprasmlist,all_intregisters);
            cg.a_call_name(exprasmlist,'FPC_SHORTSTR_COMPARE');
            rg.restoreusedintregisters(exprasmlist,savedregs);

            location_freetemp(exprasmlist,left.location);
            location_freetemp(exprasmlist,right.location);
          end;
      end;

      set_result_location(cmpop,true);
    end;
  407. {*****************************************************************************
  408. AddBoolean
  409. *****************************************************************************}
  procedure ti386addnode.second_addboolean;
    { Pass 2 for boolean operands.

      With full boolean evaluation, or for comparisons and xor, both
      operands are evaluated into registers and combined with
      CMP/XOR/OR/AND.  Otherwise "and"/"or" are generated as
      short-circuit jump code and the node's location becomes LOC_JUMP.

      Fix: whether an operand produces jump code is now read from its
      expectloc.  The previous code read location.loc before secondpass
      had run on that operand, i.e. before pass 2 had filled the location
      in, so the private true/false labels might not be installed for an
      operand that ends up as LOC_JUMP. }
    var
      op         : TAsmOp;
      opsize     : TOpsize;
      cmpop,
      isjump     : boolean;
      otl,ofl    : tasmlabel;
      pushedregs : tmaybesave;
    begin
      { let firstcomplex decide which operand is evaluated first }
      firstcomplex(self);

      cmpop:=false;
      { operate on the smallest boolean size found among the operands }
      if (torddef(left.resulttype.def).typ=bool8bit) or
         (torddef(right.resulttype.def).typ=bool8bit) then
        opsize:=S_B
      else
        if (torddef(left.resulttype.def).typ=bool16bit) or
           (torddef(right.resulttype.def).typ=bool16bit) then
          opsize:=S_W
        else
          opsize:=S_L;

      if (cs_full_boolean_eval in aktlocalswitches) or
         (nodetype in [unequaln,ltn,lten,gtn,gten,equaln,xorn]) then
        begin
          if left.nodetype in [ordconstn,realconstn] then
            swapleftright;

          { left operand: give it private labels if it generates jumps }
          isjump:=(left.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(left);
          if left.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end;

{$ifndef newra}
          maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
{$endif}

          { right operand: same treatment }
          isjump:=(right.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(right);
{$ifndef newra}
          maybe_restore(exprasmlist,left.location,pushedregs);
{$endif newra}
          if right.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end;

          { left must be a register }
          left_must_be_reg(opsize,false);

          { pick the instruction that combines or compares the operands }
          case nodetype of
            ltn,lten,gtn,gten,
            equaln,unequaln :
              begin
                op:=A_CMP;
                cmpop:=true;
              end;
            xorn :
              op:=A_XOR;
            orn :
              op:=A_OR;
            andn :
              op:=A_AND;
            else
              internalerror(200203247);
          end;
          emit_op_right_left(op,opsize);

          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          { a comparison leaves its result in the flags, so left is free too }
          if cmpop then
            begin
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(cmpop,true);
        end
      else
        begin
          { short-circuit evaluation }
          case nodetype of
            andn,
            orn :
              begin
                location_reset(location,LOC_JUMP,OS_NO);
                case nodetype of
                  andn :
                    begin
                      { a true left operand continues at a local label
                        where right is evaluated }
                      otl:=truelabel;
                      objectlibrary.getlabel(truelabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,truelabel);
                      truelabel:=otl;
                    end;
                  orn :
                    begin
                      { a false left operand continues at a local label
                        where right is evaluated }
                      ofl:=falselabel;
                      objectlibrary.getlabel(falselabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,falselabel);
                      falselabel:=ofl;
                    end;
                  else
                    CGMessage(type_e_mismatch);
                end;
                secondpass(right);
                maketojumpbool(exprasmlist,right,lr_load_regvars);
              end;
            else
              CGMessage(type_e_mismatch);
          end;
        end;
    end;
  540. {*****************************************************************************
  541. AddFloat
  542. *****************************************************************************}
  procedure ti386addnode.second_addfloat;
    { Pass 2 for floating point operands.  Both operands are brought
      onto the FPU stack, then either a popping arithmetic instruction is
      emitted (result in ST, location LOC_FPUREGISTER) or FCOMPP followed
      by FNSTSW/SAHF (result in the CPU flags, location LOC_FLAGS). }
    var
      op        : TAsmOp;
      resflags  : tresflags;
      pushedfpu,
      cmpop,
      swapped   : boolean;
      reg,reg2  : Tregister;
    begin
      pass_left_and_right(pushedfpu);

      cmpop:=false;
      case nodetype of
        addn :
          op:=A_FADDP;
        muln :
          op:=A_FMULP;
        subn :
          op:=A_FSUBP;
        slashn :
          op:=A_FDIVP;
        ltn,lten,gtn,gten,
        equaln,unequaln :
          begin
            op:=A_FCOMPP;
            cmpop:=true;
          end;
        else
          CGMessage(type_e_mismatch);
      end;

      if (right.location.loc<>LOC_FPUREGISTER) then
        begin
          { right is not on the FPU stack yet: load it }
          reg.enum:=R_ST;
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,reg);
          if (right.location.loc <> LOC_CFPUREGISTER) and
             pushedfpu then
            location_freetemp(exprasmlist,left.location);

          if (left.location.loc<>LOC_FPUREGISTER) then
            begin
              cg.a_loadfpu_loc_reg(exprasmlist,left.location,reg);
              if (left.location.loc <> LOC_CFPUREGISTER) and
                 pushedfpu then
                location_freetemp(exprasmlist,left.location);
            end
          else
            { left was on the stack already, so the order is reversed }
            toggleflag(nf_swaped);

          { the right reference is no longer needed }
          location_release(exprasmlist,right.location);
        end
      else if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          { right is on the stack, left still has to be loaded into ST }
          reg.enum:=R_ST;
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,reg);
          if (left.location.loc <> LOC_CFPUREGISTER) and
             pushedfpu then
            location_freetemp(exprasmlist,left.location);
        end
      else
        { both operands were on the stack: they are in reversed order }
        toggleflag(nf_swaped);

      { the left reference is no longer needed }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

      { with swapped operands the reversed instruction is required }
      if nf_swaped in flags then
        case nodetype of
          slashn :
            op:=A_FDIVRP;
          subn :
            op:=A_FSUBRP;
        end;

      { The original carried a disabled sketch here that would call
        EMUL_FDIVP instead of FDIVP on the pentium; it is not active. }

      { the arithmetic instructions are written with explicit operands }
      if op<>A_FCOMPP then
        begin
          reg.enum:=R_ST;
          reg2.enum:=R_ST1;
          emit_reg_reg(op,S_NO,reg,reg2);
          dec(trgcpu(rg).fpuvaroffset);
        end
      else
        begin
          emit_none(op,S_NO);
          dec(trgcpu(rg).fpuvaroffset,2);
        end;

      if cmpop then
        begin
          { transfer the FPU status word into the CPU flags through AX }
{$ifdef newra}
          reg:=rg.getexplicitregisterint(exprasmlist,NR_AX);
{$else}
          if not(RS_EAX in rg.unusedregsint) then
            begin
              { EAX is in use: keep its value in EDI meanwhile }
              rg.getexplicitregisterint(exprasmlist,NR_EDI);
              reg.enum:=R_INTREGISTER;
              reg.number:=NR_EAX;
              reg2.enum:=R_INTREGISTER;
              reg2.number:=NR_EDI;
              emit_reg_reg(A_MOV,S_L,reg,reg2);
            end;
          reg.enum:=R_INTREGISTER;
          reg.number:=NR_AX;
{$endif}
          emit_reg(A_FNSTSW,S_NO,reg);
          emit_none(A_SAHF,S_NO);
{$ifdef newra}
          rg.ungetregisterint(exprasmlist,reg);
{$else}
          if not(RS_EAX in rg.unusedregsint) then
            begin
              { restore the saved EAX from EDI }
              reg.enum:=R_INTREGISTER;
              reg.number:=NR_EAX;
              reg2.enum:=R_INTREGISTER;
              reg2.number:=NR_EDI;
              emit_reg_reg(A_MOV,S_L,reg2,reg);
              rg.ungetregisterint(exprasmlist,reg2);
            end;
{$endif}

          { choose the flag; swapped operands mirror the ordering tests }
          swapped:=nf_swaped in flags;
          case nodetype of
            equaln :
              resflags:=F_E;
            unequaln :
              resflags:=F_NE;
            ltn :
              begin
                if swapped then
                  resflags:=F_A
                else
                  resflags:=F_B;
              end;
            lten :
              begin
                if swapped then
                  resflags:=F_AE
                else
                  resflags:=F_BE;
              end;
            gtn :
              begin
                if swapped then
                  resflags:=F_B
                else
                  resflags:=F_A;
              end;
            gten :
              begin
                if swapped then
                  resflags:=F_BE
                else
                  resflags:=F_AE;
              end;
          end;

          location_reset(location,LOC_FLAGS,OS_NO);
          location.resflags:=resflags;
        end
      else
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
          location.register.enum:=R_ST;
        end;
    end;
  700. {*****************************************************************************
  701. AddSmallSet
  702. *****************************************************************************}
  procedure ti386addnode.second_addsmallset;
    { Pass 2 for small sets, handled as 32 bit values (opsize S_L).
      A set operand that is not a smallset raises internalerror 200203301. }
    var
      opsize    : TOpSize;
      op        : TAsmOp;
      cmpop,
      pushedfpu,
      extra_not,
      noswap    : boolean;
    begin
      pass_left_and_right(pushedfpu);

      { whenever a setdef is passed it has to be a smallset }
      if ((left.resulttype.def.deftype=setdef) and
          (tsetdef(left.resulttype.def).settype<>smallset)) or
         ((right.resulttype.def.deftype=setdef) and
          (tsetdef(right.resulttype.def).settype<>smallset)) then
        internalerror(200203301);

      cmpop:=false;
      noswap:=false;
      extra_not:=false;
      opsize:=S_L;

      case nodetype of
        addn :
          begin
            { adding a set element is not commutative: the element has
              to be the right operand }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;

            if right.nodetype=setelementn then
              begin
                { ranges are not supported for smallsets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { BTS is used with both operands in registers }
                location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
                location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],true);
                op:=A_BTS;
                noswap:=true;
              end
            else
              op:=A_OR;
          end;

        symdifn,
        xorn :
          op:=A_XOR;

        muln,
        andn :
          op:=A_AND;

        orn :
          op:=A_OR;

        subn :
          begin
            { difference = AND with the complement of the subtrahend;
              a constant subtrahend is complemented right here, otherwise
              emit_generic_code has to emit the NOT }
            op:=A_AND;
            if nf_swaped in flags then
              begin
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;

        equaln,
        unequaln :
          begin
            op:=A_CMP;
            cmpop:=true;
          end;

        lten,gten:
          begin
            { swap for an unswapped lten or a swapped gten, then AND and
              compare for equality }
            if (nf_swaped in flags)=(nodetype=gten) then
              swapleftright;
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],true);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            cmpop:=true;
            { hack: the result must be tested with an "equal" condition,
              so the node type itself is changed to equaln }
            nodetype:=equaln;
          end;

        else
          begin
            { sets have no < or > }
            CGMessage(type_e_mismatch);
          end;
      end;

      { left must be a register }
      left_must_be_reg(opsize,noswap);
      emit_generic_code(op,opsize,true,extra_not,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      { a comparison leaves its result in the flags, so left is free too }
      if cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(cmpop,true);
    end;
  808. {*****************************************************************************
  809. Add64bit
  810. *****************************************************************************}
  {
    Second-pass code generation for 64 bit integer operands on i386.

    Handles addn, subn, xorn, orn, andn and the comparisons ltn, lten, gtn,
    gten, equaln and unequaln.  muln raises an internal error because it is
    expected to have been handled in pass_1; any other node type is
    reported as a type mismatch.

    Arithmetic and logic results end up in left.location (a register pair)
    and are copied to location.  Comparisons emit jumps to truelabel and
    falselabel and set location to LOC_JUMP.
  }
  procedure ti386addnode.second_add64bit;
    var
      op         : TOpCG;
      op1,op2    : TAsmOp;
      opsize     : TOpSize;
      hregister,
      hregister2 : tregister;
      href       : treference;
      hl4        : tasmlabel;
      pushedfpu,
      mboverflow,
      cmpop,
      unsigned   : boolean;
      r          : Tregister;

    { Emits the jumps that follow the comparison of the high dwords.
      For the ordering comparisons one jump goes to truelabel and one to
      falselabel; NOTE(review): presumably execution falls through to the
      low dword comparison when the high dwords are equal - confirm against
      getresflags. }
    procedure firstjmp64bitcmp;
      var
        oldnodetype : tnodetype;
      begin
        load_all_regvars(exprasmlist);
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn:
            begin
              emitjmp(flags_to_cond(getresflags(unsigned)),truelabel);
              { cheat a little bit for the negative test: temporarily }
              { toggle nf_swaped, which is restored right afterwards  }
              toggleflag(nf_swaped);
              emitjmp(flags_to_cond(getresflags(unsigned)),falselabel);
              toggleflag(nf_swaped);
            end;
          lten,gten:
            begin
              { the high dwords are tested with the strict relation, so }
              { nodetype is changed temporarily and restored at the end }
              oldnodetype:=nodetype;
              if nodetype=lten then
                nodetype:=ltn
              else
                nodetype:=gtn;
              emitjmp(flags_to_cond(getresflags(unsigned)),truelabel);
              { cheat for the negative test }
              if nodetype=ltn then
                nodetype:=gtn
              else
                nodetype:=ltn;
              emitjmp(flags_to_cond(getresflags(unsigned)),falselabel);
              nodetype:=oldnodetype;
            end;
          equaln:
            emitjmp(C_NE,falselabel);
          unequaln:
            emitjmp(C_NE,truelabel);
        end;
      end;

    { Emits the jumps that follow the comparison of the low dwords.
      Every handled node type ends with an unconditional jump, so no code
      emitted directly after a call to this procedure can be reached by
      falling through. }
    procedure secondjmp64bitcmp;
      begin
        { the jump sequence is a little bit hairy }
        case nodetype of
          ltn,gtn,lten,gten:
            begin
              { the comparison of the low dword always has to be }
              { unsigned!                                        }
              emitjmp(flags_to_cond(getresflags(true)),truelabel);
              cg.a_jmp_always(exprasmlist,falselabel);
            end;
          equaln:
            begin
              emitjmp(C_NE,falselabel);
              cg.a_jmp_always(exprasmlist,truelabel);
            end;
          unequaln:
            begin
              emitjmp(C_NE,truelabel);
              cg.a_jmp_always(exprasmlist,falselabel);
            end;
        end;
      end;

    begin
      firstcomplex(self);
      pass_left_and_right(pushedfpu);

      { defaults; op gets a defined value so that it is never read }
      { uninitialised after a type mismatch has been reported      }
      op:=OP_NONE;
      op1:=A_NONE;
      op2:=A_NONE;
      mboverflow:=false;
      cmpop:=false;
      opsize:=S_L;
      { the operation is unsigned as soon as one operand is u64bit }
      unsigned:=((left.resulttype.def.deftype=orddef) and
                 (torddef(left.resulttype.def).typ=u64bit)) or
                ((right.resulttype.def.deftype=orddef) and
                 (torddef(right.resulttype.def).typ=u64bit));
      case nodetype of
        addn :
          begin
            op:=OP_ADD;
            mboverflow:=true;
          end;
        subn :
          begin
            op:=OP_SUB;
            { op1/op2 are only used by the swapped subtraction below }
            op1:=A_SUB;
            op2:=A_SBB;
            mboverflow:=true;
          end;
        ltn,lten,
        gtn,gten,
        equaln,unequaln:
          begin
            op:=OP_NONE;
            cmpop:=true;
          end;
        xorn:
          op:=OP_XOR;
        orn:
          op:=OP_OR;
        andn:
          op:=OP_AND;
        muln:
          begin
            { should be handled in pass_1 (JM) }
            internalerror(200109051);
          end;
        else
          CGMessage(type_e_mismatch);
      end;

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_REGISTER) then
        begin
          if (right.location.loc<>LOC_REGISTER) then
            begin
              { we can reuse a CREGISTER for comparison }
              if not((left.location.loc=LOC_CREGISTER) and cmpop) then
                begin
                  if (left.location.loc<>LOC_CREGISTER) then
                    begin
                      location_freetemp(exprasmlist,left.location);
                      location_release(exprasmlist,left.location);
                    end;
                  hregister:=rg.getregisterint(exprasmlist,OS_INT);
                  hregister2:=rg.getregisterint(exprasmlist,OS_INT);
                  cg64.a_load64_loc_reg(exprasmlist,left.location,joinreg64(hregister,hregister2));
                  location_reset(left.location,LOC_REGISTER,OS_64);
                  left.location.registerlow:=hregister;
                  left.location.registerhigh:=hregister2;
                end;
            end
          else
            begin
              { right is already in a register: use it as the left operand }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end;
        end;

      { at this point, left.location.loc should be LOC_REGISTER }
      { (or LOC_CREGISTER when a comparison reuses a regvar)    }
      if right.location.loc=LOC_REGISTER then
        begin
          { when swapped another result register }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              cg64.a_op64_reg_reg(exprasmlist,op,
                left.location.register64,
                right.location.register64);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else if cmpop then
            begin
              emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
              firstjmp64bitcmp;
              emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
              secondjmp64bitcmp;
            end
          else
            begin
              cg64.a_op64_reg_reg(exprasmlist,op,
                right.location.register64,
                left.location.register64);
            end;
          location_release(exprasmlist,right.location);
        end
      else
        begin
          { right.location<>LOC_REGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { swapped subtraction: each half of right is loaded into a }
              { scratch register, combined with the matching half of     }
              { left and moved back into left's register                 }
{$ifdef newra}
              r:=rg.getregisterint(exprasmlist,OS_INT);
{$else}
              rg.getexplicitregisterint(exprasmlist,NR_EDI);
              r.enum:=R_INTREGISTER;
              r.number:=NR_EDI;
{$endif}
              cg64.a_load64low_loc_reg(exprasmlist,right.location,r);
              emit_reg_reg(op1,opsize,left.location.registerlow,r);
              emit_reg_reg(A_MOV,opsize,r,left.location.registerlow);
              cg64.a_load64high_loc_reg(exprasmlist,right.location,r);
              { the carry flag is still ok }
              emit_reg_reg(op2,opsize,left.location.registerhigh,r);
              emit_reg_reg(A_MOV,opsize,r,left.location.registerhigh);
              rg.ungetregisterint(exprasmlist,r);
              if right.location.loc<>LOC_CREGISTER then
                begin
                  location_freetemp(exprasmlist,right.location);
                  location_release(exprasmlist,right.location);
                end;
            end
          else if cmpop then
            begin
              case right.location.loc of
                LOC_CREGISTER :
                  begin
                    emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
                    firstjmp64bitcmp;
                    emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
                    secondjmp64bitcmp;
                  end;
                LOC_CREFERENCE,
                LOC_REFERENCE :
                  begin
                    { the high dword is located 4 bytes after the low dword }
                    href:=right.location.reference;
                    inc(href.offset,4);
                    emit_ref_reg(A_CMP,S_L,href,left.location.registerhigh);
                    firstjmp64bitcmp;
                    emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.registerlow);
                    { secondjmp64bitcmp always ends with an unconditional }
                    { jump, so no additional jump to falselabel is needed }
                    secondjmp64bitcmp;
                    location_freetemp(exprasmlist,right.location);
                    location_release(exprasmlist,right.location);
                  end;
                LOC_CONSTANT :
                  begin
                    exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,hi(right.location.valueqword),left.location.registerhigh));
                    firstjmp64bitcmp;
                    exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,lo(right.location.valueqword),left.location.registerlow));
                    secondjmp64bitcmp;
                  end;
                else
                  internalerror(200203282);
              end;
            end
          else
            begin
              cg64.a_op64_loc_reg(exprasmlist,op,right.location,
                left.location.register64);
              if (right.location.loc<>LOC_CREGISTER) then
                begin
                  location_freetemp(exprasmlist,right.location);
                  location_release(exprasmlist,right.location);
                end;
            end;
        end;

      { after a comparison the left operand is no longer needed, }
      { unless it is a register variable                         }
      if (left.location.loc<>LOC_CREGISTER) and cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in aktlocalswitches then
            begin
              objectlibrary.getlabel(hl4);
              { skip the FPC_OVERFLOW call when no carry (unsigned) or }
              { no overflow (signed) occurred                          }
              if unsigned then
                emitjmp(C_NB,hl4)
              else
                emitjmp(C_NO,hl4);
              cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
              cg.a_label(exprasmlist,hl4);
            end;
        end;

      { we have LOC_JUMP as result }
      if cmpop then
        location_reset(location,LOC_JUMP,OS_NO)
      else
        location_copy(location,left.location);
    end;
  1085. {*****************************************************************************
  1086. AddMMX
  1087. *****************************************************************************}
  1088. {$ifdef SUPPORT_MMX}
  {
    Second-pass code generation for MMX array operands.

    Selects the MMX instruction from the node type, the MMX element type
    of the left operand (mmx_type) and the cs_mmx_saturation local switch,
    forces the left operand into an MMX register and emits the operation
    with the result in left.location.  Node types other than addn, muln,
    subn, xorn, orn and andn are reported as a type mismatch.
  }
  procedure ti386addnode.second_addmmx;
    var
      op          : TAsmOp;
      pushedfpu,
      cmpop       : boolean;
      mmxbase     : tmmxtype;
      r,hregister : tregister;
    begin
      pass_left_and_right(pushedfpu);

      { no MMX operation handled here is a comparison }
      cmpop:=false;
      mmxbase:=mmx_type(left.resulttype.def);
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  { 16 bit elements need the word form of the }
                  { signed saturating add, not the byte form  }
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { 16 bit elements need the word form of the      }
                  { signed saturating subtract, not the byte form  }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          CGMessage(type_e_mismatch);
      end;

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              { right is already in an MMX register: use it as left }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=rg.getregistermm(exprasmlist);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  location_release(exprasmlist,left.location);

                  hregister:=rg.getregistermm(exprasmlist);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { swapped subtraction: right is loaded into MM7, combined }
              { with left there and moved back into left's register     }
              r.enum:=R_MM7;
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,r);
                  emit_reg_reg(op,S_NO,left.location.register,r);
                  emit_reg_reg(A_MOVQ,S_NO,r,left.location.register);
                end
              else
                begin
                  { left is an MMX register here, so it is the right }
                  { operand that has to be a memory reference        }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,r);
                  emit_reg_reg(op,S_NO,left.location.register,r);
                  emit_reg_reg(A_MOVQ,S_NO,r,left.location.register);
                  location_release(exprasmlist,right.location);
                end;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                begin
                  emit_reg_reg(op,S_NO,right.location.register,left.location.register);
                end
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                  location_release(exprasmlist,right.location);
                end;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { the result is produced in right's register, so the }
              { locations are swapped to make it the left operand  }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
        end;

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      if cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(cmpop,true);
    end;
  1262. {$endif SUPPORT_MMX}
  1263. {*****************************************************************************
  1264. MUL
  1265. *****************************************************************************}
  1266. {$ifdef newra}
  {
    Unsigned 32 bit multiplication for the new register allocator.

    The left operand is loaded into a freshly allocated register and the
    right operand into EAX; a one-operand MUL is emitted on the former and
    the value in EAX is finally copied into a newly allocated result
    register, which becomes location.register.
  }
  procedure ti386addnode.second_mul;
    var
      factor_reg,
      eax_reg,
      edx_reg : Tregister;
    begin
      { describe the fixed registers used by MUL }
      eax_reg.enum:=R_INTREGISTER;
      eax_reg.number:=NR_EAX;
      edx_reg.enum:=R_INTREGISTER;
      edx_reg.number:=NR_EDX;

      { location.register is only filled in at the very end (JM) }
      location_reset(location,LOC_REGISTER,OS_INT);

      { the left value goes into a temporary register, after which }
      { its location can be released                               }
      factor_reg:=rg.getregisterint(exprasmlist,OS_INT);
      cg.a_load_loc_reg(exprasmlist,left.location,factor_reg);
      location_release(exprasmlist,left.location);

      { the right value goes into EAX, which is allocated explicitly }
      rg.getexplicitregisterint(exprasmlist,NR_EAX);
      cg.a_load_loc_reg(exprasmlist,right.location,eax_reg);
      location_release(exprasmlist,right.location);

      { the temporary register is freed by the mul instruction, so }
      { it is handed back before the instruction is emitted        }
      rg.ungetregisterint(exprasmlist,factor_reg);

      { EDX is modified by a mul as well, so allocate it too (JM) }
      rg.getexplicitregisterint(exprasmlist,NR_EDX);
      emit_reg(A_MUL,S_L,factor_reg);

      { give EDX and EAX back, in that order }
      rg.ungetregisterint(exprasmlist,edx_reg);
      rg.ungetregisterint(exprasmlist,eax_reg);

      { move the value in EAX into a newly allocated result register }
      location.register:=rg.getregisterint(exprasmlist,OS_INT);
      emit_reg_reg(A_MOV,S_L,eax_reg,location.register);

      location_freetemp(exprasmlist,left.location);
      location_freetemp(exprasmlist,right.location);
    end;
  1301. {$else}
  {
    Unsigned 32 bit multiplication for the old register allocator.

    EAX and EDX are pushed when they are in use by something other than
    the operands, the left operand is loaded into EDI and the right one
    into EAX, a one-operand MUL on EDI is emitted, the value in EAX is
    copied into a newly allocated result register and the pushed registers
    are popped again.
  }
  procedure ti386addnode.second_mul;
    var
      restore_eax,
      restore_edx : boolean;
      pushable    : Tsupregset;
      eax_reg,
      edx_reg,
      edi_reg     : Tregister;
    begin
      { describe the three fixed registers used below }
      eax_reg.enum:=R_INTREGISTER;
      eax_reg.number:=NR_EAX;
      edx_reg.enum:=R_INTREGISTER;
      edx_reg.number:=NR_EDX;
      edi_reg.enum:=R_INTREGISTER;
      edi_reg.number:=NR_EDI;

      restore_eax:=false;
      restore_edx:=false;

      { left.location and right.location must only be freed when   }
      { they are really released, because the optimizer NEEDS      }
      { correct regalloc info!!! (JM)                              }

      { location.register is only filled in at the very end (JM) }
      location_reset(location,LOC_REGISTER,OS_INT);

      { after the two calls below pushable no longer contains EAX   }
      { and/or EDX if they are used by the left or right location,  }
      { except when they are regvars; it still contains them if     }
      { they are used in another location (JM)                      }
      pushable:=all_intregisters;
      remove_non_regvars_from_loc(right.location,pushable);
      remove_non_regvars_from_loc(left.location,pushable);

      { save EAX if it is in use by something else }
      if (RS_EAX in pushable) and
         not(RS_EAX in rg.unusedregsint) then
        begin
          emit_reg(A_PUSH,S_L,eax_reg);
          restore_eax:=true;
        end;
      { save EDX if it is in use by something else }
      if (RS_EDX in pushable) and
         not(RS_EDX in rg.unusedregsint) then
        begin
          emit_reg(A_PUSH,S_L,edx_reg);
          restore_edx:=true;
        end;

      { left.location can be R_EAX, so the left value is loaded }
      { into EDI first                                          }
      rg.getexplicitregisterint(exprasmlist,NR_EDI);
      cg.a_load_loc_reg(exprasmlist,left.location,edi_reg);
      location_release(exprasmlist,left.location);

      { allocate EAX and load the right value into it }
      if RS_EAX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.Alloc(eax_reg));
      cg.a_load_loc_reg(exprasmlist,right.location,eax_reg);
      location_release(exprasmlist,right.location);

      { allocate EAX if it isn't yet allocated (JM) }
      if RS_EAX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.Alloc(eax_reg));
      { also allocate EDX, since it is also modified by a mul (JM) }
      if RS_EDX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.Alloc(edx_reg));

      emit_reg(A_MUL,S_L,edi_reg);
      rg.ungetregisterint(exprasmlist,edi_reg);

      { deallocate EDX and EAX again, in that order }
      if RS_EDX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.DeAlloc(edx_reg));
      if RS_EAX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.DeAlloc(eax_reg));

      { move the value in EAX into a newly allocated result register }
      location.register:=rg.getregisterint(exprasmlist,OS_INT);
      emit_reg_reg(A_MOV,S_L,eax_reg,location.register);

      { restore the saved registers in reverse push order }
      if restore_edx then
        emit_reg(A_POP,S_L,edx_reg);
      if restore_eax then
        emit_reg(A_POP,S_L,eax_reg);

      location_freetemp(exprasmlist,left.location);
      location_freetemp(exprasmlist,right.location);
    end;
  1381. {$endif}
  1382. {*****************************************************************************
  1383. pass_2
  1384. *****************************************************************************}
  {
    Second-pass entry point of the i386 add node.

    Dispatches on the type of the left operand to the specialised routines
    (second_addboolean, second_add64bit, second_addstring,
    second_addsmallset, second_addmmx, second_addfloat).  Everything that
    is not dispatched is handled here: pointer, class/interface,
    classrefdef, procvardef, 32 bit enum and 32 bit ordinal operands get
    the generic integer code, 8/16 bit enums, chars and widechars only
    support comparisons, and anything else is reported as a type mismatch.
  }
  procedure ti386addnode.pass_2;
    { is also being used for xor, "mul", "sub", or, and as well as }
    { for the comparison operators                                 }
    var
      pushedfpu,
      mboverflow,cmpop : boolean;
      op : tasmop;
      opsize : topsize;

      { true, if unsigned types are compared }
      unsigned : boolean;
      { is_in_dest if the result is put directly into }
      { the resulting reference or varregister }
      {is_in_dest : boolean;}
      { true, if for set subtractions the extra not should be generated }
      extra_not : boolean;

    begin
      { to make it more readable, string and set (not smallset!) have their
        own procedures }
      case left.resulttype.def.deftype of
        orddef :
          begin
            { handling boolean expressions }
            if is_boolean(left.resulttype.def) and
               is_boolean(right.resulttype.def) then
              begin
                second_addboolean;
                exit;
              end
            { 64bit operations }
            else if is_64bitint(left.resulttype.def) then
              begin
                second_add64bit;
                exit;
              end;
          end;
        stringdef :
          begin
            second_addstring;
            exit;
          end;
        setdef :
          begin
            { normalsets are already handled in pass1 }
            if (tsetdef(left.resulttype.def).settype<>smallset) then
              internalerror(200109041);
            second_addsmallset;
            exit;
          end;
        arraydef :
          begin
{$ifdef SUPPORT_MMX}
            if is_mmx_able_array(left.resulttype.def) then
              begin
                second_addmmx;
                exit;
              end;
{$endif SUPPORT_MMX}
          end;
        floatdef :
          begin
            second_addfloat;
            exit;
          end;
      end;

      { defaults }
      {is_in_dest:=false;}
      extra_not:=false;
      mboverflow:=false;
      cmpop:=false;
      { op gets a defined value so that the A_MUL test below never }
      { reads it uninitialised after a reported type mismatch      }
      op:=A_NONE;
      unsigned:=not(is_signed(left.resulttype.def)) or
                not(is_signed(right.resulttype.def));
      opsize:=def_opsize(left.resulttype.def);

      pass_left_and_right(pushedfpu);

      if (left.resulttype.def.deftype=pointerdef) or
         (right.resulttype.def.deftype=pointerdef) or

         (is_class_or_interface(right.resulttype.def) and is_class_or_interface(left.resulttype.def)) or

         (left.resulttype.def.deftype=classrefdef) or

         (left.resulttype.def.deftype=procvardef) or

         ((left.resulttype.def.deftype=enumdef) and
          (left.resulttype.def.size=4)) or

         ((left.resulttype.def.deftype=orddef) and
          (torddef(left.resulttype.def).typ in [s32bit,u32bit])) or
         ((right.resulttype.def.deftype=orddef) and
          (torddef(right.resulttype.def).typ in [s32bit,u32bit])) then
        begin
          case nodetype of
            addn :
              begin
                op:=A_ADD;
                mboverflow:=true;
              end;
            muln :
              begin
                if unsigned then
                  op:=A_MUL
                else
                  op:=A_IMUL;
                mboverflow:=true;
              end;
            subn :
              begin
                op:=A_SUB;
                mboverflow:=true;
              end;
            ltn,lten,
            gtn,gten,
            equaln,unequaln :
              begin
                op:=A_CMP;
                cmpop:=true;
              end;
            xorn :
              op:=A_XOR;
            orn :
              op:=A_OR;
            andn :
              op:=A_AND;
            else
              CGMessage(type_e_mismatch);
          end;

          { filter MUL, which requires special handling }
          if op=A_MUL then
            begin
              second_mul;
              exit;
            end;

          { Convert flags to register first }
          if (left.location.loc=LOC_FLAGS) then
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
          if (right.location.loc=LOC_FLAGS) then
            location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],false);

          left_must_be_reg(opsize,false);
          emit_generic_code(op,opsize,unsigned,extra_not,mboverflow);
          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          { after a comparison the left operand is no longer needed, }
          { unless it is a register variable                         }
          if cmpop and
             (left.location.loc<>LOC_CREGISTER) then
            begin
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(cmpop,unsigned);
        end

      { 8/16 bit enum,char,wchar types }
      else
        if ((left.resulttype.def.deftype=orddef) and
            (torddef(left.resulttype.def).typ in [uchar,uwidechar])) or
           ((left.resulttype.def.deftype=enumdef) and
            ((left.resulttype.def.size=1) or
             (left.resulttype.def.size=2))) then
          begin
            { only comparisons are valid for these types }
            case nodetype of
              ltn,lten,gtn,gten,
              equaln,unequaln :
                cmpop:=true;
              else
                CGMessage(type_e_mismatch);
            end;
            left_must_be_reg(opsize,false);
            emit_op_right_left(A_CMP,opsize);
            location_freetemp(exprasmlist,right.location);
            location_release(exprasmlist,right.location);
            if left.location.loc<>LOC_CREGISTER then
              begin
                location_freetemp(exprasmlist,left.location);
                location_release(exprasmlist,left.location);
              end;
            set_result_location(true,true);
          end
      else
        CGMessage(type_e_mismatch);
    end;
  1557. begin
{ Unit initialization: point caddnode at ti386addnode.
  NOTE(review): presumably caddnode is the class reference used by the
  generic code to create add nodes, so that this i386-specific class is
  instantiated - confirm against the declaration of caddnode. }
  1558. caddnode:=ti386addnode;
  1559. end.
  1560. {
  1561. $Log$
  1562. Revision 1.64 2003-04-23 09:51:16 daniel
  1563. * Removed usage of edi in a lot of places when new register allocator used
  1564. + Added newra versions of g_concatcopy and secondadd_float
  1565. Revision 1.63 2003/04/22 23:50:23 peter
  1566. * firstpass uses expectloc
  1567. * checks if there are differences between the expectloc and
  1568. location.loc from secondpass in EXTDEBUG
  1569. Revision 1.62 2003/04/22 10:09:35 daniel
  1570. + Implemented the actual register allocator
  1571. + Scratch registers unavailable when new register allocator used
  1572. + maybe_save/maybe_restore unavailable when new register allocator used
  1573. Revision 1.61 2003/04/17 10:02:48 daniel
  1574. * Tweaked register allocate/deallocate positition to less interferences
  1575. are generated.
  1576. Revision 1.60 2003/03/28 19:16:57 peter
  1577. * generic constructor working for i386
  1578. * remove fixed self register
  1579. * esi added as address register for i386
  1580. Revision 1.59 2003/03/13 19:52:23 jonas
  1581. * and more new register allocator fixes (in the i386 code generator this
  1582. time). At least now the ppc cross compiler can compile the linux
  1583. system unit again, but I haven't tested it.
  1584. Revision 1.58 2003/03/08 20:36:41 daniel
  1585. + Added newra version of Ti386shlshrnode
  1586. + Added interference graph construction code
  1587. Revision 1.57 2003/03/08 13:59:17 daniel
  1588. * Work to handle new register notation in ag386nsm
  1589. + Added newra version of Ti386moddivnode
  1590. Revision 1.56 2003/03/08 10:53:48 daniel
  1591. * Created newra version of secondmul in n386add.pas
  1592. Revision 1.55 2003/02/19 22:00:15 daniel
  1593. * Code generator converted to new register notation
  1594. - Horribily outdated todo.txt removed
  1595. Revision 1.54 2003/01/13 18:37:44 daniel
  1596. * Work on register conversion
  1597. Revision 1.53 2003/01/08 18:43:57 daniel
  1598. * Tregister changed into a record
  1599. Revision 1.52 2002/11/25 17:43:26 peter
  1600. * splitted defbase in defutil,symutil,defcmp
  1601. * merged isconvertable and is_equal into compare_defs(_ext)
  1602. * made operator search faster by walking the list only once
  1603. Revision 1.51 2002/11/15 01:58:56 peter
  1604. * merged changes from 1.0.7 up to 04-11
  1605. - -V option for generating bug report tracing
  1606. - more tracing for option parsing
  1607. - errors for cdecl and high()
  1608. - win32 import stabs
  1609. - win32 records<=8 are returned in eax:edx (turned off by default)
  1610. - heaptrc update
  1611. - more info for temp management in .s file with EXTDEBUG
  1612. Revision 1.50 2002/10/20 13:11:27 jonas
  1613. * re-enabled optimized version of comparisons with the empty string that
  1614. I accidentally disabled in revision 1.26
  1615. Revision 1.49 2002/08/23 16:14:49 peter
  1616. * tempgen cleanup
  1617. * tt_noreuse temp type added that will be used in genentrycode
  1618. Revision 1.48 2002/08/14 18:41:48 jonas
  1619. - remove valuelow/valuehigh fields from tlocation, because they depend
  1620. on the endianess of the host operating system -> difficult to get
  1621. right. Use lo/hi(location.valueqword) instead (remember to use
  1622. valueqword and not value!!)
  1623. Revision 1.47 2002/08/11 14:32:29 peter
  1624. * renamed current_library to objectlibrary
  1625. Revision 1.46 2002/08/11 13:24:16 peter
  1626. * saving of asmsymbols in ppu supported
  1627. * asmsymbollist global is removed and moved into a new class
  1628. tasmlibrarydata that will hold the info of a .a file which
  1629. corresponds with a single module. Added librarydata to tmodule
  1630. to keep the library info stored for the module. In the future the
  1631. objectfiles will also be stored to the tasmlibrarydata class
  1632. * all getlabel/newasmsymbol and friends are moved to the new class
  1633. Revision 1.45 2002/07/26 11:17:52 jonas
  1634. * the optimization of converting a multiplication with a power of two to
  1635. a shl is moved from n386add/secondpass to nadd/resulttypepass
  1636. Revision 1.44 2002/07/20 11:58:00 florian
  1637. * types.pas renamed to defbase.pas because D6 contains a types
  1638. unit so this would conflicts if D6 programms are compiled
  1639. + Willamette/SSE2 instructions to assembler added
  1640. Revision 1.43 2002/07/11 14:41:32 florian
  1641. * start of the new generic parameter handling
  1642. Revision 1.42 2002/07/07 09:52:33 florian
  1643. * powerpc target fixed, very simple units can be compiled
  1644. * some basic stuff for better callparanode handling, far from being finished
  1645. Revision 1.41 2002/07/01 18:46:31 peter
  1646. * internal linker
  1647. * reorganized aasm layer
  1648. Revision 1.40 2002/07/01 16:23:55 peter
  1649. * cg64 patch
  1650. * basics for currency
  1651. * asnode updates for class and interface (not finished)
  1652. Revision 1.39 2002/05/18 13:34:22 peter
  1653. * readded missing revisions
  1654. Revision 1.38 2002/05/16 19:46:51 carl
  1655. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  1656. + try to fix temp allocation (still in ifdef)
  1657. + generic constructor calls
  1658. + start of tassembler / tmodulebase class cleanup
  1659. Revision 1.36 2002/05/13 19:54:37 peter
  1660. * removed n386ld and n386util units
  1661. * maybe_save/maybe_restore added instead of the old maybe_push
  1662. Revision 1.35 2002/05/12 16:53:17 peter
  1663. * moved entry and exitcode to ncgutil and cgobj
  1664. * foreach gets extra argument for passing local data to the
  1665. iterator function
  1666. * -CR checks also class typecasts at runtime by changing them
  1667. into as
  1668. * fixed compiler to cycle with the -CR option
  1669. * fixed stabs with elf writer, finally the global variables can
  1670. be watched
  1671. * removed a lot of routines from cga unit and replaced them by
  1672. calls to cgobj
  1673. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  1674. u32bit then the other is typecasted also to u32bit without giving
  1675. a rangecheck warning/error.
  1676. * fixed pascal calling method with reversing also the high tree in
  1677. the parast, detected by tcalcst3 test
  1678. Revision 1.34 2002/04/25 20:16:40 peter
  1679. * moved more routines from cga/n386util
  1680. Revision 1.33 2002/04/05 15:09:13 jonas
  1681. * fixed web bug 1915
  1682. Revision 1.32 2002/04/04 19:06:10 peter
  1683. * removed unused units
  1684. * use tlocation.size in cg.a_*loc*() routines
  1685. Revision 1.31 2002/04/02 17:11:35 peter
  1686. * tlocation,treference update
  1687. * LOC_CONSTANT added for better constant handling
  1688. * secondadd splitted in multiple routines
  1689. * location_force_reg added for loading a location to a register
  1690. of a specified size
  1691. * secondassignment parses now first the right and then the left node
  1692. (this is compatible with Kylix). This saves a lot of push/pop especially
  1693. with string operations
  1694. * adapted some routines to use the new cg methods
  1695. Revision 1.29 2002/03/04 19:10:13 peter
  1696. * removed compiler warnings
  1697. }