n386add.pas 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848
  1. {
  2. $Id$
  3. Copyright (c) 2000-2002 by Florian Klaempfl
  4. Code generation for add nodes on the i386
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit n386add;
  19. {$i fpcdefs.inc}
  20. interface
  21. uses
  22. node,nadd,cpubase,cginfo;
  type
    { Add node with i386-specific code generation; derives from taddnode. }
    ti386addnode = class(taddnode)
      { overrides the inherited second pass }
      procedure pass_2;override;
    protected
      { overrides the inherited first-pass handling of string operands so
        that shortstring comparisons are kept for pass 2 }
      function first_addstring : tnode; override;
    private
      { runs secondpass over left and right; pushedfpu reports whether the
        left operand had to be pushed out of the fpu }
      procedure pass_left_and_right(var pushedfpu:boolean);
      { maps the comparison node type (and swap state) to cpu result flags }
      function getresflags(unsigned : boolean) : tresflags;
      { makes sure left.location is a LOC_REGISTER }
      procedure left_must_be_reg(opsize:TOpSize;noswap:boolean);
      { emits "op right,left" for a left operand that is in a register }
      procedure emit_op_right_left(op:TAsmOp;opsize:TOpSize);
      { emits the operation plus the optional overflow check }
      procedure emit_generic_code(op:TAsmOp;opsize:TOpSize;unsigned,extra_not,mboverflow:boolean);
      { sets location to the flags (comparison) or to left.location }
      procedure set_result_location(cmpop,unsigned:boolean);
      { one code generator per operand kind }
      procedure second_addstring;
      procedure second_addboolean;
      procedure second_addfloat;
      procedure second_addsmallset;
      procedure second_mul;
{$ifdef SUPPORT_MMX}
      procedure second_addmmx;
{$endif SUPPORT_MMX}
      procedure second_add64bit;
    end;
  45. implementation
  46. uses
  47. globtype,systems,
  48. cutils,verbose,globals,
  49. symconst,symdef,paramgr,
  50. aasmbase,aasmtai,aasmcpu,defutil,htypechk,
  51. cgbase,pass_2,regvars,
  52. ncon,nset,
  53. cga,ncgutil,tgobj,rgobj,cgobj,cg64f32,rgcpu;
  54. {*****************************************************************************
  55. Helpers
  56. *****************************************************************************}
  const
    { translates an assembler operand size in the range S_B..S_L into the
      code generator size with the same width (8, 16 or 32 bit) }
    opsize_2_cgsize : array[S_B..S_L] of tcgsize =
      (OS_8,OS_16,OS_32);
  { Generates the code for both operands of this node.

    The more complex operand is handled first (firstcomplex), an ordinal
    constant on the left is swapped to the right, and then left and right
    are run through secondpass.  Without newra the integer registers that
    right needs are saved around its evaluation.

    pushedfpu : set to the result of maybe_pushfpu when the node is expected
                to deliver its result in an fpu register, false otherwise. }
  procedure ti386addnode.pass_left_and_right(var pushedfpu:boolean);
    var
      pushedregs : tmaybesave;
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      { in case of constant put it to the left }
      if (left.nodetype=ordconstn) then
        swapleftright;
      secondpass(left);

      { are too few registers free? }
{$ifndef newra}
      maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
{$endif newra}
      { The generators in this unit fill in location only after both
        operands have been processed (set_result_location/location_reset),
        so location.loc is not meaningful yet at this point.  The first
        pass records where the result is expected in expectloc (see
        first_addstring), which is what has to be tested here. }
      if expectloc=LOC_FPUREGISTER then
        pushedfpu:=maybe_pushfpu(exprasmlist,right.registersfpu,left.location)
      else
        pushedfpu:=false;
      secondpass(right);
{$ifndef newra}
      maybe_restore(exprasmlist,left.location,pushedregs);
{$endif}
    end;
  { Returns the cpu flag condition that represents the comparison of this
    node after a "cmp right,left".

    unsigned : selects the unsigned (above/below) instead of the signed
               (greater/less) conditions.

    When nf_swaped is set the operands were exchanged, so the ordering
    conditions are mirrored.  Equality tests do not depend on either.
    Any node type other than the six comparisons is a compiler bug and is
    reported through internalerror instead of returning an undefined
    value. }
  function ti386addnode.getresflags(unsigned : boolean) : tresflags;
    begin
      case nodetype of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn,lten,gtn,gten :
          if not(unsigned) then
            begin
              if nf_swaped in flags then
                case nodetype of
                  ltn : getresflags:=F_G;
                  lten : getresflags:=F_GE;
                  gtn : getresflags:=F_L;
                  gten : getresflags:=F_LE;
                end
              else
                case nodetype of
                  ltn : getresflags:=F_L;
                  lten : getresflags:=F_LE;
                  gtn : getresflags:=F_G;
                  gten : getresflags:=F_GE;
                end;
            end
          else
            begin
              if nf_swaped in flags then
                case nodetype of
                  ltn : getresflags:=F_A;
                  lten : getresflags:=F_AE;
                  gtn : getresflags:=F_B;
                  gten : getresflags:=F_BE;
                end
              else
                case nodetype of
                  ltn : getresflags:=F_B;
                  lten : getresflags:=F_BE;
                  gtn : getresflags:=F_A;
                  gten : getresflags:=F_AE;
                end;
            end;
        else
          { not a comparison: there is no flag that could be returned }
          internalerror(2003050101);
      end;
    end;
  { Ensures that left.location is a LOC_REGISTER.

    opsize : operand size; converted through opsize_2_cgsize when the
             value has to be loaded.
    noswap : when true the operands are never exchanged, left is always
             loaded into a register instead. }
  procedure ti386addnode.left_must_be_reg(opsize:TOpSize;noswap:boolean);
    begin
      { already in a register, nothing to do }
      if left.location.loc=LOC_REGISTER then
        exit;

      if (not noswap) and
         (right.location.loc=LOC_REGISTER) then
        begin
          { right is a register, so exchanging the locations is enough;
            remember the exchange in the node flags }
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
        end
      else
        { maybe we can reuse a constant register when the operation is a
          comparison that doesn't change the value of the register }
        location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],
          (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]));
    end;
  { Appends the instruction "op right,left" to exprasmlist.

    left must already be in a register.  right may be a register, a memory
    reference or a constant; every other location kind raises
    internalerror 200203232. }
  procedure ti386addnode.emit_op_right_left(op:TAsmOp;opsize:TOpsize);
    var
      dst : tregister;
    begin
      { destination register of the instruction }
      dst:=left.location.register;
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          exprasmlist.concat(taicpu.op_reg_reg(op,opsize,right.location.register,dst));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          exprasmlist.concat(taicpu.op_ref_reg(op,opsize,right.location.reference,dst));
        LOC_CONSTANT :
          exprasmlist.concat(taicpu.op_const_reg(op,opsize,right.location.value,dst));
        else
          internalerror(200203232);
      end;
    end;
  { Fills in the location of the node after the operation was emitted.

    cmpop    : true for comparisons; the result then lives in the cpu
               flags selected by getresflags(unsigned).
    unsigned : passed on to getresflags.

    For all other operations the result is where left is. }
  procedure ti386addnode.set_result_location(cmpop,unsigned:boolean);
    begin
      if not cmpop then
        begin
          location_copy(location,left.location);
          exit;
        end;
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned);
    end;
  { Emits the instruction(s) for "left op right" and, on request, the
    overflow check that follows them.

    op         : instruction to emit.
    opsize     : operand size of the instruction.
    unsigned   : selects the carry based (unsigned) or the overflow flag
                 based (signed) overflow test.
    extra_not  : the second operand has to be inverted first (the caller
                 in this unit uses it for the set difference, together
                 with A_AND).
    mboverflow : the operation may overflow; the check itself is only
                 generated when cs_check_overflow is active.

    left.location is expected to be a LOC_REGISTER on entry. }
  procedure ti386addnode.emit_generic_code(op:TAsmOp;opsize:TOpSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount : longint;
      skiplabel : tasmlabel;
      tmpreg : Tregister;
      swappedsub : boolean;
    begin
      { a subtraction whose operands were exchanged needs special care
        because it is not commutative }
      swappedsub:=(nodetype=subn) and (nf_swaped in flags);

      if right.location.loc=LOC_REGISTER then
        begin
          { both operands are in registers }
          if swappedsub then
            begin
              { when swapped another result register }
              if extra_not then
                emit_reg(A_NOT,S_L,left.location.register);
              emit_reg_reg(op,opsize,left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,S_L,right.location.register);
              { for these operations the operand order does not matter,
                so the locations are exchanged before emitting }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,opsize,right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if swappedsub then
            begin
              if extra_not then
                emit_reg(A_NOT,opsize,left.location.register);
              { load right into a scratch register, operate on the scratch
                register and move the result back into left }
{$ifdef newra}
              tmpreg:=rg.getregisterint(exprasmlist,OS_INT);
{$else}
              tmpreg.enum:=R_INTREGISTER;
              tmpreg.number:=NR_EDI;
              rg.getexplicitregisterint(exprasmlist,NR_EDI);
{$endif}
              cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,tmpreg);
              emit_reg_reg(op,opsize,left.location.register,tmpreg);
              emit_reg_reg(A_MOV,opsize,tmpreg,left.location.register);
              rg.ungetregisterint(exprasmlist,tmpreg);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { (in)equality with zero: test the register against itself }
                  emit_reg_reg(A_TEST,opsize,left.location.register,left.location.register);
                end
              else
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in aktlocalswitches) then
                  begin
                    { add 1 -> inc }
                    emit_reg(A_INC,opsize,left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in aktlocalswitches) then
                  begin
                    { sub 1 -> dec }
                    emit_reg(A_DEC,opsize,left.location.register);
                  end
              else
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(right.location.value,shiftcount)) and
                   not(cs_check_overflow in aktlocalswitches) then
                  begin
                    { multiplication by a power of two -> shift left }
                    emit_const_reg(A_SHL,opsize,shiftcount,left.location.register);
                  end
              else
                begin
                  if extra_not then
                    begin
                      { invert a copy of right and "and" it into left }
{$ifdef newra}
                      tmpreg:=rg.getregisterint(exprasmlist,OS_INT);
{$else}
                      rg.getexplicitregisterint(exprasmlist,NR_EDI);
                      tmpreg.enum:=R_INTREGISTER;
                      tmpreg.number:=NR_EDI;
{$endif}
                      cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,tmpreg);
                      emit_reg(A_NOT,S_L,tmpreg);
                      emit_reg_reg(A_AND,S_L,tmpreg,left.location.register);
                      rg.ungetregisterint(exprasmlist,tmpreg);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow and
         (cs_check_overflow in aktlocalswitches) then
        begin
          { skip the call of the overflow handler when nothing overflowed }
          objectlibrary.getlabel(skiplabel);
          if unsigned then
            emitjmp(C_NB,skiplabel)
          else
            emitjmp(C_NO,skiplabel);
          cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
          cg.a_label(exprasmlist,skiplabel);
        end;
    end;
  294. {*****************************************************************************
  295. Addstring
  296. *****************************************************************************}
  { note: if you implemented an fpc_shortstr_concat similar to the    }
  { one in i386.inc, you have to override first_addstring the way     }
  { ti386addnode.first_addstring does and implement the shortstring   }
  { concat manually! The generic routine is different from the i386   }
  { one (JM)                                                          }
  { First pass for string operands.

    Comparisons of shortstrings are kept for pass 2 (expected result in
    the cpu flags, no registers reserved) unless one of the operands is an
    empty string constant.  The function then returns nil.  Everything
    else is delegated to the inherited implementation, whose result is
    returned. }
  function ti386addnode.first_addstring : tnode;

    { true when p is a string constant of length zero }
    function is_empty_stringconst(p : tnode) : boolean;
      begin
        is_empty_stringconst:=(p.nodetype=stringconstn) and (str_length(p)=0);
      end;

    begin
      { special cases for shortstrings, handled in pass_2 (JM) }
      { can't handle fpc_shortstr_compare with compilerproc either because it }
      { returns its results in the flags instead of in eax }
      if (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) and
         is_shortstring(left.resulttype.def) and
         not(is_empty_stringconst(left) or is_empty_stringconst(right)) then
        begin
          expectloc:=LOC_FLAGS;
          calcregisters(self,0,0,0);
          first_addstring:=nil;
        end
      else
        { otherwise, use the generic code }
        first_addstring:=inherited first_addstring;
    end;
  { Generates the code for string operations that were not turned into
    helper calls by the first pass: comparisons of shortstrings.

    The addresses of both strings are passed as parameters (left as
    parameter 2, right as parameter 1) to FPC_SHORTSTR_COMPARE and the
    result is taken from the cpu flags, using the unsigned conditions.
    Any other string type raises internalerror 200108303.

    Without newra the register variables are written back before the call
    for the same register set that is saved around the call
    (VOLATILE_INTREGISTERS); the set used to be passed in a local variable
    that was never assigned. }
  procedure ti386addnode.second_addstring;
{$ifndef newra}
    var
      pushed : Tpushedsavedint;
{$endif newra}
    begin
      { string operations are not commutative }
      if nf_swaped in flags then
        swapleftright;
      case tstringdef(left.resulttype.def).string_typ of
        st_shortstring:
          begin
            case nodetype of
              ltn,lten,gtn,gten,equaln,unequaln :
                begin
{$ifndef newra}
                  rg.saveusedintregisters(exprasmlist,pushed,VOLATILE_INTREGISTERS);
{$endif newra}
                  secondpass(left);
                  location_release(exprasmlist,left.location);
                  cg.a_paramaddr_ref(exprasmlist,left.location.reference,paramanager.getintparaloc(exprasmlist,2));
                  secondpass(right);
                  location_release(exprasmlist,right.location);
                  cg.a_paramaddr_ref(exprasmlist,right.location.reference,paramanager.getintparaloc(exprasmlist,1));
{$ifdef newra}
                  rg.allocexplicitregistersint(exprasmlist,[first_supreg..last_supreg]-[RS_FRAME_POINTER_REG,RS_STACK_POINTER_REG]);
{$else}
                  rg.saveintregvars(exprasmlist,VOLATILE_INTREGISTERS);
{$endif}
                  cg.a_call_name(exprasmlist,'FPC_SHORTSTR_COMPARE');
                  paramanager.freeintparaloc(exprasmlist,2);
                  paramanager.freeintparaloc(exprasmlist,1);
{$ifdef newra}
                  rg.deallocexplicitregistersint(exprasmlist,[first_supreg..last_supreg]-[RS_FRAME_POINTER_REG,RS_STACK_POINTER_REG]);
{$else}
                  rg.restoreusedintregisters(exprasmlist,pushed);
{$endif}
                  location_freetemp(exprasmlist,left.location);
                  location_freetemp(exprasmlist,right.location);
                end;
            end;
            { the helper delivers its result in the flags }
            set_result_location(true,true);
          end;
        else
          { rest should be handled in first pass (JM) }
          internalerror(200108303);
      end;
    end;
  371. {*****************************************************************************
  372. AddBoolean
  373. *****************************************************************************}
  { Generates the code for operations on boolean operands.

    Comparisons, xor and - with full boolean evaluation enabled - and/or
    evaluate both operands into registers and combine them with one
    instruction.  Otherwise and/or are generated as short circuit jumps and
    the result is a LOC_JUMP.

    An operand that is expected to be delivered as a jump (LOC_JUMP) gets
    its own true/false labels while it is evaluated.  Whether this is the
    case has to be known before secondpass is called on the operand, so the
    first-pass prediction in expectloc is tested; the location of the
    operand is not filled in before its second pass has run. }
  procedure ti386addnode.second_addboolean;
    var
      op : TAsmOp;
      opsize : TOpsize;
      cmpop,
      isjump : boolean;
      otl,ofl : tasmlabel;
      pushedregs : tmaybesave;
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      cmpop:=false;
      { operand size: the smallest boolean type involved decides }
      if (torddef(left.resulttype.def).typ=bool8bit) or
         (torddef(right.resulttype.def).typ=bool8bit) then
        opsize:=S_B
      else
        if (torddef(left.resulttype.def).typ=bool16bit) or
           (torddef(right.resulttype.def).typ=bool16bit) then
          opsize:=S_W
        else
          opsize:=S_L;

      if (cs_full_boolean_eval in aktlocalswitches) or
         (nodetype in [unequaln,ltn,lten,gtn,gten,equaln,xorn]) then
        begin
          if left.nodetype in [ordconstn,realconstn] then
            swapleftright;

          { left operand: give it private labels when it produces jumps }
          isjump:=(left.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(left);
          if left.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end;

{$ifndef newra}
          maybe_save(exprasmlist,right.registers32,left.location,pushedregs);
{$endif}
          { right operand: same treatment }
          isjump:=(right.expectloc=LOC_JUMP);
          if isjump then
            begin
              otl:=truelabel;
              objectlibrary.getlabel(truelabel);
              ofl:=falselabel;
              objectlibrary.getlabel(falselabel);
            end;
          secondpass(right);
{$ifndef newra}
          maybe_restore(exprasmlist,left.location,pushedregs);
{$endif newra}
          if right.location.loc in [LOC_FLAGS,LOC_JUMP] then
            location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],false);
          if isjump then
            begin
              truelabel:=otl;
              falselabel:=ofl;
            end;

          { left must be a register }
          left_must_be_reg(opsize,false);
          { select the instruction for the operator }
          case nodetype of
            ltn,lten,gtn,gten,
            equaln,unequaln :
              begin
                op:=A_CMP;
                cmpop:=true;
              end;
            xorn :
              op:=A_XOR;
            orn :
              op:=A_OR;
            andn :
              op:=A_AND;
            else
              internalerror(200203247);
          end;
          emit_op_right_left(op,opsize);
          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          if cmpop then
            begin
              { a comparison leaves its result in the flags only }
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(cmpop,true);
        end
      else
        begin
          { short circuit evaluation }
          case nodetype of
            andn,
            orn :
              begin
                location_reset(location,LOC_JUMP,OS_NO);
                case nodetype of
                  andn :
                    begin
                      { a true left operand continues with the right one }
                      otl:=truelabel;
                      objectlibrary.getlabel(truelabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,truelabel);
                      truelabel:=otl;
                    end;
                  orn :
                    begin
                      { a false left operand continues with the right one }
                      ofl:=falselabel;
                      objectlibrary.getlabel(falselabel);
                      secondpass(left);
                      maketojumpbool(exprasmlist,left,lr_load_regvars);
                      cg.a_label(exprasmlist,falselabel);
                      falselabel:=ofl;
                    end;
                  else
                    internalerror(2003042212);
                end;
                secondpass(right);
                maketojumpbool(exprasmlist,right,lr_load_regvars);
              end;
            else
              internalerror(2003042213);
          end;
        end;
    end;
  504. {*****************************************************************************
  505. AddFloat
  506. *****************************************************************************}
  { Generates the code for floating point arithmetic and comparisons using
    the fpu register stack.

    Both operands are brought onto the fpu stack, the popping form of the
    instruction is emitted and fpuvaroffset of the register allocator is
    adjusted for the popped values.  For comparisons the fpu status word is
    transferred to the cpu flags (fnstsw/sahf) and the node result is
    LOC_FLAGS; otherwise the result is the fpu register R_ST. }
  procedure ti386addnode.second_addfloat;
    var
      op : TAsmOp;
      resflags : tresflags;
      pushedfpu,
      cmpop,
      swapped : boolean;
      r,r2:Tregister;
    begin
      pass_left_and_right(pushedfpu);

      cmpop:=false;
      case nodetype of
        addn :
          op:=A_FADDP;
        muln :
          op:=A_FMULP;
        subn :
          op:=A_FSUBP;
        slashn :
          op:=A_FDIVP;
        ltn,lten,gtn,gten,
        equaln,unequaln :
          begin
            op:=A_FCOMPP;
            cmpop:=true;
          end;
        else
          internalerror(2003042214);
      end;

      if (right.location.loc<>LOC_FPUREGISTER) then
        begin
          r.enum:=R_ST;
          cg.a_loadfpu_loc_reg(exprasmlist,right.location,r);
          { NOTE(review): this releases the temp of left although right
            was just loaded - kept as is, confirm that it is intended }
          if (right.location.loc <> LOC_CFPUREGISTER) and
             pushedfpu then
            location_freetemp(exprasmlist,left.location);
          if (left.location.loc<>LOC_FPUREGISTER) then
            begin
              cg.a_loadfpu_loc_reg(exprasmlist,left.location,r);
              if (left.location.loc <> LOC_CFPUREGISTER) and
                 pushedfpu then
                location_freetemp(exprasmlist,left.location);
            end
          else
            begin
              { left was on the stack => swap }
              toggleflag(nf_swaped);
            end;
          { releases the right reference }
          location_release(exprasmlist,right.location);
        end
      { the nominator in st0 }
      else if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          r.enum:=R_ST;
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,r);
          if (left.location.loc <> LOC_CFPUREGISTER) and
             pushedfpu then
            location_freetemp(exprasmlist,left.location);
        end
      else
        begin
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swaped);
        end;

      { releases the left reference }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

      { if we swaped the tree nodes, then use the reverse operator }
      if nf_swaped in flags then
        begin
          if (nodetype=slashn) then
            op:=A_FDIVRP
          else if (nodetype=subn) then
            op:=A_FSUBRP;
        end;

      { to avoid the pentium bug
      if (op=FDIVP) and (opt_processors=pentium) then
        cg.a_call_name(exprasmlist,'EMUL_FDIVP')
      else
      }
      { the Intel assemblers want operands }
      if op<>A_FCOMPP then
        begin
          r.enum:=R_ST;
          r2.enum:=R_ST1;
          emit_reg_reg(op,S_NO,r,r2);
          { one value was popped }
          dec(trgcpu(rg).fpuvaroffset);
        end
      else
        begin
          emit_none(op,S_NO);
          { the compare pops both values }
          dec(trgcpu(rg).fpuvaroffset,2);
        end;

      { on comparison load flags }
      if cmpop then
        begin
{$ifdef newra}
          r:=rg.getexplicitregisterint(exprasmlist,NR_AX);
{$else}
          { eax is in use: keep its value in edi while ax receives the
            status word }
          if not(RS_EAX in rg.unusedregsint) then
            begin
              rg.getexplicitregisterint(exprasmlist,NR_EDI);
              r.enum:=R_INTREGISTER;
              r.number:=NR_EAX;
              r2.enum:=R_INTREGISTER;
              r2.number:=NR_EDI;
              emit_reg_reg(A_MOV,S_L,r,r2);
            end;
          r.enum:=R_INTREGISTER;
          r.number:=NR_AX;
{$endif}
          emit_reg(A_FNSTSW,S_NO,r);
          emit_none(A_SAHF,S_NO);
{$ifdef newra}
          rg.ungetregisterint(exprasmlist,r);
{$else}
          if not(RS_EAX in rg.unusedregsint) then
            begin
              r.enum:=R_INTREGISTER;
              r.number:=NR_EAX;
              r2.enum:=R_INTREGISTER;
              r2.number:=NR_EDI;
              emit_reg_reg(A_MOV,S_L,r2,r);
              rg.ungetregisterint(exprasmlist,r2);
            end;
{$endif}
          { pick the condition; the ordering tests use the unsigned
            (above/below) conditions and are mirrored when the operands
            are swapped }
          swapped:=nf_swaped in flags;
          case nodetype of
            equaln :
              resflags:=F_E;
            unequaln :
              resflags:=F_NE;
            ltn :
              if swapped then
                resflags:=F_A
              else
                resflags:=F_B;
            lten :
              if swapped then
                resflags:=F_AE
              else
                resflags:=F_BE;
            gtn :
              if swapped then
                resflags:=F_B
              else
                resflags:=F_A;
            gten :
              if swapped then
                resflags:=F_BE
              else
                resflags:=F_AE;
          end;
          location_reset(location,LOC_FLAGS,OS_NO);
          location.resflags:=resflags;
        end
      else
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
          location.register.enum:=R_ST;
        end;
    end;
  664. {*****************************************************************************
  665. AddSmallSet
  666. *****************************************************************************}
  { Generates the code for operations on small sets, which are handled as
    32 bit values (operand size S_L).

    The node type selects the instruction: union -> or (or bts when a
    single element is added), symmetric difference -> xor, intersection ->
    and, difference -> and with the inverted second operand, (in)equality
    -> cmp.  The subset tests lten/gten are turned into an "and" followed
    by an equality comparison.  An operand that is a set but not a small
    set raises internalerror 200203301. }
  procedure ti386addnode.second_addsmallset;
    var
      opsize : TOpSize;
      op : TAsmOp;
      cmpop,
      pushedfpu,
      extra_not,
      noswap : boolean;
    begin
      pass_left_and_right(pushedfpu);

      { when a setdef is passed, it has to be a smallset }
      if ((left.resulttype.def.deftype=setdef) and
          (tsetdef(left.resulttype.def).settype<>smallset)) or
         ((right.resulttype.def.deftype=setdef) and
          (tsetdef(right.resulttype.def).settype<>smallset)) then
        internalerror(200203301);

      cmpop:=false;
      noswap:=false;
      extra_not:=false;
      opsize:=S_L;

      case nodetype of
        addn :
          begin
            { this is a really ugly hack!!!!!!!!!! }
            { this could be done later using EDI   }
            { as it is done for subn               }
            { instead of two registers!!!!         }

            { adding elements is not commutative }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            { are we adding set elements ? }
            if right.nodetype=setelementn then
              begin
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts requires both elements to be registers }
                location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
                location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],true);
                op:=A_BTS;
                noswap:=true;
              end
            else
              op:=A_OR;
          end;
        symdifn :
          op:=A_XOR;
        muln :
          op:=A_AND;
        subn :
          begin
            op:=A_AND;
            { a constant second operand is inverted right here, otherwise
              the inversion has to be done by the generated code }
            if (not(nf_swaped in flags)) and
               (right.location.loc=LOC_CONSTANT) then
              right.location.value := not(right.location.value)
            else if (nf_swaped in flags) and
                    (left.location.loc=LOC_CONSTANT) then
              left.location.value := not(left.location.value)
            else
              extra_not:=true;
          end;
        equaln,
        unequaln :
          begin
            op:=A_CMP;
            cmpop:=true;
          end;
        lten,gten:
          begin
            if (not(nf_swaped in flags) and
                (nodetype = lten)) or
               ((nf_swaped in flags) and
                (nodetype = gten)) then
              swapleftright;
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],true);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            cmpop:=true;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        xorn :
          op:=A_XOR;
        orn :
          op:=A_OR;
        andn :
          op:=A_AND;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opsize,noswap);
      emit_generic_code(op,opsize,true,extra_not,false);
      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      if cmpop then
        begin
          { a comparison leaves its result in the flags only }
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(cmpop,true);
    end;
  769. {*****************************************************************************
  770. Add64bit
  771. *****************************************************************************}
  772. procedure ti386addnode.second_add64bit;
  773. var
  774. op : TOpCG;
  775. op1,op2 : TAsmOp;
  776. opsize : TOpSize;
  777. hregister,
  778. hregister2 : tregister;
  779. href : treference;
  780. hl4 : tasmlabel;
  781. pushedfpu,
  782. mboverflow,
  783. cmpop,
  784. unsigned,delete:boolean;
  785. r:Tregister;
  { Emits the conditional jumps that follow the first comparison of a
    64 bit compare.  Only outcomes that already decide the whole
    comparison jump to truelabel or falselabel.  The signedness is taken
    from the variable unsigned of the enclosing procedure. }
  procedure firstjmp64bitcmp;
    var
      savednodetype : tnodetype;
    begin
      load_all_regvars(exprasmlist);
      { the jump sequence is a little bit hairy }
      case nodetype of
        ltn,gtn:
          begin
            emitjmp(flags_to_cond(getresflags(unsigned)),truelabel);
            { cheat a little bit for the negative test: with the swap
              flag toggled getresflags returns the mirrored condition }
            toggleflag(nf_swaped);
            emitjmp(flags_to_cond(getresflags(unsigned)),falselabel);
            toggleflag(nf_swaped);
          end;
        lten,gten:
          begin
            { use the strict comparison here; the node type is changed
              temporarily so that getresflags can be reused }
            savednodetype:=nodetype;
            if nodetype=lten then
              nodetype:=ltn
            else
              nodetype:=gtn;
            emitjmp(flags_to_cond(getresflags(unsigned)),truelabel);
            { cheat for the negative test }
            if nodetype=ltn then
              nodetype:=gtn
            else
              nodetype:=ltn;
            emitjmp(flags_to_cond(getresflags(unsigned)),falselabel);
            nodetype:=savednodetype;
          end;
        equaln:
          emitjmp(C_NE,falselabel);
        unequaln:
          emitjmp(C_NE,truelabel);
      end;
    end;
  823. procedure secondjmp64bitcmp;
  824. begin
  825. { the jump the sequence is a little bit hairy }
  826. case nodetype of
  827. ltn,gtn,lten,gten:
  828. begin
  829. { the comparisaion of the low dword have to be }
  830. { always unsigned! }
  831. emitjmp(flags_to_cond(getresflags(true)),truelabel);
  832. cg.a_jmp_always(exprasmlist,falselabel);
  833. end;
  834. equaln:
  835. begin
  836. emitjmp(C_NE,falselabel);
  837. cg.a_jmp_always(exprasmlist,truelabel);
  838. end;
  839. unequaln:
  840. begin
  841. emitjmp(C_NE,truelabel);
  842. cg.a_jmp_always(exprasmlist,falselabel);
  843. end;
  844. end;
  845. end;
  846. begin
  847. firstcomplex(self);
  848. pass_left_and_right(pushedfpu);
  849. op1:=A_NONE;
  850. op2:=A_NONE;
  851. mboverflow:=false;
  852. cmpop:=false;
  853. opsize:=S_L;
  854. unsigned:=((left.resulttype.def.deftype=orddef) and
  855. (torddef(left.resulttype.def).typ=u64bit)) or
  856. ((right.resulttype.def.deftype=orddef) and
  857. (torddef(right.resulttype.def).typ=u64bit));
  858. case nodetype of
  859. addn :
  860. begin
  861. op:=OP_ADD;
  862. mboverflow:=true;
  863. end;
  864. subn :
  865. begin
  866. op:=OP_SUB;
  867. op1:=A_SUB;
  868. op2:=A_SBB;
  869. mboverflow:=true;
  870. end;
  871. ltn,lten,
  872. gtn,gten,
  873. equaln,unequaln:
  874. begin
  875. op:=OP_NONE;
  876. cmpop:=true;
  877. end;
  878. xorn:
  879. op:=OP_XOR;
  880. orn:
  881. op:=OP_OR;
  882. andn:
  883. op:=OP_AND;
  884. else
  885. begin
  886. { everything should be handled in pass_1 (JM) }
  887. internalerror(200109051);
  888. end;
  889. end;
  890. { left and right no register? }
  891. { then one must be demanded }
  892. if (left.location.loc<>LOC_REGISTER) then
  893. begin
  894. if (right.location.loc<>LOC_REGISTER) then
  895. begin
  896. { we can reuse a CREGISTER for comparison }
  897. if not((left.location.loc=LOC_CREGISTER) and cmpop) then
  898. begin
  899. {$ifdef newra}
  900. delete:=left.location.loc<>LOC_CREGISTER;
  901. {$else}
  902. if (left.location.loc<>LOC_CREGISTER) then
  903. begin
  904. location_freetemp(exprasmlist,left.location);
  905. location_release(exprasmlist,left.location);
  906. end;
  907. {$endif}
  908. hregister:=rg.getregisterint(exprasmlist,OS_INT);
  909. hregister2:=rg.getregisterint(exprasmlist,OS_INT);
  910. {$ifdef newra}
  911. cg64.a_load64_loc_reg(exprasmlist,left.location,joinreg64(hregister,hregister2),delete);
  912. {$else}
  913. cg64.a_load64_loc_reg(exprasmlist,left.location,joinreg64(hregister,hregister2));
  914. {$endif}
  915. location_reset(left.location,LOC_REGISTER,OS_64);
  916. left.location.registerlow:=hregister;
  917. left.location.registerhigh:=hregister2;
  918. end;
  919. end
  920. else
  921. begin
  922. location_swap(left.location,right.location);
  923. toggleflag(nf_swaped);
  924. end;
  925. end;
  926. { at this point, left.location.loc should be LOC_REGISTER }
  927. if right.location.loc=LOC_REGISTER then
  928. begin
  929. { when swapped another result register }
  930. if (nodetype=subn) and (nf_swaped in flags) then
  931. begin
  932. cg64.a_op64_reg_reg(exprasmlist,op,
  933. left.location.register64,
  934. right.location.register64);
  935. location_swap(left.location,right.location);
  936. toggleflag(nf_swaped);
  937. end
  938. else if cmpop then
  939. begin
  940. emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
  941. firstjmp64bitcmp;
  942. emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
  943. secondjmp64bitcmp;
  944. end
  945. else
  946. begin
  947. cg64.a_op64_reg_reg(exprasmlist,op,
  948. right.location.register64,
  949. left.location.register64);
  950. end;
  951. location_release(exprasmlist,right.location);
  952. end
  953. else
  954. begin
  955. { right.location<>LOC_REGISTER }
  956. if (nodetype=subn) and (nf_swaped in flags) then
  957. begin
  958. {$ifdef newra}
  959. r:=rg.getregisterint(exprasmlist,OS_INT);
  960. {$else}
  961. rg.getexplicitregisterint(exprasmlist,NR_EDI);
  962. r.enum:=R_INTREGISTER;
  963. r.number:=NR_EDI;
  964. {$endif}
  965. cg64.a_load64low_loc_reg(exprasmlist,right.location,r);
  966. emit_reg_reg(op1,opsize,left.location.registerlow,r);
  967. emit_reg_reg(A_MOV,opsize,r,left.location.registerlow);
  968. cg64.a_load64high_loc_reg(exprasmlist,right.location,r);
  969. { the carry flag is still ok }
  970. emit_reg_reg(op2,opsize,left.location.registerhigh,r);
  971. emit_reg_reg(A_MOV,opsize,r,left.location.registerhigh);
  972. rg.ungetregisterint(exprasmlist,r);
  973. if right.location.loc<>LOC_CREGISTER then
  974. begin
  975. location_freetemp(exprasmlist,right.location);
  976. location_release(exprasmlist,right.location);
  977. end;
  978. end
  979. else if cmpop then
  980. begin
  981. case right.location.loc of
  982. LOC_CREGISTER :
  983. begin
  984. emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
  985. firstjmp64bitcmp;
  986. emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
  987. secondjmp64bitcmp;
  988. end;
  989. LOC_CREFERENCE,
  990. LOC_REFERENCE :
  991. begin
  992. href:=right.location.reference;
  993. inc(href.offset,4);
  994. emit_ref_reg(A_CMP,S_L,href,left.location.registerhigh);
  995. firstjmp64bitcmp;
  996. emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.registerlow);
  997. secondjmp64bitcmp;
  998. cg.a_jmp_always(exprasmlist,falselabel);
  999. location_freetemp(exprasmlist,right.location);
  1000. location_release(exprasmlist,right.location);
  1001. end;
  1002. LOC_CONSTANT :
  1003. begin
  1004. exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,hi(right.location.valueqword),left.location.registerhigh));
  1005. firstjmp64bitcmp;
  1006. exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,lo(right.location.valueqword),left.location.registerlow));
  1007. secondjmp64bitcmp;
  1008. end;
  1009. else
  1010. internalerror(200203282);
  1011. end;
  1012. end
  1013. else
  1014. begin
  1015. cg64.a_op64_loc_reg(exprasmlist,op,right.location,
  1016. left.location.register64);
  1017. if (right.location.loc<>LOC_CREGISTER) then
  1018. begin
  1019. location_freetemp(exprasmlist,right.location);
  1020. location_release(exprasmlist,right.location);
  1021. end;
  1022. end;
  1023. end;
  1024. if (left.location.loc<>LOC_CREGISTER) and cmpop then
  1025. begin
  1026. location_freetemp(exprasmlist,left.location);
  1027. location_release(exprasmlist,left.location);
  1028. end;
  1029. { only in case of overflow operations }
  1030. { produce overflow code }
  1031. { we must put it here directly, because sign of operation }
  1032. { is in unsigned VAR!! }
  1033. if mboverflow then
  1034. begin
  1035. if cs_check_overflow in aktlocalswitches then
  1036. begin
  1037. objectlibrary.getlabel(hl4);
  1038. if unsigned then
  1039. emitjmp(C_NB,hl4)
  1040. else
  1041. emitjmp(C_NO,hl4);
  1042. cg.a_call_name(exprasmlist,'FPC_OVERFLOW');
  1043. cg.a_label(exprasmlist,hl4);
  1044. end;
  1045. end;
  1046. { we have LOC_JUMP as result }
  1047. if cmpop then
  1048. location_reset(location,LOC_JUMP,OS_NO)
  1049. else
  1050. location_copy(location,left.location);
  1051. end;
  1052. {*****************************************************************************
  1053. AddMMX
  1054. *****************************************************************************}
  1055. {$ifdef SUPPORT_MMX}
  { Generates MMX code for add nodes operating on MMX-able arrays
    (addn, muln, subn, xorn, orn, andn).

    The MMX opcode is selected from the node type, the element type returned
    by mmx_type(left.resulttype.def) and, for addn/subn, the
    cs_mmx_saturation local switch. The left operand is forced into an MMX
    register, the operation is emitted with the right operand as source and
    the result location is set through set_result_location. }
  procedure ti386addnode.second_addmmx;
    var
      op          : TAsmOp;
      pushedfpu,
      cmpop       : boolean;
      mmxbase     : tmmxtype;
      r,hregister : tregister;
    begin
      pass_left_and_right(pushedfpu);

      cmpop:=false;
      mmxbase:=mmx_type(left.resulttype.def);
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  { signed 16 bit elements need the word variant }
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in aktlocalswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { signed 16 bit elements need the word variant }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded   }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=rg.getregistermm(exprasmlist);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  location_release(exprasmlist,left.location);

                  hregister:=rg.getregistermm(exprasmlist);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              { swapped subtraction: compute right-left in MM7 and move }
              { the result back into the left register                  }
              r.enum:=R_MM7;
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,r);
                  emit_reg_reg(op,S_NO,left.location.register,r);
                  emit_reg_reg(A_MOVQ,S_NO,r,left.location.register);
                end
              else
                begin
                  { left is an MMX register here; it is the right operand }
                  { that has to be a memory reference                     }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,r);
                  emit_reg_reg(op,S_NO,left.location.register,r);
                  emit_reg_reg(A_MOVQ,S_NO,r,left.location.register);
                  { NOTE(review): right.location is released again at the }
                  { end of this procedure - confirm that is intended      }
                  location_release(exprasmlist,right.location);
                end;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                begin
                  emit_reg_reg(op,S_NO,right.location.register,left.location.register);
                end
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                  { NOTE(review): right.location is released again at the }
                  { end of this procedure - confirm that is intended      }
                  location_release(exprasmlist,right.location);
                end;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swaped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
        end;

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      { cmpop is always false in this procedure, so left is never freed here }
      if cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(cmpop,true);
    end;
  1229. {$endif SUPPORT_MMX}
  1230. {*****************************************************************************
  1231. MUL
  1232. *****************************************************************************}
  1233. {$ifdef newra}
  { Emits an unsigned 32 bit multiplication with the MUL instruction
    (variant used with the new register allocator). The left operand is
    loaded into a temporary register, the right operand into EAX, and the
    low dword of the product is copied from EAX into a freshly allocated
    result register. }
  procedure ti386addnode.second_mul;
    var
      r_left,
      r_eax,
      r_edx : Tregister;
    begin
      { fixed registers used by MUL }
      r_eax.enum:=R_INTREGISTER;
      r_eax.number:=NR_EAX;
      r_edx.enum:=R_INTREGISTER;
      r_edx.number:=NR_EDX;

      { location.register itself is assigned at the very end (JM) }
      location_reset(location,LOC_REGISTER,OS_INT);

      { left operand -> temporary register, then its location can go }
      r_left:=rg.getregisterint(exprasmlist,OS_INT);
      cg.a_load_loc_reg(exprasmlist,OS_INT,left.location,r_left);
      location_release(exprasmlist,left.location);

      { right operand -> EAX }
      rg.getexplicitregisterint(exprasmlist,NR_EAX);
      cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,r_eax);
      location_release(exprasmlist,right.location);

      { the temporary is handed back before the MUL is emitted, }
      { the instruction itself frees it                         }
      rg.ungetregisterint(exprasmlist,r_left);
      { MUL modifies EDX too, so it has to be allocated as well (JM) }
      rg.getexplicitregisterint(exprasmlist,NR_EDX);
      emit_reg(A_MUL,S_L,r_left);

      { release EDX first, then EAX }
      rg.ungetregisterint(exprasmlist,r_edx);
      rg.ungetregisterint(exprasmlist,r_eax);

      { move the product out of EAX into a new result register }
      location.register:=rg.getregisterint(exprasmlist,OS_INT);
      emit_reg_reg(A_MOV,S_L,r_eax,location.register);

      location_freetemp(exprasmlist,left.location);
      location_freetemp(exprasmlist,right.location);
    end;
  1268. {$else}
  { Emits an unsigned 32 bit multiplication with the MUL instruction
    (variant used without the new register allocator). EAX/EDX are pushed
    around the multiplication when they are in use by something other than
    the operands; the left operand is loaded into EDI, the right operand into
    EAX, and the low dword of the product is copied from EAX into a freshly
    allocated result register. }
  procedure ti386addnode.second_mul;
    var
      popeax,
      popedx     : boolean;
      regstopush : Tsupregset;
      r_eax,
      r_edx,
      r_edi      : Tregister;
    begin
      { one register record per fixed register that is needed below }
      r_eax.enum:=R_INTREGISTER;
      r_eax.number:=NR_EAX;
      r_edx.enum:=R_INTREGISTER;
      r_edx.number:=NR_EDX;
      r_edi.enum:=R_INTREGISTER;
      r_edi.number:=NR_EDI;

      { here you need to free the symbol first }
      { left.location and right.location must }
      { only be freed when they are really released, }
      { because the optimizer NEEDS correct regalloc }
      { info!!! (JM) }

      { the location.register will be filled in later (JM) }
      location_reset(location,LOC_REGISTER,OS_INT);

      regstopush:=VOLATILE_INTREGISTERS;
      remove_non_regvars_from_loc(right.location,regstopush);
      remove_non_regvars_from_loc(left.location,regstopush);
      { now, regstopush does NOT contain EAX and/or EDX if they are }
      { used in either the left or the right location, except if    }
      { they are regvars. It DOES contain them if they are used in  }
      { another location (JM) }

      { save EAX if it is occupied by something else }
      popeax:=not(RS_EAX in rg.unusedregsint) and
              (RS_EAX in regstopush);
      if popeax then
        emit_reg(A_PUSH,S_L,r_eax);

      { save EDX if it is occupied by something else }
      popedx:=not(RS_EDX in rg.unusedregsint) and
              (RS_EDX in regstopush);
      if popedx then
        emit_reg(A_PUSH,S_L,r_edx);

      { left.location can be R_EAX !!! }
      rg.getexplicitregisterint(exprasmlist,NR_EDI);
      { load the left value }
      cg.a_load_loc_reg(exprasmlist,OS_INT,left.location,r_edi);
      location_release(exprasmlist,left.location);

      { allocate EAX }
      if RS_EAX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.Alloc(r_eax));
      { load the right value }
      cg.a_load_loc_reg(exprasmlist,OS_INT,right.location,r_eax);
      location_release(exprasmlist,right.location);
      { allocate EAX if it isn't yet allocated (JM) }
      if RS_EAX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.Alloc(r_eax));
      { also allocate EDX, since it is also modified by }
      { a mul (JM) }
      if RS_EDX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.Alloc(r_edx));

      emit_reg(A_MUL,S_L,r_edi);
      rg.ungetregisterint(exprasmlist,r_edi);

      { deallocate EDX and EAX again }
      if RS_EDX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.DeAlloc(r_edx));
      if RS_EAX in rg.unusedregsint then
        exprasmlist.concat(tai_regalloc.DeAlloc(r_eax));

      { move the product out of EAX into a new result register }
      location.register:=rg.getregisterint(exprasmlist,OS_INT);
      emit_reg_reg(A_MOV,S_L,r_eax,location.register);

      { restore in reverse order of the pushes }
      if popedx then
        emit_reg(A_POP,S_L,r_edx);
      if popeax then
        emit_reg(A_POP,S_L,r_eax);

      location_freetemp(exprasmlist,left.location);
      location_freetemp(exprasmlist,right.location);
    end;
  1348. {$endif}
  1349. {*****************************************************************************
  1350. pass_2
  1351. *****************************************************************************}
  { Second pass (code generation) entry point of the i386 add node.
    Booleans, 64 bit values, strings, small sets, MMX arrays and floats are
    delegated to their own second_* procedures; everything else is handled
    here: 32 bit sized operands get the generic code path (with MUL
    dispatched to second_mul), 8/16 bit chars and enums only support
    comparisons. }
  procedure ti386addnode.pass_2;
    { is also being used for xor, and "mul", "sub", or and comparative }
    { operators                                                        }
    var
      pushedfpu,
      mboverflow,
      cmpop,
      { true, if unsigned types are compared }
      unsigned,
      { true, if for sets subtractions the extra not should be generated }
      extra_not,
      { operands are 32 bit sized (pointer, class, enum or ordinal) }
      is32bitop,
      { operands are 8/16 bit chars or enums }
      issmallop : boolean;
      op        : tasmop;
      opsize    : topsize;
    begin
      { to make it more readable, string and set (not smallset!) have their
        own procedures }
      case left.resulttype.def.deftype of
        orddef :
          begin
            if is_boolean(left.resulttype.def) and
               is_boolean(right.resulttype.def) then
              begin
                { boolean expressions }
                second_addboolean;
                exit;
              end
            else if is_64bit(left.resulttype.def) then
              begin
                { 64bit operations }
                second_add64bit;
                exit;
              end;
          end;
        stringdef :
          begin
            second_addstring;
            exit;
          end;
        setdef :
          begin
            { normalsets are already handled in pass1 }
            if (tsetdef(left.resulttype.def).settype<>smallset) then
              internalerror(200109041);
            second_addsmallset;
            exit;
          end;
        arraydef :
          begin
{$ifdef SUPPORT_MMX}
            if is_mmx_able_array(left.resulttype.def) then
              begin
                second_addmmx;
                exit;
              end;
{$endif SUPPORT_MMX}
          end;
        floatdef :
          begin
            second_addfloat;
            exit;
          end;
      end;

      { defaults }
      extra_not:=false;
      mboverflow:=false;
      cmpop:=false;
      unsigned:=not(is_signed(left.resulttype.def)) or
                not(is_signed(right.resulttype.def));
      opsize:=def_opsize(left.resulttype.def);

      pass_left_and_right(pushedfpu);

      is32bitop:=
        (left.resulttype.def.deftype in [pointerdef,classrefdef,procvardef]) or
        (right.resulttype.def.deftype=pointerdef) or
        (is_class_or_interface(right.resulttype.def) and
         is_class_or_interface(left.resulttype.def)) or
        ((left.resulttype.def.deftype=enumdef) and
         (left.resulttype.def.size=4)) or
        ((left.resulttype.def.deftype=orddef) and
         (torddef(left.resulttype.def).typ in [s32bit,u32bit])) or
        ((right.resulttype.def.deftype=orddef) and
         (torddef(right.resulttype.def).typ in [s32bit,u32bit]));

      if is32bitop then
        begin
          case nodetype of
            addn :
              begin
                mboverflow:=true;
                op:=A_ADD;
              end;
            muln :
              begin
                mboverflow:=true;
                if unsigned then
                  op:=A_MUL
                else
                  op:=A_IMUL;
              end;
            subn :
              begin
                mboverflow:=true;
                op:=A_SUB;
              end;
            ltn,lten,
            gtn,gten,
            equaln,unequaln :
              begin
                cmpop:=true;
                op:=A_CMP;
              end;
            xorn :
              op:=A_XOR;
            orn :
              op:=A_OR;
            andn :
              op:=A_AND;
            else
              internalerror(200304229);
          end;

          { filter MUL, which requires special handling }
          if op=A_MUL then
            begin
              second_mul;
              exit;
            end;

          { Convert flags to register first }
          if (left.location.loc=LOC_FLAGS) then
            location_force_reg(exprasmlist,left.location,opsize_2_cgsize[opsize],false);
          if (right.location.loc=LOC_FLAGS) then
            location_force_reg(exprasmlist,right.location,opsize_2_cgsize[opsize],false);

          left_must_be_reg(opsize,false);
          emit_generic_code(op,opsize,unsigned,extra_not,mboverflow);

          location_freetemp(exprasmlist,right.location);
          location_release(exprasmlist,right.location);
          { the left operand is only given up after a comparison }
          if cmpop and
             (left.location.loc<>LOC_CREGISTER) then
            begin
              location_freetemp(exprasmlist,left.location);
              location_release(exprasmlist,left.location);
            end;
          set_result_location(cmpop,unsigned);
        end
      else
        begin
          { 8/16 bit enum,char,wchar types }
          issmallop:=
            ((left.resulttype.def.deftype=orddef) and
             (torddef(left.resulttype.def).typ in [uchar,uwidechar])) or
            ((left.resulttype.def.deftype=enumdef) and
             ((left.resulttype.def.size=1) or
              (left.resulttype.def.size=2)));

          if issmallop then
            begin
              { only comparisons are possible for these types }
              if nodetype in [ltn,lten,gtn,gten,equaln,unequaln] then
                cmpop:=true
              else
                internalerror(2003042210);

              left_must_be_reg(opsize,false);
              emit_op_right_left(A_CMP,opsize);

              location_freetemp(exprasmlist,right.location);
              location_release(exprasmlist,right.location);
              if left.location.loc<>LOC_CREGISTER then
                begin
                  location_freetemp(exprasmlist,left.location);
                  location_release(exprasmlist,left.location);
                end;
              set_result_location(true,true);
            end
          else
            internalerror(2003042211);
        end;
    end;
  { Unit initialization: assigns ti386addnode to caddnode; the final "end."
    terminates the unit. }
  1524. begin
  1525. caddnode:=ti386addnode;
  1526. end.
  1527. {
  1528. $Log$
  1529. Revision 1.75 2003-08-03 20:38:00 daniel
  1530. * Made code generator reverse or/add/and/xor/imul instructions when
  1531. possible to reduce the slowdown of spills.
  1532. Revision 1.74 2003/08/03 20:19:43 daniel
  1533. - Removed cmpop from Ti386addnode.second_addstring
  1534. Revision 1.73 2003/07/06 15:31:21 daniel
  1535. * Fixed register allocator. *Lots* of fixes.
  1536. Revision 1.72 2003/06/17 16:51:30 peter
  1537. * cycle fixes
  1538. Revision 1.71 2003/06/07 18:57:04 jonas
  1539. + added freeintparaloc
  1540. * ppc get/freeintparaloc now check whether the parameter regs are
  1541. properly allocated/deallocated (and get an extra list para)
  1542. * ppc a_call_* now internalerrors if pi_do_call is not yet set
  1543. * fixed lot of missing pi_do_call's
  1544. Revision 1.70 2003/06/03 13:01:59 daniel
  1545. * Register allocator finished
  1546. Revision 1.69 2003/05/30 23:49:18 jonas
  1547. * a_load_loc_reg now has an extra size parameter for the destination
  1548. register (properly fixes what I worked around in revision 1.106 of
  1549. ncgutil.pas)
  1550. Revision 1.68 2003/05/26 19:38:28 peter
  1551. * generic fpc_shorstr_concat
  1552. + fpc_shortstr_append_shortstr optimization
  1553. Revision 1.67 2003/05/22 21:32:29 peter
  1554. * removed some unit dependencies
  1555. Revision 1.66 2003/04/26 09:12:55 peter
  1556. * add string returns in LOC_REFERENCE
  1557. Revision 1.65 2003/04/23 20:16:04 peter
  1558. + added currency support based on int64
  1559. + is_64bit for use in cg units instead of is_64bitint
  1560. * removed cgmessage from n386add, replace with internalerrors
  1561. Revision 1.64 2003/04/23 09:51:16 daniel
  1562. * Removed usage of edi in a lot of places when new register allocator used
  1563. + Added newra versions of g_concatcopy and secondadd_float
  1564. Revision 1.63 2003/04/22 23:50:23 peter
  1565. * firstpass uses expectloc
  1566. * checks if there are differences between the expectloc and
  1567. location.loc from secondpass in EXTDEBUG
  1568. Revision 1.62 2003/04/22 10:09:35 daniel
  1569. + Implemented the actual register allocator
  1570. + Scratch registers unavailable when new register allocator used
  1571. + maybe_save/maybe_restore unavailable when new register allocator used
  1572. Revision 1.61 2003/04/17 10:02:48 daniel
  1572. * Tweaked register allocate/deallocate position so that fewer interferences
  1573. are generated.
  1575. Revision 1.60 2003/03/28 19:16:57 peter
  1576. * generic constructor working for i386
  1577. * remove fixed self register
  1578. * esi added as address register for i386
  1579. Revision 1.59 2003/03/13 19:52:23 jonas
  1580. * and more new register allocator fixes (in the i386 code generator this
  1581. time). At least now the ppc cross compiler can compile the linux
  1582. system unit again, but I haven't tested it.
  1583. Revision 1.58 2003/03/08 20:36:41 daniel
  1584. + Added newra version of Ti386shlshrnode
  1585. + Added interference graph construction code
  1586. Revision 1.57 2003/03/08 13:59:17 daniel
  1587. * Work to handle new register notation in ag386nsm
  1588. + Added newra version of Ti386moddivnode
  1589. Revision 1.56 2003/03/08 10:53:48 daniel
  1590. * Created newra version of secondmul in n386add.pas
  1591. Revision 1.55 2003/02/19 22:00:15 daniel
  1592. * Code generator converted to new register notation
  1593. - Horribly outdated todo.txt removed
  1594. Revision 1.54 2003/01/13 18:37:44 daniel
  1595. * Work on register conversion
  1596. Revision 1.53 2003/01/08 18:43:57 daniel
  1597. * Tregister changed into a record
  1598. Revision 1.52 2002/11/25 17:43:26 peter
  1599. * splitted defbase in defutil,symutil,defcmp
  1600. * merged isconvertable and is_equal into compare_defs(_ext)
  1601. * made operator search faster by walking the list only once
  1602. Revision 1.51 2002/11/15 01:58:56 peter
  1603. * merged changes from 1.0.7 up to 04-11
  1604. - -V option for generating bug report tracing
  1605. - more tracing for option parsing
  1606. - errors for cdecl and high()
  1607. - win32 import stabs
  1608. - win32 records<=8 are returned in eax:edx (turned off by default)
  1609. - heaptrc update
  1610. - more info for temp management in .s file with EXTDEBUG
  1611. Revision 1.50 2002/10/20 13:11:27 jonas
  1612. * re-enabled optimized version of comparisons with the empty string that
  1613. I accidentally disabled in revision 1.26
  1614. Revision 1.49 2002/08/23 16:14:49 peter
  1615. * tempgen cleanup
  1616. * tt_noreuse temp type added that will be used in genentrycode
  1617. Revision 1.48 2002/08/14 18:41:48 jonas
  1618. - remove valuelow/valuehigh fields from tlocation, because they depend
  1619. on the endianess of the host operating system -> difficult to get
  1620. right. Use lo/hi(location.valueqword) instead (remember to use
  1621. valueqword and not value!!)
  1622. Revision 1.47 2002/08/11 14:32:29 peter
  1623. * renamed current_library to objectlibrary
  1624. Revision 1.46 2002/08/11 13:24:16 peter
  1625. * saving of asmsymbols in ppu supported
  1626. * asmsymbollist global is removed and moved into a new class
  1627. tasmlibrarydata that will hold the info of a .a file which
  1628. corresponds with a single module. Added librarydata to tmodule
  1629. to keep the library info stored for the module. In the future the
  1630. objectfiles will also be stored to the tasmlibrarydata class
  1631. * all getlabel/newasmsymbol and friends are moved to the new class
  1632. Revision 1.45 2002/07/26 11:17:52 jonas
  1633. * the optimization of converting a multiplication with a power of two to
  1634. a shl is moved from n386add/secondpass to nadd/resulttypepass
  1635. Revision 1.44 2002/07/20 11:58:00 florian
  1636. * types.pas renamed to defbase.pas because D6 contains a types
  1637. unit, so this would conflict if D6 programs are compiled
  1638. + Willamette/SSE2 instructions to assembler added
  1639. Revision 1.43 2002/07/11 14:41:32 florian
  1640. * start of the new generic parameter handling
  1641. Revision 1.42 2002/07/07 09:52:33 florian
  1642. * powerpc target fixed, very simple units can be compiled
  1643. * some basic stuff for better callparanode handling, far from being finished
  1644. Revision 1.41 2002/07/01 18:46:31 peter
  1645. * internal linker
  1646. * reorganized aasm layer
  1647. Revision 1.40 2002/07/01 16:23:55 peter
  1648. * cg64 patch
  1649. * basics for currency
  1650. * asnode updates for class and interface (not finished)
  1651. Revision 1.39 2002/05/18 13:34:22 peter
  1652. * readded missing revisions
  1653. Revision 1.38 2002/05/16 19:46:51 carl
  1654. + defines.inc -> fpcdefs.inc to avoid conflicts if compiling by hand
  1655. + try to fix temp allocation (still in ifdef)
  1656. + generic constructor calls
  1657. + start of tassembler / tmodulebase class cleanup
  1658. Revision 1.36 2002/05/13 19:54:37 peter
  1659. * removed n386ld and n386util units
  1660. * maybe_save/maybe_restore added instead of the old maybe_push
  1661. Revision 1.35 2002/05/12 16:53:17 peter
  1662. * moved entry and exitcode to ncgutil and cgobj
  1663. * foreach gets extra argument for passing local data to the
  1664. iterator function
  1665. * -CR checks also class typecasts at runtime by changing them
  1666. into as
  1667. * fixed compiler to cycle with the -CR option
  1668. * fixed stabs with elf writer, finally the global variables can
  1669. be watched
  1670. * removed a lot of routines from cga unit and replaced them by
  1671. calls to cgobj
  1672. * u32bit-s32bit updates for and,or,xor nodes. When one element is
  1673. u32bit then the other is typecasted also to u32bit without giving
  1674. a rangecheck warning/error.
  1675. * fixed pascal calling method with reversing also the high tree in
  1676. the parast, detected by tcalcst3 test
  1677. Revision 1.34 2002/04/25 20:16:40 peter
  1678. * moved more routines from cga/n386util
  1679. Revision 1.33 2002/04/05 15:09:13 jonas
  1680. * fixed web bug 1915
  1681. Revision 1.32 2002/04/04 19:06:10 peter
  1682. * removed unused units
  1683. * use tlocation.size in cg.a_*loc*() routines
  1684. Revision 1.31 2002/04/02 17:11:35 peter
  1685. * tlocation,treference update
  1686. * LOC_CONSTANT added for better constant handling
  1687. * secondadd splitted in multiple routines
  1688. * location_force_reg added for loading a location to a register
  1689. of a specified size
  1690. * secondassignment parses now first the right and then the left node
  1691. (this is compatible with Kylix). This saves a lot of push/pop especially
  1692. with string operations
  1693. * adapted some routines to use the new cg methods
  1694. Revision 1.29 2002/03/04 19:10:13 peter
  1695. * removed compiler warnings
  1696. }