n386add.pas 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752
  1. {
  2. $Id$
  3. Copyright (c) 2000 by Florian Klaempfl
  4. Code generation for add nodes on the i386
  5. This program is free software; you can redistribute it and/or modify
  6. it under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 2 of the License, or
  8. (at your option) any later version.
  9. This program is distributed in the hope that it will be useful,
  10. but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
  14. along with this program; if not, write to the Free Software
  15. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  16. ****************************************************************************
  17. }
  18. unit n386add;
  19. {$i defines.inc}
  20. interface
  21. uses
  22. node,nadd,cpubase,cginfo;
  type
    { i386 specific descendant of taddnode; the second_* methods below each
      generate code for one family of operand types }
    ti386addnode = class(taddnode)
      procedure pass_2;override;
    protected
      { handles the shortstring special cases itself and defers everything
        else to the inherited implementation }
      function first_addstring : tnode; override;
    private
      { shared helpers }
      procedure pass_left_and_right(var pushedfpu:boolean);
      function getresflags(unsigned : boolean) : tresflags;
      procedure left_must_be_reg(opsize:TOpSize;noswap:boolean);
      procedure emit_op_right_left(op:TAsmOp;opsize:TOpSize);
      procedure emit_generic_code(op:TAsmOp;opsize:TOpSize;unsigned,extra_not,mboverflow:boolean);
      procedure set_result_location(cmpop,unsigned:boolean);
      { per operand type code generators }
      procedure second_addstring;
      procedure second_addboolean;
      procedure second_addfloat;
      procedure second_addsmallset;
{$ifdef SUPPORT_MMX}
      procedure second_addmmx;
{$endif SUPPORT_MMX}
      procedure second_add64bit;
    end;
  44. implementation
  45. uses
  46. globtype,systems,
  47. cutils,verbose,globals,
  48. symconst,symdef,aasm,types,htypechk,
  49. cgbase,pass_2,regvars,
  50. cpuasm,
  51. ncon,nset,
  52. tainst,cga,ncgutil,n386util,tgobj,rgobj,rgcpu,cgobj,cg64f32;
  53. {*****************************************************************************
  54. Helpers
  55. *****************************************************************************}
  56. const
  57. opsize_2_cgsize : array[S_B..S_L] of tcgsize = (OS_8,OS_16,OS_32);
  { Runs the second pass over both operands of this node.

    pushedfpu receives the result of maybe_pushfpu when this node's own
    location is LOC_FPUREGISTER, and false otherwise. }
  procedure ti386addnode.pass_left_and_right(var pushedfpu:boolean);
    var
      leftsaved : boolean;
    begin
      { firstcomplex may reorder the operands (per the original note: the
        more difficult operand is calculated first) }
      firstcomplex(self);

      { an ordinal constant on the left is moved to the right hand side }
      if left.nodetype=ordconstn then
        swapleftright;

      secondpass(left);

      { presumably saves left when right needs too many integer registers;
        the flag is used below to restore it }
      leftsaved:=maybe_push(right.registers32,left,is_64bitint(left.resulttype.def));

      pushedfpu:=false;
      if location.loc=LOC_FPUREGISTER then
        pushedfpu:=maybe_pushfpu(right.registersfpu,left);

      secondpass(right);

      if leftsaved then
        restore(left,is_64bitint(left.resulttype.def));
    end;
  { Maps the comparison stored in nodetype to the flags condition that holds
    when the comparison is true.

    unsigned selects the B/BE/A/AE conditions instead of L/LE/G/GE.
    When nf_swaped is set the ordering relations are mirrored.
    For node types other than the six comparisons the result is left
    unassigned, exactly as there is no matching case label. }
  function ti386addnode.getresflags(unsigned : boolean) : tresflags;
    var
      relation : tnodetype;
    begin
      relation:=nodetype;

      { swapped operands: a<b has to be tested as b>a, and so on }
      if nf_swaped in flags then
        case relation of
          ltn  : relation:=gtn;
          lten : relation:=gten;
          gtn  : relation:=ltn;
          gten : relation:=lten;
        end;

      case relation of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          if unsigned then
            getresflags:=F_B
          else
            getresflags:=F_L;
        lten :
          if unsigned then
            getresflags:=F_BE
          else
            getresflags:=F_LE;
        gtn :
          if unsigned then
            getresflags:=F_A
          else
            getresflags:=F_G;
        gten :
          if unsigned then
            getresflags:=F_AE
          else
            getresflags:=F_GE;
      end;
    end;
  { Makes sure left.location is a LOC_REGISTER.

    opsize : operand size, translated through opsize_2_cgsize when left has
             to be forced into a register
    noswap : when true, left and right locations are never exchanged }
  procedure ti386addnode.left_must_be_reg(opsize:TOpSize;noswap:boolean);
    var
      iscompare : boolean;
    begin
      { nothing to do when left already is a register }
      if left.location.loc=LOC_REGISTER then
        exit;

      { right is a register and swapping is allowed: exchange the locations
        and remember that in the node flags }
      if (right.location.loc=LOC_REGISTER) and not(noswap) then
        begin
          location_swap(left.location,right.location);
          toggleflag(nf_swaped);
          exit;
        end;

      { per the original note: for a comparison the register value is not
        changed, so a constant register may be reused; that is what the
        last argument passes on }
      iscompare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
      location_force_reg(left.location,opsize_2_cgsize[opsize],iscompare);
    end;
  { Appends one instruction "op right,left" to exprasmlist, choosing the
    instruction form from the kind of right.location.
    left.location.register is always used as the second operand, so left has
    to be in a register already.
    Any other kind of right location ends in internalerror(200203232). }
  procedure ti386addnode.emit_op_right_left(op:TAsmOp;opsize:TOpsize);
    begin
      if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
        exprasmlist.concat(taicpu.op_reg_reg(op,opsize,right.location.register,left.location.register))
      else if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        exprasmlist.concat(taicpu.op_ref_reg(op,opsize,right.location.reference,left.location.register))
      else if right.location.loc=LOC_CONSTANT then
        exprasmlist.concat(taicpu.op_const_reg(op,opsize,right.location.value,left.location.register))
      else
        internalerror(200203232);
    end;
  { Fills in the location of this node after the operation was emitted.

    cmpop    : true  -> the result lives in the flags (LOC_FLAGS) and the
                        condition is taken from getresflags(unsigned)
               false -> the result location is a copy of left.location
    unsigned : only used for comparisons, passed on to getresflags }
  procedure ti386addnode.set_result_location(cmpop,unsigned:boolean);
    begin
      if not cmpop then
        begin
          location_copy(location,left.location);
          exit;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned);
    end;
  { Emits the instruction(s) for a generic two operand integer operation.
    On entry left.location.loc should be LOC_REGISTER.

    op         : instruction to emit
    opsize     : operand size of the instruction
    unsigned   : selects the carry based instead of the overflow based
                 overflow test
    extra_not  : one operand has to be complemented before op is applied
    mboverflow : the operation may overflow; an overflow check is emitted
                 when cs_check_overflow is active }
  procedure ti386addnode.emit_generic_code(op:TAsmOp;opsize:TOpSize;unsigned,extra_not,mboverflow:boolean);
    var
      shiftcount : longint;
      skipovf : tasmlabel;
      swappedsub,
      rightisconst,
      noovfcheck : boolean;
    begin
      { all three are evaluated before any location or flag is modified }
      swappedsub:=(nodetype=subn) and (nf_swaped in flags);
      rightisconst:=(right.location.loc=LOC_CONSTANT);
      noovfcheck:=not(cs_check_overflow in aktlocalswitches);

      if right.location.loc=LOC_REGISTER then
        begin
          if swappedsub then
            begin
              { swapped subtraction: the result ends up in the right
                register, so the locations are exchanged afterwards and the
                swapped flag is toggled again }
              if extra_not then
                emit_reg(A_NOT,S_L,left.location.register);
              emit_reg_reg(op,opsize,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swaped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,S_L,right.location.register);
              emit_reg_reg(op,opsize,right.location.register,left.location.register);
            end;
        end
      else if swappedsub then
        begin
          { right is not a register: load it into EDI, apply op with left as
            source and EDI as destination, then move EDI into left }
          if extra_not then
            emit_reg(A_NOT,opsize,left.location.register);
          rg.getexplicitregisterint(exprasmlist,R_EDI);
          cg.a_load_loc_reg(exprasmlist,right.location,R_EDI);
          emit_reg_reg(op,opsize,left.location.register,R_EDI);
          emit_reg_reg(A_MOV,opsize,R_EDI,left.location.register);
          rg.ungetregisterint(exprasmlist,R_EDI);
        end
      { the next four branches are shortcuts for constant right operands }
      else if rightisconst and
              (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              (right.location.value=0) then
        { (in)equality against zero: test the register against itself }
        emit_reg_reg(A_TEST,opsize,left.location.register,left.location.register)
      else if rightisconst and
              noovfcheck and
              (op=A_ADD) and
              (right.location.value=1) then
        emit_reg(A_INC,opsize,left.location.register)
      else if rightisconst and
              noovfcheck and
              (op=A_SUB) and
              (right.location.value=1) then
        emit_reg(A_DEC,opsize,left.location.register)
      else if rightisconst and
              noovfcheck and
              (op=A_IMUL) and
              ispowerof2(right.location.value,shiftcount) then
        { multiplication by a power of two becomes a shift }
        emit_const_reg(A_SHL,opsize,shiftcount,left.location.register)
      else if extra_not then
        begin
          { complement a copy of right in EDI and AND it into left }
          rg.getexplicitregisterint(exprasmlist,R_EDI);
          cg.a_load_loc_reg(exprasmlist,right.location,R_EDI);
          emit_reg(A_NOT,S_L,R_EDI);
          emit_reg_reg(A_AND,S_L,R_EDI,left.location.register);
          rg.ungetregisterint(exprasmlist,R_EDI);
        end
      else
        emit_op_right_left(op,opsize);

      { the overflow test has to be produced here, because only this routine
        knows the signedness of the operation: jump over the call to
        FPC_OVERFLOW when no carry (unsigned) or no overflow (signed)
        occurred }
      if mboverflow and not(noovfcheck) then
        begin
          getlabel(skipovf);
          if unsigned then
            emitjmp(C_NB,skipovf)
          else
            emitjmp(C_NO,skipovf);
          emitcall('FPC_OVERFLOW');
          emitlab(skipovf);
        end;
    end;
  275. {*****************************************************************************
  276. Addstring
  277. *****************************************************************************}
  { Note (JM): the generic shortstring concat routine differs from the i386
    one. If you implement an fpc_shortstr_concat similar to the one in
    i386.inc for another target, you have to override first_addstring the
    way it is done here and implement the shortstring concat manually.

    Shortstring concatenation and shortstring comparisons are not turned
    into generic helper calls; they are handled in pass_2. According to the
    original note, fpc_shortstr_compare cannot be used as a compilerproc
    because it returns its result in the flags instead of in eax.

    Returns nil for those special cases, otherwise whatever the inherited
    first_addstring returns. }
  function ti386addnode.first_addstring : tnode;
    var
      isconcat,
      iscompare : boolean;
    begin
      isconcat:=(nodetype = addn) and
                is_shortstring(resulttype.def);
      iscompare:=(nodetype in [ltn,lten,gtn,gten,equaln,unequaln]) and
                 is_shortstring(left.resulttype.def);

      { everything else goes through the generic code }
      if not(isconcat or iscompare) then
        begin
          result := inherited first_addstring;
          exit;
        end;

      { a concatenation yields a reference, a comparison yields flags }
      if nodetype = addn then
        location_reset(location,LOC_CREFERENCE,def_cgsize(resulttype.def))
      else
        location_reset(location,LOC_FLAGS,OS_NO);
      calcregisters(self,0,0,0);
      result := nil;
    end;
  { Generates code for shortstring concatenation (addn) and shortstring
    comparisons by calling FPC_SHORTSTR_CONCAT resp. FPC_SHORTSTR_COMPARE.
    Any other string type ends in internalerror(200108303); per the original
    note those are handled in the first pass.

    NOTE(review): iscompare stays unassigned for node types other than addn
    and the six comparisons; first_addstring only routes those node types to
    pass_2, so this presumably cannot happen - confirm. }
  procedure ti386addnode.second_addstring;
    var
      tmpref : treference;
      iscompare : boolean;
      savedregs : tpushedsaved;
      pushset : tregisterset;
    begin
      { string operations are not commutative, so undo any operand swap }
      if nf_swaped in flags then
        swapleftright;

      if tstringdef(left.resulttype.def).string_typ<>st_shortstring then
        begin
          internalerror(200108303);
          exit;
        end;

      case nodetype of
        addn:
          begin
            iscompare:=false;
            secondpass(left);

            { For s:=s+... (nf_use_strconcat) no temp string is needed (PM).
              The left operand can also already be a temp coming from a type
              conversion or a function result, so simply check for a temp of
              256 bytes (JM). In every other case left is copied into a
              fresh 256 byte temp first. }
            if not(tg.istemp(left.location.reference) and
                   (tg.getsizeoftemp(left.location.reference) = 256)) and
               not(nf_use_strconcat in flags) then
              begin
                tg.gettempofsizereference(exprasmlist,256,tmpref);
                cg.g_copyshortstring(exprasmlist,left.location.reference,tmpref,255,true,false);
                { per the original note the location itself is released by
                  the copy; here only a possible temp is freed }
                location_freetemp(exprasmlist,left.location);
                location_reset(left.location,LOC_CREFERENCE,def_cgsize(resulttype.def));
                left.location.reference:=tmpref;
              end;

            secondpass(right);

            { The registers of the right location are not needed after the
              call either. Instead of releasing them now they are simply
              left out of the set of registers to save, so the release
              stays in the right place; emitpushreferenceaddr does not need
              extra registers (JM) }
            pushset := all_registers;
            remove_non_regvars_from_loc(right.location,pushset);
            rg.saveusedregisters(exprasmlist,savedregs,pushset);

            { first argument: address of the left (destination) string }
            emitpushreferenceaddr(left.location.reference);

            { The optimizer can put deallocations in the right place more
              easily when they come too early than when they come too late
              (in case the push needs a "lea (..),edi; push edi") }
            location_release(exprasmlist,right.location);
            { second argument: address of the right string }
            emitpushreferenceaddr(right.location.reference);

            rg.saveregvars(exprasmlist,pushset);
            emitcall('FPC_SHORTSTR_CONCAT');
            tg.ungetiftemp(exprasmlist,right.location.reference);
            maybe_loadself;
            rg.restoreusedregisters(exprasmlist,savedregs);
            location_copy(location,left.location);
          end;

        ltn,lten,gtn,gten,equaln,unequaln :
          begin
            iscompare := true;
            rg.saveusedregisters(exprasmlist,savedregs,all_registers);

            { push the addresses of both strings, left one first }
            secondpass(left);
            location_release(exprasmlist,left.location);
            emitpushreferenceaddr(left.location.reference);

            secondpass(right);
            location_release(exprasmlist,right.location);
            emitpushreferenceaddr(right.location.reference);

            rg.saveregvars(exprasmlist,all_registers);
            emitcall('FPC_SHORTSTR_COMPARE');
            maybe_loadself;
            rg.restoreusedregisters(exprasmlist,savedregs);
            location_freetemp(exprasmlist,left.location);
            location_freetemp(exprasmlist,right.location);
          end;
      end;

      { comparisons are always evaluated with the unsigned conditions }
      set_result_location(iscompare,true);
    end;
  386. {*****************************************************************************
  387. AddBoolean
  388. *****************************************************************************}
  { Generates code for an operation on two boolean operands.

    and/or without cs_full_boolean_eval are evaluated with jumps
    (LOC_JUMP result). Comparisons, xor and - under cs_full_boolean_eval -
    and/or are computed on the operand values in registers. }
  procedure ti386addnode.second_addboolean;
    var
      op : TAsmOp;
      opsize : TOpsize;
      cmpop,
      leftsaved,
      isjump : boolean;
      savedtrue,
      savedfalse : tasmlabel;
    begin
      { calculate the operator which is more difficult }
      firstcomplex(self);

      cmpop:=false;

      { an 8 bit boolean on either side selects byte size, otherwise a
        16 bit boolean selects word size, otherwise dword size is used }
      if (torddef(left.resulttype.def).typ=bool8bit) or
         (torddef(right.resulttype.def).typ=bool8bit) then
        opsize:=S_B
      else if (torddef(left.resulttype.def).typ=bool16bit) or
              (torddef(right.resulttype.def).typ=bool16bit) then
        opsize:=S_W
      else
        opsize:=S_L;

      { ---- evaluation with jumps ---- }
      if not(cs_full_boolean_eval in aktlocalswitches) and
         not(nodetype in [unequaln,ltn,lten,gtn,gten,equaln,xorn]) then
        begin
          if not(nodetype in [andn,orn]) then
            begin
              CGMessage(type_e_mismatch);
              exit;
            end;

          location_reset(location,LOC_JUMP,OS_NO);
          if nodetype=andn then
            begin
              { left gets a private true label that is placed right behind
                its code; the false label is shared with this node }
              savedtrue:=truelabel;
              getlabel(truelabel);
              secondpass(left);
              maketojumpbool(left,lr_load_regvars);
              emitlab(truelabel);
              truelabel:=savedtrue;
            end
          else
            begin
              { orn: same scheme with a private false label }
              savedfalse:=falselabel;
              getlabel(falselabel);
              secondpass(left);
              maketojumpbool(left,lr_load_regvars);
              emitlab(falselabel);
              falselabel:=savedfalse;
            end;
          secondpass(right);
          maketojumpbool(right,lr_load_regvars);
          exit;
        end;

      { ---- evaluation on the operand values ---- }
      if left.nodetype in [ordconstn,realconstn] then
        swapleftright;

      { a LOC_JUMP operand is evaluated with its own pair of labels; the
        labels of this node are put back once the operand value has been
        forced into a register }
      isjump:=(left.location.loc=LOC_JUMP);
      if isjump then
        begin
          savedtrue:=truelabel;
          getlabel(truelabel);
          savedfalse:=falselabel;
          getlabel(falselabel);
        end;
      secondpass(left);
      if left.location.loc in [LOC_FLAGS,LOC_JUMP] then
        location_force_reg(left.location,opsize_2_cgsize[opsize],false);
      if isjump then
        begin
          truelabel:=savedtrue;
          falselabel:=savedfalse;
        end;

      leftsaved:=maybe_push(right.registers32,left,false);

      { same treatment for the right operand }
      isjump:=(right.location.loc=LOC_JUMP);
      if isjump then
        begin
          savedtrue:=truelabel;
          getlabel(truelabel);
          savedfalse:=falselabel;
          getlabel(falselabel);
        end;
      secondpass(right);
      if leftsaved then
        restore(left,false);
      if right.location.loc in [LOC_FLAGS,LOC_JUMP] then
        location_force_reg(right.location,opsize_2_cgsize[opsize],false);
      if isjump then
        begin
          truelabel:=savedtrue;
          falselabel:=savedfalse;
        end;

      { left must be a register }
      left_must_be_reg(opsize,false);

      { select the instruction }
      case nodetype of
        ltn,lten,gtn,gten,
        equaln,unequaln :
          begin
            op:=A_CMP;
            cmpop:=true;
          end;
        xorn :
          op:=A_XOR;
        orn :
          op:=A_OR;
        andn :
          op:=A_AND;
        else
          internalerror(200203247);
      end;
      emit_op_right_left(op,opsize);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      { after a comparison the result is in the flags, so left is no longer
        needed either }
      if cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(cmpop,true);
    end;
  516. {*****************************************************************************
  517. AddFloat
  518. *****************************************************************************}
  { Generates x87 code for a floating point operation or comparison.

    Both operands are brought onto the FPU stack, then the popping form of
    the instruction is emitted. Arithmetic results are left in R_ST
    (LOC_FPUREGISTER); comparisons transfer the FPU status word into the
    CPU flags and yield LOC_FLAGS.

    The flags condition of a comparison is taken from getresflags(true)
    instead of a second, hand-copied table: the table that used to live here
    was identical to the unsigned mapping of getresflags, including the
    handling of nf_swaped. }
  procedure ti386addnode.second_addfloat;
    var
      op : TAsmOp;
      pushedfpu,
      cmpop : boolean;
    begin
      pass_left_and_right(pushedfpu);

      cmpop:=false;
      case nodetype of
        addn :
          op:=A_FADDP;
        muln :
          op:=A_FMULP;
        subn :
          op:=A_FSUBP;
        slashn :
          op:=A_FDIVP;
        ltn,lten,gtn,gten,
        equaln,unequaln :
          begin
            op:=A_FCOMPP;
            cmpop:=true;
          end;
        else
          CGMessage(type_e_mismatch);
      end;

      if (right.location.loc<>LOC_FPUREGISTER) then
        begin
          { right is not on the FPU stack yet: load it }
          cg.a_loadfpu_loc_reg(exprasmlist,
            right.location,R_ST);
          { NOTE(review): this tests right.location but frees the temp of
            left.location, and the same temp may be freed again a few lines
            below - kept unchanged, confirm that this is intended }
          if (right.location.loc <> LOC_CFPUREGISTER) and
             pushedfpu then
            location_freetemp(exprasmlist,left.location);
          if (left.location.loc<>LOC_FPUREGISTER) then
            begin
              cg.a_loadfpu_loc_reg(exprasmlist,left.location,R_ST);
              if (left.location.loc <> LOC_CFPUREGISTER) and
                 pushedfpu then
                location_freetemp(exprasmlist,left.location);
            end
          else
            begin
              { left was on the stack => swap }
              toggleflag(nf_swaped);
            end;
          { releases the right reference }
          location_release(exprasmlist,right.location);
        end
      { right is on the FPU stack already, only left has to be loaded
        (original note: "the nominator in st0") }
      else if (left.location.loc<>LOC_FPUREGISTER) then
        begin
          cg.a_loadfpu_loc_reg(exprasmlist,left.location,R_ST);
          if (left.location.loc <> LOC_CFPUREGISTER) and
             pushedfpu then
            location_freetemp(exprasmlist,left.location);
        end
      else
        begin
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swaped);
        end;

      { releases the left reference }
      if (left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE]) then
        location_release(exprasmlist,left.location);

      { if we swapped the tree nodes, then use the reverse operator for the
        non-commutative operations }
      if nf_swaped in flags then
        begin
          if (nodetype=slashn) then
            op:=A_FDIVRP
          else if (nodetype=subn) then
            op:=A_FSUBRP;
        end;

      { a workaround for the pentium fdiv bug (calling EMUL_FDIVP instead of
        emitting FDIVP) was sketched at this point but never enabled }

      if op<>A_FCOMPP then
        begin
          { arithmetic: explicit ST/ST1 operands, one stack entry is popped }
          emit_reg_reg(op,S_NO,R_ST,R_ST1);
          dec(trgcpu(rg).fpuvaroffset);
        end
      else
        begin
          { FCOMPP takes no operands and pops both stack entries }
          emit_none(op,S_NO);
          dec(trgcpu(rg).fpuvaroffset,2);
        end;

      if cmpop then
        begin
          { FNSTSW needs AX; when EAX is in use its value is parked in EDI
            around the status word transfer }
          if not(R_EAX in rg.unusedregsint) then
            begin
              rg.getexplicitregisterint(exprasmlist,R_EDI);
              emit_reg_reg(A_MOV,S_L,R_EAX,R_EDI);
            end;
          emit_reg(A_FNSTSW,S_NO,R_AX);
          emit_none(A_SAHF,S_NO);
          if not(R_EAX in rg.unusedregsint) then
            begin
              emit_reg_reg(A_MOV,S_L,R_EDI,R_EAX);
              rg.ungetregisterint(exprasmlist,R_EDI);
            end;

          { the result of the comparison is tested with the unsigned
            conditions (E/NE/B/BE/A/AE), mirrored when nf_swaped is set }
          location_reset(location,LOC_FLAGS,OS_NO);
          location.resflags:=getresflags(true);
        end
      else
        begin
          location_reset(location,LOC_FPUREGISTER,def_cgsize(resulttype.def));
          location.register:=R_ST;
        end;
    end;
  654. {*****************************************************************************
  655. AddSmallSet
  656. *****************************************************************************}
  { Generates code for operations on small sets, which are handled as 32 bit
    (S_L) values: union, symmetric difference, intersection, difference,
    (in)equality and the subset tests lten/gten.
    A setdef operand that is not a smallset ends in
    internalerror(200203301). }
  procedure ti386addnode.second_addsmallset;
    var
      opsize : TOpSize;
      op : TAsmOp;
      cmpop,
      pushedfpu,
      extra_not,
      noswap : boolean;
    begin
      pass_left_and_right(pushedfpu);

      { when a setdef is passed, it has to be a smallset }
      if ((left.resulttype.def.deftype=setdef) and
          (tsetdef(left.resulttype.def).settype<>smallset)) or
         ((right.resulttype.def.deftype=setdef) and
          (tsetdef(right.resulttype.def).settype<>smallset)) then
        internalerror(200203301);

      opsize:=S_L;
      cmpop:=false;
      noswap:=false;
      extra_not:=false;

      case nodetype of
        addn :
          begin
            { Original remark: this is a really ugly hack; it could be done
              using EDI, as for subn, instead of two registers.
              Adding elements is not commutative, so a set element that was
              swapped to the left goes back to the right. }
            if (nf_swaped in flags) and (left.nodetype=setelementn) then
              swapleftright;

            if right.nodetype<>setelementn then
              { plain union of two sets }
              op:=A_OR
            else
              begin
                { adding a single element; ranges are not supported for
                  smallsets }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { bts requires both elements to be registers }
                location_force_reg(left.location,opsize_2_cgsize[opsize],false);
                location_force_reg(right.location,opsize_2_cgsize[opsize],true);
                op:=A_BTS;
                noswap:=true;
              end;
          end;

        symdifn,
        xorn :
          op:=A_XOR;

        muln,
        andn :
          op:=A_AND;

        orn :
          op:=A_OR;

        subn :
          begin
            { a-b is computed as a and not(b); a constant subtrahend is
              complemented right here, otherwise emit_generic_code has to
              emit the NOT }
            op:=A_AND;
            if (not(nf_swaped in flags)) and
               (right.location.loc=LOC_CONSTANT) then
              right.location.value := not(right.location.value)
            else if (nf_swaped in flags) and
                    (left.location.loc=LOC_CONSTANT) then
              left.location.value := not(left.location.value)
            else
              extra_not:=true;
          end;

        equaln,
        unequaln :
          begin
            op:=A_CMP;
            cmpop:=true;
          end;

        lten,
        gten :
          begin
            { only lten and gten get here, so this is true exactly for an
              unswapped lten or a swapped gten }
            if (nf_swaped in flags) = (nodetype = gten) then
              swapleftright;
            { left := left and right, then compare left with right below }
            location_force_reg(left.location,opsize_2_cgsize[opsize],true);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            cmpop:=true;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;

        else
          { no < or > support for sets }
          CGMessage(type_e_mismatch);
      end;

      { left must be a register }
      left_must_be_reg(opsize,noswap);
      emit_generic_code(op,opsize,true,extra_not,false);

      location_freetemp(exprasmlist,right.location);
      location_release(exprasmlist,right.location);
      { a comparison leaves its result in the flags, left can go as well }
      if cmpop then
        begin
          location_freetemp(exprasmlist,left.location);
          location_release(exprasmlist,left.location);
        end;
      set_result_location(cmpop,true);
    end;
  762. {*****************************************************************************
  763. Add64bit
  764. *****************************************************************************}
  765. procedure ti386addnode.second_add64bit;
  766. var
  767. op : TOpCG;
  768. op1,op2 : TAsmOp;
  769. opsize : TOpSize;
  770. hregister,
  771. hregister2 : tregister;
  772. href : treference;
  773. hl4 : tasmlabel;
  774. pushedfpu,
  775. mboverflow,
  776. cmpop,
  777. unsigned : boolean;
  { Emits the jumps that follow the first compare of a 64 bit comparison
    (presumably the compare of the high dwords - the companion routine
    handles the low dword).
    For the ordering relations a jump to truelabel is emitted for the strict
    relation and a jump to falselabel for the opposite strict relation; when
    neither is taken, execution falls through. Uses the variable "unsigned"
    of the enclosing procedure. }
  procedure firstjmp64bitcmp;
    var
      savednodetype : tnodetype;
    begin
      load_all_regvars(exprasmlist);

      if nodetype in [ltn,gtn] then
        begin
          emitjmp(flags_to_cond(getresflags(unsigned)),truelabel);
          { toggling the swapped flag makes getresflags return the opposite
            strict relation; the flag is put back afterwards }
          toggleflag(nf_swaped);
          emitjmp(flags_to_cond(getresflags(unsigned)),falselabel);
          toggleflag(nf_swaped);
        end
      else if nodetype in [lten,gten] then
        begin
          { the node type is changed temporarily, because getresflags reads
            it: first the strict form of the relation, then its opposite }
          savednodetype:=nodetype;
          if savednodetype=lten then
            nodetype:=ltn
          else
            nodetype:=gtn;
          emitjmp(flags_to_cond(getresflags(unsigned)),truelabel);
          if savednodetype=lten then
            nodetype:=gtn
          else
            nodetype:=ltn;
          emitjmp(flags_to_cond(getresflags(unsigned)),falselabel);
          nodetype:=savednodetype;
        end
      else if nodetype=equaln then
        emitjmp(C_NE,falselabel)
      else if nodetype=unequaln then
        emitjmp(C_NE,truelabel);
    end;
  815. procedure secondjmp64bitcmp;
  816. begin
  817. { the jump the sequence is a little bit hairy }
  818. case nodetype of
  819. ltn,gtn,lten,gten:
  820. begin
  821. { the comparisaion of the low dword have to be }
  822. { always unsigned! }
  823. emitjmp(flags_to_cond(getresflags(true)),truelabel);
  824. emitjmp(C_None,falselabel);
  825. end;
  826. equaln:
  827. begin
  828. emitjmp(C_NE,falselabel);
  829. emitjmp(C_None,truelabel);
  830. end;
  831. unequaln:
  832. begin
  833. emitjmp(C_NE,truelabel);
  834. emitjmp(C_None,falselabel);
  835. end;
  836. end;
  837. end;
  838. begin
  839. firstcomplex(self);
  840. pass_left_and_right(pushedfpu);
  841. op1:=A_NONE;
  842. op2:=A_NONE;
  843. mboverflow:=false;
  844. cmpop:=false;
  845. opsize:=S_L;
  846. unsigned:=((left.resulttype.def.deftype=orddef) and
  847. (torddef(left.resulttype.def).typ=u64bit)) or
  848. ((right.resulttype.def.deftype=orddef) and
  849. (torddef(right.resulttype.def).typ=u64bit));
  850. case nodetype of
  851. addn :
  852. begin
  853. op:=OP_ADD;
  854. mboverflow:=true;
  855. end;
  856. subn :
  857. begin
  858. op:=OP_SUB;
  859. op1:=A_SUB;
  860. op2:=A_SBB;
  861. mboverflow:=true;
  862. end;
  863. ltn,lten,
  864. gtn,gten,
  865. equaln,unequaln:
  866. begin
  867. op:=OP_NONE;
  868. cmpop:=true;
  869. end;
  870. xorn:
  871. op:=OP_XOR;
  872. orn:
  873. op:=OP_OR;
  874. andn:
  875. op:=OP_AND;
  876. muln:
  877. begin
  878. { should be handled in pass_1 (JM) }
  879. internalerror(200109051);
  880. end;
  881. else
  882. CGMessage(type_e_mismatch);
  883. end;
  884. { left and right no register? }
  885. { then one must be demanded }
  886. if (left.location.loc<>LOC_REGISTER) then
  887. begin
  888. if (right.location.loc<>LOC_REGISTER) then
  889. begin
  890. { we can reuse a CREGISTER for comparison }
  891. if not((left.location.loc=LOC_CREGISTER) and cmpop) then
  892. begin
  893. if (left.location.loc<>LOC_CREGISTER) then
  894. begin
  895. location_freetemp(exprasmlist,left.location);
  896. location_release(exprasmlist,left.location);
  897. end;
  898. hregister:=rg.getregisterint(exprasmlist);
  899. hregister2:=rg.getregisterint(exprasmlist);
  900. tcg64f32(cg).a_load64_loc_reg(exprasmlist,left.location,hregister,hregister2);
  901. location_reset(left.location,LOC_REGISTER,OS_64);
  902. left.location.registerlow:=hregister;
  903. left.location.registerhigh:=hregister2;
  904. end;
  905. end
  906. else
  907. begin
  908. location_swap(left.location,right.location);
  909. toggleflag(nf_swaped);
  910. end;
  911. end;
  912. { at this point, left.location.loc should be LOC_REGISTER }
  913. if right.location.loc=LOC_REGISTER then
  914. begin
  915. { when swapped another result register }
  916. if (nodetype=subn) and (nf_swaped in flags) then
  917. begin
  918. tcg64f32(cg).a_op64_reg_reg(exprasmlist,op,
  919. left.location.registerlow,left.location.registerhigh,
  920. right.location.registerlow,right.location.registerhigh);
  921. location_swap(left.location,right.location);
  922. toggleflag(nf_swaped);
  923. end
  924. else if cmpop then
  925. begin
  926. emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
  927. firstjmp64bitcmp;
  928. emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
  929. secondjmp64bitcmp;
  930. end
  931. else
  932. begin
  933. tcg64f32(cg).a_op64_reg_reg(exprasmlist,op,
  934. right.location.registerlow,right.location.registerhigh,
  935. left.location.registerlow,left.location.registerhigh);
  936. end;
  937. location_release(exprasmlist,right.location);
  938. end
  939. else
  940. begin
  941. { right.location<>LOC_REGISTER }
  942. if (nodetype=subn) and (nf_swaped in flags) then
  943. begin
  944. rg.getexplicitregisterint(exprasmlist,R_EDI);
  945. tcg64f32(cg).a_load64low_loc_reg(exprasmlist,right.location,R_EDI);
  946. emit_reg_reg(op1,opsize,left.location.registerlow,R_EDI);
  947. emit_reg_reg(A_MOV,opsize,R_EDI,left.location.registerlow);
  948. tcg64f32(cg).a_load64high_loc_reg(exprasmlist,right.location,R_EDI);
  949. { the carry flag is still ok }
  950. emit_reg_reg(op2,opsize,left.location.registerhigh,R_EDI);
  951. emit_reg_reg(A_MOV,opsize,R_EDI,left.location.registerhigh);
  952. rg.ungetregisterint(exprasmlist,R_EDI);
  953. if right.location.loc<>LOC_CREGISTER then
  954. begin
  955. location_freetemp(exprasmlist,right.location);
  956. location_release(exprasmlist,right.location);
  957. end;
  958. end
  959. else if cmpop then
  960. begin
  961. case right.location.loc of
  962. LOC_CREGISTER :
  963. begin
  964. emit_reg_reg(A_CMP,S_L,right.location.registerhigh,left.location.registerhigh);
  965. firstjmp64bitcmp;
  966. emit_reg_reg(A_CMP,S_L,right.location.registerlow,left.location.registerlow);
  967. secondjmp64bitcmp;
  968. end;
  969. LOC_CREFERENCE,
  970. LOC_REFERENCE :
  971. begin
  972. href:=right.location.reference;
  973. inc(href.offset,4);
  974. emit_ref_reg(A_CMP,S_L,href,left.location.registerhigh);
  975. firstjmp64bitcmp;
  976. emit_ref_reg(A_CMP,S_L,right.location.reference,left.location.registerlow);
  977. secondjmp64bitcmp;
  978. emitjmp(C_None,falselabel);
  979. location_freetemp(exprasmlist,right.location);
  980. location_release(exprasmlist,right.location);
  981. end;
  982. LOC_CONSTANT :
  983. begin
  984. exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,right.location.valuehigh,left.location.registerhigh));
  985. firstjmp64bitcmp;
  986. exprasmlist.concat(taicpu.op_const_reg(A_CMP,S_L,right.location.valuelow,left.location.registerlow));
  987. secondjmp64bitcmp;
  988. end;
  989. else
  990. internalerror(200203282);
  991. end;
  992. end
  993. else
  994. begin
  995. tcg64f32(cg).a_op64_loc_reg(exprasmlist,op,right.location,
  996. left.location.registerlow,left.location.registerhigh);
  997. if (right.location.loc<>LOC_CREGISTER) then
  998. begin
  999. location_freetemp(exprasmlist,right.location);
  1000. location_release(exprasmlist,right.location);
  1001. end;
  1002. end;
  1003. end;
  1004. if (left.location.loc<>LOC_CREGISTER) and cmpop then
  1005. begin
  1006. location_freetemp(exprasmlist,left.location);
  1007. location_release(exprasmlist,left.location);
  1008. end;
  1009. { only in case of overflow operations }
  1010. { produce overflow code }
  1011. { we must put it here directly, because sign of operation }
  1012. { is in unsigned VAR!! }
  1013. if mboverflow then
  1014. begin
  1015. if cs_check_overflow in aktlocalswitches then
  1016. begin
  1017. getlabel(hl4);
  1018. if unsigned then
  1019. emitjmp(C_NB,hl4)
  1020. else
  1021. emitjmp(C_NO,hl4);
  1022. emitcall('FPC_OVERFLOW');
  1023. emitlab(hl4);
  1024. end;
  1025. end;
  1026. { we have LOC_JUMP as result }
  1027. if cmpop then
  1028. location_reset(location,LOC_JUMP,OS_NO)
  1029. else
  1030. location_copy(location,left.location);
  1031. end;
  1032. {*****************************************************************************
  1033. AddMMX
  1034. *****************************************************************************}
  1035. {$ifdef SUPPORT_MMX}
  1036. procedure ti386addnode.second_addmmx;
  1037. var
  1038. op : TAsmOp;
  1039. pushedfpu,
  1040. cmpop : boolean;
  1041. mmxbase : tmmxtype;
  1042. hregister : tregister;
  1043. begin
  1044. pass_left_and_right(pushedfpu);
  1045. cmpop:=false;
  1046. mmxbase:=mmx_type(left.resulttype.def);
  1047. case nodetype of
  1048. addn :
  1049. begin
  1050. if (cs_mmx_saturation in aktlocalswitches) then
  1051. begin
  1052. case mmxbase of
  1053. mmxs8bit:
  1054. op:=A_PADDSB;
  1055. mmxu8bit:
  1056. op:=A_PADDUSB;
  1057. mmxs16bit,mmxfixed16:
  1058. op:=A_PADDSB;
  1059. mmxu16bit:
  1060. op:=A_PADDUSW;
  1061. end;
  1062. end
  1063. else
  1064. begin
  1065. case mmxbase of
  1066. mmxs8bit,mmxu8bit:
  1067. op:=A_PADDB;
  1068. mmxs16bit,mmxu16bit,mmxfixed16:
  1069. op:=A_PADDW;
  1070. mmxs32bit,mmxu32bit:
  1071. op:=A_PADDD;
  1072. end;
  1073. end;
  1074. end;
  1075. muln :
  1076. begin
  1077. case mmxbase of
  1078. mmxs16bit,mmxu16bit:
  1079. op:=A_PMULLW;
  1080. mmxfixed16:
  1081. op:=A_PMULHW;
  1082. end;
  1083. end;
  1084. subn :
  1085. begin
  1086. if (cs_mmx_saturation in aktlocalswitches) then
  1087. begin
  1088. case mmxbase of
  1089. mmxs8bit:
  1090. op:=A_PSUBSB;
  1091. mmxu8bit:
  1092. op:=A_PSUBUSB;
  1093. mmxs16bit,mmxfixed16:
  1094. op:=A_PSUBSB;
  1095. mmxu16bit:
  1096. op:=A_PSUBUSW;
  1097. end;
  1098. end
  1099. else
  1100. begin
  1101. case mmxbase of
  1102. mmxs8bit,mmxu8bit:
  1103. op:=A_PSUBB;
  1104. mmxs16bit,mmxu16bit,mmxfixed16:
  1105. op:=A_PSUBW;
  1106. mmxs32bit,mmxu32bit:
  1107. op:=A_PSUBD;
  1108. end;
  1109. end;
  1110. end;
  1111. xorn:
  1112. op:=A_PXOR;
  1113. orn:
  1114. op:=A_POR;
  1115. andn:
  1116. op:=A_PAND;
  1117. else
  1118. CGMessage(type_e_mismatch);
  1119. end;
  1120. { left and right no register? }
  1121. { then one must be demanded }
  1122. if (left.location.loc<>LOC_MMXREGISTER) then
  1123. begin
  1124. if (right.location.loc=LOC_MMXREGISTER) then
  1125. begin
  1126. location_swap(left.location,right.location);
  1127. toggleflag(nf_swaped);
  1128. end
  1129. else
  1130. begin
  1131. { register variable ? }
  1132. if (left.location.loc=LOC_CMMXREGISTER) then
  1133. begin
  1134. hregister:=rg.getregistermm(exprasmlist);
  1135. emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
  1136. end
  1137. else
  1138. begin
  1139. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  1140. internalerror(200203245);
  1141. location_release(exprasmlist,left.location);
  1142. hregister:=rg.getregistermm(exprasmlist);
  1143. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
  1144. end;
  1145. location_reset(left.location,LOC_MMXREGISTER,OS_NO);
  1146. left.location.register:=hregister;
  1147. end;
  1148. end;
  1149. { at this point, left.location.loc should be LOC_MMXREGISTER }
  1150. if right.location.loc<>LOC_MMXREGISTER then
  1151. begin
  1152. if (nodetype=subn) and (nf_swaped in flags) then
  1153. begin
  1154. if right.location.loc=LOC_CMMXREGISTER then
  1155. begin
  1156. emit_reg_reg(A_MOVQ,S_NO,right.location.register,R_MM7);
  1157. emit_reg_reg(op,S_NO,left.location.register,R_MM7);
  1158. emit_reg_reg(A_MOVQ,S_NO,R_MM7,left.location.register);
  1159. end
  1160. else
  1161. begin
  1162. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  1163. internalerror(200203247);
  1164. emit_ref_reg(A_MOVQ,S_NO,right.location.reference,R_MM7);
  1165. emit_reg_reg(op,S_NO,left.location.register,R_MM7);
  1166. emit_reg_reg(A_MOVQ,S_NO,R_MM7,left.location.register);
  1167. location_release(exprasmlist,right.location);
  1168. end;
  1169. end
  1170. else
  1171. begin
  1172. if (right.location.loc=LOC_CMMXREGISTER) then
  1173. begin
  1174. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  1175. end
  1176. else
  1177. begin
  1178. if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  1179. internalerror(200203246);
  1180. emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
  1181. location_release(exprasmlist,right.location);
  1182. end;
  1183. end;
  1184. end
  1185. else
  1186. begin
  1187. { right.location=LOC_MMXREGISTER }
  1188. if (nodetype=subn) and (nf_swaped in flags) then
  1189. begin
  1190. emit_reg_reg(op,S_NO,left.location.register,right.location.register);
  1191. location_swap(left.location,right.location);
  1192. toggleflag(nf_swaped);
  1193. end
  1194. else
  1195. begin
  1196. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  1197. end;
  1198. end;
  1199. location_freetemp(exprasmlist,right.location);
  1200. location_release(exprasmlist,right.location);
  1201. if cmpop then
  1202. begin
  1203. location_freetemp(exprasmlist,left.location);
  1204. location_release(exprasmlist,left.location);
  1205. end;
  1206. set_result_location(cmpop,true);
  1207. end;
  1208. {$endif SUPPORT_MMX}
  1209. {*****************************************************************************
  1210. pass_2
  1211. *****************************************************************************}
  1212. procedure ti386addnode.pass_2;
  1213. { is also being used for xor, and "mul", "sub, or and comparative }
  1214. { operators }
  1215. var
  1216. popeax,popedx,
  1217. pushedfpu,
  1218. mboverflow,cmpop : boolean;
  1219. op : tasmop;
  1220. power : longint;
  1221. opsize : topsize;
  1222. { true, if unsigned types are compared }
  1223. unsigned : boolean;
  1224. { is_in_dest if the result is put directly into }
  1225. { the resulting refernce or varregister }
  1226. {is_in_dest : boolean;}
  1227. { true, if for sets subtractions the extra not should generated }
  1228. extra_not : boolean;
  1229. regstopush: tregisterset;
  1230. begin
  1231. { to make it more readable, string and set (not smallset!) have their
  1232. own procedures }
  1233. case left.resulttype.def.deftype of
  1234. orddef :
  1235. begin
  1236. { handling boolean expressions }
  1237. if is_boolean(left.resulttype.def) and
  1238. is_boolean(right.resulttype.def) then
  1239. begin
  1240. second_addboolean;
  1241. exit;
  1242. end
  1243. { 64bit operations }
  1244. else if is_64bitint(left.resulttype.def) then
  1245. begin
  1246. second_add64bit;
  1247. exit;
  1248. end;
  1249. end;
  1250. stringdef :
  1251. begin
  1252. second_addstring;
  1253. exit;
  1254. end;
  1255. setdef :
  1256. begin
  1257. { normalsets are already handled in pass1 }
  1258. if (tsetdef(left.resulttype.def).settype<>smallset) then
  1259. internalerror(200109041);
  1260. second_addsmallset;
  1261. exit;
  1262. end;
  1263. arraydef :
  1264. begin
  1265. {$ifdef SUPPORT_MMX}
  1266. if is_mmx_able_array(left.resulttype.def) then
  1267. begin
  1268. second_addmmx;
  1269. exit;
  1270. end;
  1271. {$endif SUPPORT_MMX}
  1272. end;
  1273. floatdef :
  1274. begin
  1275. second_addfloat;
  1276. exit;
  1277. end;
  1278. end;
  1279. { defaults }
  1280. {is_in_dest:=false;}
  1281. extra_not:=false;
  1282. mboverflow:=false;
  1283. cmpop:=false;
  1284. unsigned:=not(is_signed(left.resulttype.def)) or
  1285. not(is_signed(right.resulttype.def));
  1286. opsize:=def_opsize(left.resulttype.def);
  1287. pass_left_and_right(pushedfpu);
  1288. if (left.resulttype.def.deftype=pointerdef) or
  1289. (right.resulttype.def.deftype=pointerdef) or
  1290. (is_class_or_interface(right.resulttype.def) and is_class_or_interface(left.resulttype.def)) or
  1291. (left.resulttype.def.deftype=classrefdef) or
  1292. (left.resulttype.def.deftype=procvardef) or
  1293. ((left.resulttype.def.deftype=enumdef) and
  1294. (left.resulttype.def.size=4)) or
  1295. ((left.resulttype.def.deftype=orddef) and
  1296. (torddef(left.resulttype.def).typ in [s32bit,u32bit])) or
  1297. ((right.resulttype.def.deftype=orddef) and
  1298. (torddef(right.resulttype.def).typ in [s32bit,u32bit])) then
  1299. begin
  1300. case nodetype of
  1301. addn :
  1302. begin
  1303. op:=A_ADD;
  1304. mboverflow:=true;
  1305. end;
  1306. muln :
  1307. begin
  1308. if unsigned then
  1309. op:=A_MUL
  1310. else
  1311. op:=A_IMUL;
  1312. mboverflow:=true;
  1313. end;
  1314. subn :
  1315. begin
  1316. op:=A_SUB;
  1317. mboverflow:=true;
  1318. end;
  1319. ltn,lten,
  1320. gtn,gten,
  1321. equaln,unequaln :
  1322. begin
  1323. op:=A_CMP;
  1324. cmpop:=true;
  1325. end;
  1326. xorn :
  1327. op:=A_XOR;
  1328. orn :
  1329. op:=A_OR;
  1330. andn :
  1331. op:=A_AND;
  1332. else
  1333. CGMessage(type_e_mismatch);
  1334. end;
  1335. { filter MUL, which requires special handling }
  1336. if op=A_MUL then
  1337. begin
  1338. popeax:=false;
  1339. popedx:=false;
  1340. { here you need to free the symbol first }
  1341. { left.location and right.location must }
  1342. { only be freed when they are really released, }
  1343. { because the optimizer NEEDS correct regalloc }
  1344. { info!!! (JM) }
  1345. { the location.register will be filled in later (JM) }
  1346. location_reset(location,LOC_REGISTER,OS_INT);
  1347. {$IfNDef NoShlMul}
  1348. if right.nodetype=ordconstn then
  1349. swapleftright;
  1350. If (left.nodetype = ordconstn) and
  1351. ispowerof2(tordconstnode(left).value, power) and
  1352. not(cs_check_overflow in aktlocalswitches) then
  1353. Begin
  1354. { This release will be moved after the next }
  1355. { instruction by the optimizer. No need to }
  1356. { release left.location, since it's a }
  1357. { constant (JM) }
  1358. location_release(exprasmlist,right.location);
  1359. location.register:=rg.getregisterint(exprasmlist);
  1360. emitloadord2reg(right.location,torddef(u32bittype.def),location.register,false);
  1361. emit_const_reg(A_SHL,S_L,power,location.register)
  1362. End
  1363. Else
  1364. Begin
  1365. {$EndIf NoShlMul}
  1366. regstopush := all_registers;
  1367. remove_non_regvars_from_loc(right.location,regstopush);
  1368. remove_non_regvars_from_loc(left.location,regstopush);
  1369. { now, regstopush does NOT contain EAX and/or EDX if they are }
  1370. { used in either the left or the right location, excepts if }
  1371. {they are regvars. It DOES contain them if they are used in }
  1372. { another location (JM) }
  1373. if not(R_EAX in rg.unusedregsint) and
  1374. (R_EAX in regstopush) then
  1375. begin
  1376. emit_reg(A_PUSH,S_L,R_EAX);
  1377. popeax:=true;
  1378. end;
  1379. if not(R_EDX in rg.unusedregsint) and
  1380. (R_EDX in regstopush) then
  1381. begin
  1382. emit_reg(A_PUSH,S_L,R_EDX);
  1383. popedx:=true;
  1384. end;
  1385. { left.location can be R_EAX !!! }
  1386. rg.getexplicitregisterint(exprasmlist,R_EDI);
  1387. { load the left value }
  1388. emitloadord2reg(left.location,torddef(u32bittype.def),R_EDI,true);
  1389. location_release(exprasmlist,left.location);
  1390. { allocate EAX }
  1391. if R_EAX in rg.unusedregsint then
  1392. exprasmList.concat(Tairegalloc.Alloc(R_EAX));
  1393. { load he right value }
  1394. emitloadord2reg(right.location,torddef(u32bittype.def),R_EAX,true);
  1395. location_release(exprasmlist,right.location);
  1396. { allocate EAX if it isn't yet allocated (JM) }
  1397. if (R_EAX in rg.unusedregsint) then
  1398. exprasmList.concat(Tairegalloc.Alloc(R_EAX));
  1399. { also allocate EDX, since it is also modified by }
  1400. { a mul (JM) }
  1401. if R_EDX in rg.unusedregsint then
  1402. exprasmList.concat(Tairegalloc.Alloc(R_EDX));
  1403. emit_reg(A_MUL,S_L,R_EDI);
  1404. rg.ungetregisterint(exprasmlist,R_EDI);
  1405. if R_EDX in rg.unusedregsint then
  1406. exprasmList.concat(Tairegalloc.DeAlloc(R_EDX));
  1407. if R_EAX in rg.unusedregsint then
  1408. exprasmList.concat(Tairegalloc.DeAlloc(R_EAX));
  1409. location.register:=rg.getregisterint(exprasmlist);
  1410. emit_reg_reg(A_MOV,S_L,R_EAX,location.register);
  1411. if popedx then
  1412. emit_reg(A_POP,S_L,R_EDX);
  1413. if popeax then
  1414. emit_reg(A_POP,S_L,R_EAX);
  1415. {$IfNDef NoShlMul}
  1416. End;
  1417. {$endif NoShlMul}
  1418. location_freetemp(exprasmlist,left.location);
  1419. location_freetemp(exprasmlist,right.location);
  1420. exit;
  1421. end;
  1422. { Convert flags to register first }
  1423. if (left.location.loc=LOC_FLAGS) then
  1424. location_force_reg(left.location,opsize_2_cgsize[opsize],false);
  1425. if (right.location.loc=LOC_FLAGS) then
  1426. location_force_reg(right.location,opsize_2_cgsize[opsize],false);
  1427. left_must_be_reg(opsize,false);
  1428. emit_generic_code(op,opsize,unsigned,extra_not,mboverflow);
  1429. location_freetemp(exprasmlist,right.location);
  1430. location_release(exprasmlist,right.location);
  1431. if cmpop and
  1432. (left.location.loc<>LOC_CREGISTER) then
  1433. begin
  1434. location_freetemp(exprasmlist,left.location);
  1435. location_release(exprasmlist,left.location);
  1436. end;
  1437. set_result_location(cmpop,unsigned);
  1438. end
  1439. { 8/16 bit enum,char,wchar types }
  1440. else
  1441. if ((left.resulttype.def.deftype=orddef) and
  1442. (torddef(left.resulttype.def).typ in [uchar,uwidechar])) or
  1443. ((left.resulttype.def.deftype=enumdef) and
  1444. ((left.resulttype.def.size=1) or
  1445. (left.resulttype.def.size=2))) then
  1446. begin
  1447. case nodetype of
  1448. ltn,lten,gtn,gten,
  1449. equaln,unequaln :
  1450. cmpop:=true;
  1451. else
  1452. CGMessage(type_e_mismatch);
  1453. end;
  1454. left_must_be_reg(opsize,false);
  1455. emit_op_right_left(A_CMP,opsize);
  1456. location_freetemp(exprasmlist,right.location);
  1457. location_release(exprasmlist,right.location);
  1458. if left.location.loc<>LOC_CREGISTER then
  1459. begin
  1460. location_freetemp(exprasmlist,left.location);
  1461. location_release(exprasmlist,left.location);
  1462. end;
  1463. set_result_location(true,true);
  1464. end
  1465. else
  1466. CGMessage(type_e_mismatch);
  1467. end;
{ Unit initialization: assigns the i386 specific add node class to the   }
{ caddnode class reference (presumably so that the processor independent }
{ code creates ti386addnode instances - confirm against the unit that    }
{ declares caddnode).                                                     }
begin
  caddnode:=ti386addnode;
end.
  1471. {
  1472. $Log$
  1473. Revision 1.34 2002-04-25 20:16:40 peter
  1474. * moved more routines from cga/n386util
  1475. Revision 1.33 2002/04/05 15:09:13 jonas
  1476. * fixed web bug 1915
  1477. Revision 1.32 2002/04/04 19:06:10 peter
  1478. * removed unused units
  1479. * use tlocation.size in cg.a_*loc*() routines
  1480. Revision 1.31 2002/04/02 17:11:35 peter
  1481. * tlocation,treference update
  1482. * LOC_CONSTANT added for better constant handling
  1483. * secondadd splitted in multiple routines
  1484. * location_force_reg added for loading a location to a register
  1485. of a specified size
  1486. * secondassignment parses now first the right and then the left node
  1487. (this is compatible with Kylix). This saves a lot of push/pop especially
  1488. with string operations
  1489. * adapted some routines to use the new cg methods
  1490. Revision 1.29 2002/03/04 19:10:13 peter
  1491. * removed compiler warnings
  1492. Revision 1.28 2001/12/30 17:24:46 jonas
  1493. * range checking is now processor independent (part in cgobj,
  1494. part in cg64f32) and should work correctly again (it needed
  1495. some changes after the changes of the low and high of
  1496. tordef's to int64)
  1497. * maketojumpbool() is now processor independent (in ncgutil)
  1498. * getregister32 is now called :=rg.getregisterint(exprasmlist);
  1499. Revision 1.27 2001/12/29 15:29:58 jonas
  1500. * powerpc/cgcpu.pas compiles :)
  1501. * several powerpc-related fixes
  1502. * cpuasm unit is now based on common tainst unit
  1503. + nppcmat unit for powerpc (almost complete)
  1504. Revision 1.25 2001/10/12 13:51:51 jonas
  1505. * fixed internalerror(10) due to previous fpu overflow fixes ("merged")
  1506. * fixed bug in n386add (introduced after compilerproc changes for string
  1507. operations) where calcregisters wasn't called for shortstring addnodes
  1508. * NOTE: from now on, the location of a binary node must now always be set
  1509. before you call calcregisters() for it
  1510. Revision 1.24 2001/09/17 21:29:13 peter
  1511. * merged netbsd, fpu-overflow from fixes branch
  1512. Revision 1.23 2001/09/05 15:22:09 jonas
  1513. * made multiplying, dividing and mod'ing of int64 and qword processor
  1514. independent with compilerprocs (+ small optimizations by using shift/and
  1515. where possible)
  1516. Revision 1.22 2001/09/04 11:38:55 jonas
  1517. + searchsystype() and searchsystype() functions in symtable
  1518. * changed ninl and nadd to use these functions
  1519. * i386 set comparison functions now return their results in al instead
  1520. of in the flags so that they can be used as compilerprocs
  1521. - removed all processor specific code from n386add.pas that has to do
  1522. with set handling, it's now all done in nadd.pas
  1523. * fixed fpc_set_contains_sets in genset.inc
  1524. * fpc_set_in_byte is now coded inline in n386set.pas and doesn't use a
  1525. helper anymore
  1526. * some small fixes in compproc.inc/set.inc regarding the declaration of
  1527. internal helper types (fpc_small_set and fpc_normal_set)
  1528. Revision 1.21 2001/09/03 13:27:42 jonas
  1529. * compilerproc implementation of set addition/substraction/...
  1530. * changed the declaration of some set helpers somewhat to accomodate the
  1531. above change
  1532. * i386 still uses the old code for comparisons of sets, because its
  1533. helpers return the results in the flags
  1534. * dummy tc_normal_2_small_set type conversion because I need the original
  1535. resulttype of the set add nodes
  1536. NOTE: you have to start a cycle with 1.0.5!
  1537. Revision 1.20 2001/08/30 15:43:14 jonas
  1538. * converted adding/comparing of strings to compileproc. Note that due
  1539. to the way the shortstring helpers for i386 are written, they are
  1540. still handled by the old code (reason: fpc_shortstr_compare returns
  1541. results in the flags instead of in eax and fpc_shortstr_concat
  1542. has weird parameter conventions). The compilerproc stuff should work
  1543. fine with the generic implementations though.
  1544. * removed some nested comments warnings
  1545. Revision 1.19 2001/08/29 17:50:45 jonas
  1546. * removed unused var
  1547. Revision 1.18 2001/08/29 12:03:23 jonas
  1548. * fixed wrong regalloc info around FPC_MUL/DIV/MOD_INT64/QWORD calls
  1549. * fixed partial result overwriting with the above calls too
  1550. Revision 1.17 2001/08/26 13:36:55 florian
  1551. * some cg reorganisation
  1552. * some PPC updates
  1553. Revision 1.16 2001/07/08 21:00:16 peter
  1554. * various widestring updates, it works now mostly without charset
  1555. mapping supported
  1556. Revision 1.15 2001/06/25 14:11:37 jonas
  1557. * fixed set bug discovered by Carl (merged)
  1558. Revision 1.14 2001/06/18 20:36:25 peter
  1559. * -Ur switch (merged)
  1560. * masm fixes (merged)
  1561. * quoted filenames for go32v2 and win32
  1562. Revision 1.13 2001/05/27 14:30:56 florian
  1563. + some widestring stuff added
  1564. Revision 1.12 2001/05/06 17:12:14 jonas
  1565. * fixed an IE10 and another bug with [var1..var2] construct
  1566. Revision 1.11 2001/04/13 01:22:18 peter
  1567. * symtable change to classes
  1568. * range check generation and errors fixed, make cycle DEBUG=1 works
  1569. * memory leaks fixed
  1570. Revision 1.10 2001/04/02 21:20:36 peter
  1571. * resulttype rewrite
  1572. Revision 1.9 2000/12/31 11:14:11 jonas
  1573. + implemented/fixed docompare() methods for all nodes (not tested)
  1574. + nopt.pas, nadd.pas, i386/n386opt.pas: optimized nodes for adding strings
  1575. and constant strings/chars together
  1576. * n386add.pas: don't copy temp strings (of size 256) to another temp string
  1577. when adding
  1578. Revision 1.8 2000/12/25 00:07:32 peter
  1579. + new tlinkedlist class (merge of old tstringqueue,tcontainer and
  1580. tlinkedlist objects)
  1581. Revision 1.7 2000/12/16 15:56:18 jonas
  1582. - removed all ifdef cardinalmulfix code
  1583. Revision 1.6 2000/12/05 11:44:32 jonas
  1584. + new integer regvar handling, should be much more efficient
  1585. Revision 1.5 2000/11/29 00:30:45 florian
  1586. * unused units removed from uses clause
  1587. * some changes for widestrings
  1588. Revision 1.4 2000/11/13 11:30:56 florian
  1589. * some bugs with interfaces and NIL fixed
  1590. Revision 1.3 2000/11/04 14:25:23 florian
  1591. + merged Attila's changes for interfaces, not tested yet
  1592. Revision 1.2 2000/10/31 22:02:56 peter
  1593. * symtable splitted, no real code changes
  1594. Revision 1.1 2000/10/15 09:33:31 peter
  1595. * moved n386*.pas to i386/ cpu_target dir
  1596. Revision 1.6 2000/10/14 10:14:47 peter
  1597. * moehrendorf oct 2000 rewrite
  1598. Revision 1.5 2000/09/30 16:08:45 peter
  1599. * more cg11 updates
  1600. Revision 1.4 2000/09/24 15:06:18 peter
  1601. * use defines.inc
  1602. Revision 1.3 2000/09/22 22:42:52 florian
  1603. * more fixes
  1604. Revision 1.2 2000/09/21 12:24:22 jonas
  1605. * small fix to my changes for full boolean evaluation support (moved
  1606. opsize determination for boolean operations back in boolean
  1607. processing block)
  1608. + full boolean evaluation support (from cg386add)
  1609. Revision 1.1 2000/09/20 21:23:32 florian
  1610. * initial revision
  1611. }