nx86add.pas 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { x86 specialisation of the generic add node code generator (tcgaddnode).
    The protected section holds helpers shared by the second_* methods; the
    public section overrides the second pass entry points of the ancestor. }
  26. tx86addnode = class(tcgaddnode)
  27. protected
  { maps the comparison in nodetype (taking nf_swapped into account) to result flags }
  28. function getresflags(unsigned : boolean) : tresflags;
  { makes sure left.location is a LOC_REGISTER, swapping with right unless noswap is set }
  29. procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
  { forces both operands to LOC_FPUREGISTER and keeps nf_swapped in sync }
  30. procedure force_left_and_right_fpureg;
  { emits "op right,left"; left has to be a register already }
  31. procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
  { emits the integer operation including peephole shortcuts and the optional overflow check }
  32. procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
  { NOTE(review): implementation not visible in this part of the file }
  33. procedure second_cmpfloatvector;
  { floating point add/sub/mul/div producing a LOC_MMREGISTER result }
  34. procedure second_addfloatsse;
  35. procedure second_addfloatavx;
  36. public
  37. procedure second_addfloat;override;
  { the small set add is not compiled for the i8086 target }
  38. {$ifndef i8086}
  39. procedure second_addsmallset;override;
  40. {$endif not i8086}
  41. procedure second_add64bit;override;
  42. procedure second_cmpfloat;override;
  43. procedure second_cmpsmallset;override;
  44. procedure second_cmp64bit;override;
  45. procedure second_cmpordinal;override;
  { MMX operations only exist when the compiler is built with SUPPORT_MMX }
  46. {$ifdef SUPPORT_MMX}
  47. procedure second_opmmx;override;
  48. {$endif SUPPORT_MMX}
  49. procedure second_opvector;override;
  50. end;
  51. implementation
  52. uses
  53. globtype,globals,
  54. verbose,cutils,
  55. cpuinfo,
  56. aasmbase,aasmtai,aasmdata,aasmcpu,
  57. symconst,symdef,
  58. cgobj,hlcgobj,cgx86,cga,cgutils,
  59. paramgr,tgobj,ncgutil,
  60. ncon,nset,ninl,
  61. defutil;
  62. {*****************************************************************************
  63. Helpers
  64. *****************************************************************************}
  {
    Emits the integer instruction belonging to this node; the result ends up
    in left.location.register.

    op         : x86 opcode to emit
    opsize     : size of the operation
    unsigned   : selects which flag the overflow check tests (carry for
                 unsigned, overflow for signed)
    extra_not  : complement one operand before the operation; the caller in
                 this unit uses it together with A_AND for set difference
    mboverflow : the operation may overflow, so emit a call to FPC_OVERFLOW
                 when overflow checking is enabled

    Precondition: left.location.loc is LOC_REGISTER.
  }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the locations are exchanged before emitting }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, operate on it and copy
                the result back into the left register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { comparing against zero for (in)equality: TEST reg,reg }
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else
                { INC is only used when UseIncDec allows it, exactly like
                  the DEC replacement below }
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    { multiplication by a power of two becomes a shift }
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not(cs_check_overflow in current_settings.localswitches) then
                begin
                  { multiplication by 3, 5 or 9: LEA with base=index=left
                    and a scale factor of value-1, into a fresh register }
                  reference_reset_base(href,left.location.register,0,0);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { complement a copy of right and AND it into left }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl4);
              { jump over the FPC_OVERFLOW call when no overflow happened }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  {
    Makes sure the left operand lives in a register.

    opdef  : type definition the operands are converted to
    opsize : size of the operation
    noswap : when set, left and right locations are never exchanged

    If left is not in a register but right is, and swapping is allowed, the
    two locations are exchanged and nf_swapped is toggled; otherwise left is
    loaded into a register. Afterwards every non-constant operand whose size
    (ignoring signedness) differs from opsize is forced into a register of
    type opdef.
  }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      may_swap,
      is_compare : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          may_swap:=(right.location.loc=LOC_REGISTER) and not(noswap);
          if not may_swap then
            begin
              { a comparison does not modify the register, so a constant
                register may be reused for it }
              is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare);
            end
          else
            begin
              { right already is a register: simply exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { size fix-up for right; constants need no conversion }
      if right.location.loc<>LOC_CONSTANT then
        if tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize] then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      { size fix-up for left }
      if left.location.loc<>LOC_CONSTANT then
        if tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize] then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  {
    Loads both operands into LOC_FPUREGISTER locations.

    Right is loaded first when it is not on the fpu stack yet. Left is then
    either loaded as well or, when it already was an fpu register, the
    nf_swapped flag is toggled because the operands sit in the opposite
    order on the stack.
  }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      { step 1: get right onto the fpu stack if necessary }
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      { step 2: the same decision applies to left no matter where right
        came from }
      if left.location.loc=LOC_FPUREGISTER then
        { left was on the stack already => operands are in the wrong order }
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  {
    Emits "op right,left" where left.location.register is the destination.

    op     : opcode to emit
    opsize : size of the operation

    Right may be a register, a memory reference or a constant; subset
    locations are loaded into a register first. Any other location raises
    internalerror 200203232.
  }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      instrsize : topsize;
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
    begin
      instrsize:=TCGSize2Opsize[opsize];

      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly,
              bigger ones go through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,instrsize,constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,instrsize,right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,instrsize,right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,instrsize,right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  {
    Returns the result flags that represent the comparison stored in
    nodetype.

    unsigned : true selects the above/below flags, false the
               greater/less flags

    When nf_swapped is set the relational flags are mirrored. Equality
    and inequality are independent of signedness and operand order. Any
    node type other than the six comparisons raises an internalerror whose
    number depends on the signedness/swapped combination.
  }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      mirrored : boolean;
    begin
      mirrored:=nf_swapped in flags;
      case nodetype of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          if unsigned then
            begin
              if mirrored then
                getresflags:=F_A
              else
                getresflags:=F_B;
            end
          else if mirrored then
            getresflags:=F_G
          else
            getresflags:=F_L;
        lten :
          if unsigned then
            begin
              if mirrored then
                getresflags:=F_AE
              else
                getresflags:=F_BE;
            end
          else if mirrored then
            getresflags:=F_GE
          else
            getresflags:=F_LE;
        gtn :
          if unsigned then
            begin
              if mirrored then
                getresflags:=F_B
              else
                getresflags:=F_A;
            end
          else if mirrored then
            getresflags:=F_L
          else
            getresflags:=F_G;
        gten :
          if unsigned then
            begin
              if mirrored then
                getresflags:=F_BE
              else
                getresflags:=F_AE;
            end
          else if mirrored then
            getresflags:=F_LE
          else
            getresflags:=F_GE;
        else
          begin
            { not a comparison node: keep one error number per variant }
            if unsigned then
              begin
                if mirrored then
                  internalerror(2013120107)
                else
                  internalerror(2013120108);
              end
            else if mirrored then
              internalerror(2013120105)
            else
              internalerror(2013120106);
          end;
      end;
    end;
  325. {*****************************************************************************
  326. AddSmallSet
  327. *****************************************************************************}
  328. {$ifndef i8086}
  { Generates code for operations on small sets: add (union or inclusion of
    a single element), symmetric difference, intersection and difference,
    plus the plain xor/or/and node types. The result is left in a register
    and stored in location. Unsupported node types raise internalerror
    2003042215. }
  329. procedure tx86addnode.second_addsmallset;
  330. var
  331. setbase : aint;
  332. opdef : tdef;
  333. opsize : TCGSize;
  334. op : TAsmOp;
  335. extra_not,
  336. noswap : boolean;
  337. all_member_optimization:boolean;
  338. begin
  339. pass_left_right;
  340. noswap:=false;
  341. extra_not:=false;
  342. all_member_optimization:=false;
  343. opdef:=resultdef;
  344. opsize:=int_cgsize(opdef.size);
  { the set base is taken from whichever operand is the set; the other one
    can be a set element node }
  345. if (left.resultdef.typ=setdef) then
  346. setbase:=tsetdef(left.resultdef).setbase
  347. else
  348. setbase:=tsetdef(right.resultdef).setbase;
  349. case nodetype of
  350. addn :
  351. begin
  352. { adding elements is not commutative }
  353. if (nf_swapped in flags) and (left.nodetype=setelementn) then
  354. swapleftright;
  355. { are we adding set elements ? }
  356. if right.nodetype=setelementn then
  357. begin
  358. { no range support for smallsets! }
  359. if assigned(tsetelementnode(right).right) then
  360. internalerror(43244);
  361. { btsb isn't supported }
  { so an 8 bit set is widened to 32 bit here; it is narrowed again at the
    end of this procedure }
  362. if opsize=OS_8 then
  363. begin
  364. opsize:=OS_32;
  365. opdef:=u32inttype;
  366. end;
  367. { bts requires both elements to be registers }
  368. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
  369. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
  370. register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
  371. op:=A_BTS;
  { the operands of bts must not be exchanged by left_must_be_reg }
  372. noswap:=true;
  373. end
  374. else
  375. op:=A_OR;
  376. end;
  377. symdifn :
  378. op:=A_XOR;
  379. muln :
  380. op:=A_AND;
  381. subn :
  382. begin
  { set difference is computed as an AND with the complement of the subtrahend }
  383. op:=A_AND;
  { minuend is the constant -1 (all bits set): a single NOT is enough }
  384. if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
  385. ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
  386. all_member_optimization:=true;
  { a constant subtrahend is complemented right here, otherwise the
    complement has to be emitted as code (extra_not) }
  387. if (not(nf_swapped in flags)) and
  388. (right.location.loc=LOC_CONSTANT) then
  389. right.location.value := not(right.location.value)
  390. else if (nf_swapped in flags) and
  391. (left.location.loc=LOC_CONSTANT) then
  392. left.location.value := not(left.location.value)
  393. else
  394. extra_not:=true;
  395. end;
  396. xorn :
  397. op:=A_XOR;
  398. orn :
  399. op:=A_OR;
  400. andn :
  401. op:=A_AND;
  402. else
  403. internalerror(2003042215);
  404. end;
  405. if all_member_optimization then
  406. begin
  407. {A set expression [0..31]-x can be implemented with a simple NOT.}
  408. if nf_swapped in flags then
  409. begin
  410. { newly swapped also set swapped flag }
  411. location_swap(left.location,right.location);
  412. toggleflag(nf_swapped);
  413. end;
  414. hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
  415. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  416. location:=right.location;
  417. end
  418. else
  419. begin
  420. { left must be a register }
  421. left_must_be_reg(opdef,opsize,noswap);
  { sets are handled as unsigned values and need no overflow check }
  422. emit_generic_code(op,opsize,true,extra_not,false);
  423. location_freetemp(current_asmdata.CurrAsmList,right.location);
  424. { left is always a register and contains the result }
  425. location:=left.location;
  426. end;
  427. { fix the changed opsize we did above because of the missing btsb }
  428. if opsize<>int_cgsize(resultdef.size) then
  429. hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
  430. end;
  431. {$endif not i8086}
  {
    Generates code for comparing two small sets; the result is returned in
    the flags (LOC_FLAGS).

    Equality and inequality are a plain CMP. The subset tests (lten, gten)
    first AND the two sets and then compare the result for equality, so the
    node type is rewritten to equaln. Any other node type raises
    internalerror 2003042215.
  }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmpdef : tdef;
      cmpsize : TCGSize;
    begin
      pass_left_right;
      cmpdef:=left.resultdef;
      cmpsize:=int_cgsize(cmpdef.size);

      case nodetype of
        equaln,
        unequaln :
          { nothing to prepare, the CMP below does all the work }
          ;
        lten,gten:
          begin
            { exchange the operands for an unswapped lten or a swapped gten }
            if (nodetype=lten) xor (nf_swapped in flags) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmpdef,false);
            emit_op_right_left(A_AND,cmpsize);
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  467. {*****************************************************************************
  468. AddMMX
  469. *****************************************************************************}
  470. {$ifdef SUPPORT_MMX}
  {
    Generates code for MMX operations: add, subtract and multiply (with or
    without saturation, depending on cs_mmx_saturation and the element
    type) and the bitwise xor/or/and. The result is placed in an MMX
    register (LOC_MMXREGISTER). Unsupported node types raise internalerror
    2003042214.

    NOTE(review): the inner "case mmxbase" statements have no else branch,
    so op stays unassigned for element types that are not listed there.
  }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { cmpop is never set to true in this procedure }
      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { signed 16 bit elements need the word variant, in line
                    with A_PADDSW used for the saturated addition }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a new register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { right is the operand read from memory below, so right
                    is the location that has to be a reference; left is
                    already an MMX register at this point }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  639. {$endif SUPPORT_MMX}
  640. {*****************************************************************************
  641. AddFloat
  642. *****************************************************************************}
  { Generates code for floating point add, subtract, multiply and divide
    with the result in an mm register (LOC_MMREGISTER). Contains a special
    path for sqr(a)+sqr(b) and sqr(a)-sqr(b) that uses packed multiply and
    horizontal add/subtract instructions. Other node types raise
    internalerror 200312231. }
  643. procedure tx86addnode.second_addfloatsse;
  644. var
  645. op : topcg;
  646. sqr_sum : boolean;
  647. tmp : tnode;
  648. begin
  649. sqr_sum:=false;
  { detect sqr(x) +/- sqr(y): both operands are in_sqr_real inline nodes,
    the fpu type is at least fpu_sse3 and the result type uses the vector fpu }
  650. if (current_settings.fputype>=fpu_sse3) and
  651. use_vectorfpu(resultdef) and
  652. (nodetype in [addn,subn]) and
  653. (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
  654. (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
  655. begin
  656. sqr_sum:=true;
  { strip the sqr nodes and keep only their arguments; the child pointer is
    set to nil before freeing the inline node so the argument survives }
  657. tmp:=tinlinenode(left).left;
  658. tinlinenode(left).left:=nil;
  659. left.free;
  660. left:=tmp;
  661. tmp:=tinlinenode(right).left;
  662. tinlinenode(right).left:=nil;
  663. right.free;
  664. right:=tmp;
  665. end;
  666. pass_left_right;
  667. { fpu operands are always in reversed order on the stack }
  668. if (left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER) then
  669. toggleflag(nf_swapped);
  670. if (nf_swapped in flags) then
  671. { can't use swapleftright if both are on the fpu stack, since then }
  672. { both are "R_ST" -> nothing would change -> manually switch }
  673. if (left.location.loc = LOC_FPUREGISTER) and
  674. (right.location.loc = LOC_FPUREGISTER) then
  675. emit_none(A_FXCH,S_NO)
  676. else
  677. swapleftright;
  { translate the node type into the generic code generator operation }
  678. case nodetype of
  679. addn :
  680. op:=OP_ADD;
  681. muln :
  682. op:=OP_MUL;
  683. subn :
  684. op:=OP_SUB;
  685. slashn :
  686. op:=OP_DIV;
  687. else
  688. internalerror(200312231);
  689. end;
  690. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  691. if sqr_sum then
  692. begin
  693. if nf_swapped in flags then
  694. swapleftright;
  695. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  696. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  { the left register is reused as result register }
  697. location:=left.location;
  698. if is_double(resultdef) then
  699. begin
  { doubles: combine both values in one register, square both lanes with one
    MULPD and reduce them with a horizontal add or subtract }
  700. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
  701. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
  702. case nodetype of
  703. addn:
  704. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
  705. subn:
  706. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
  707. else
  708. internalerror(201108162);
  709. end;
  710. end
  711. else
  712. begin
  { singles: interleave both values with UNPCKLPS, square with MULPS and
    reduce with a horizontal add or subtract }
  713. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
  714. { ensure that bits 64..127 contain valid values }
  715. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
  716. { the data is now in bits 0..32 and 64..95 }
  { NOTE(review): the bit ranges in the comment above look inexact; after
    UNPCKLPS the two operands are expected in bits 0..31 and 32..63 - confirm }
  717. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
  718. case nodetype of
  719. addn:
  720. begin
  721. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
  722. end;
  723. subn:
  724. begin
  725. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
  726. end;
  727. else
  728. internalerror(201108163);
  729. end;
  730. end
  731. end
  732. { we can use only right as left operand if the operation is commutative }
  733. else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  734. begin
  { copy right into a new result register and apply left to it }
  735. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  736. cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
  737. { force floating point reg. location to be written to memory,
  738. we don't force it to mm register because writing to memory
  739. allows probably shorter code because there is no direct fpu->mm register
  740. copy instruction
  741. }
  742. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  743. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  744. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
  745. end
  746. else
  747. begin
  { general case: load left into a new result register, then apply right }
  748. if nf_swapped in flags then
  749. swapleftright;
  750. { force floating point reg. location to be written to memory,
  751. we don't force it to mm register because writing to memory
  752. allows probably shorter code because there is no direct fpu->mm register
  753. copy instruction
  754. }
  755. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  756. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  757. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  758. cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
  759. { force floating point reg. location to be written to memory,
  760. we don't force it to mm register because writing to memory
  761. allows probably shorter code because there is no direct fpu->mm register
  762. copy instruction
  763. }
  764. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  765. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  766. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
  767. end;
  768. end;
  procedure tx86addnode.second_addfloatavx;
    { Emits AVX code for a scalar floating point addn/muln/subn/slashn node.
      The result location is reset to LOC_MMREGISTER; any other node type
      raises internalerror 200312231. The regular paths use the
      three-operand cg.a_opmm_*_reg_reg helpers, so the result is written to
      a freshly allocated MM register. }
    var
      cgop : topcg;
      fuse_squares : boolean;
      both_on_fpu_stack : boolean;
      inner : tnode;
    begin
      fuse_squares:=false;
{$ifdef dummy}
      { disabled experiment: recognise sqr(a)+sqr(b) / sqr(a)-sqr(b) and
        strip the sqr() nodes so that both squares can be computed with one
        packed multiply followed by a horizontal add/sub }
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;

          { replace left by the argument of its sqr() }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          { replace right by the argument of its sqr() }
          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;

      { the locations are not modified by the flag handling below, so the
        "both operands live on the fpu stack" test is evaluated only once }
      both_on_fpu_stack:=(left.location.loc=LOC_FPUREGISTER) and
                         (right.location.loc=LOC_FPUREGISTER);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpu_stack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless if both operands are on the fpu stack:
            both are "R_ST", so nothing would change. Exchange the stack
            entries manually in that case. }
          if both_on_fpu_stack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      { map the node type to the generic code generator operation }
      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          { only reachable when the "dummy" define is set }
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built in the register of the left operand }
          location:=left.location;

          if not is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the data is now in bits 0..32 and 64..95 }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end;
        end
      { left*2: emitted as left+left }
      else if (nodetype=muln) and is_constrealnode(right) and
              is_number_float(trealconstnode(right).value_real) and
              (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,left.location.register,location.register,
            mms_movescalar);
        end
      { 2*right: emitted as right+right }
      else if (nodetype=muln) and is_constrealnode(left) and
              is_number_float(trealconstnode(left).value_real) and
              (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,right.location.register,location.register,
            mms_movescalar);
        end
      { right may only take the role of the register operand if the
        operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { An operand living in an fpu register is written to memory rather
            than forced into an mm register: there is no direct fpu->mm
            register copy instruction, so going through memory probably
            gives shorter code. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,right.location.register,location.register,
            mms_movescalar);
        end
      else
        begin
          { general case: left goes into an mm register, right is used from
            wherever it is (after moving it off the fpu stack if needed) }
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { same reasoning as above: fpu register -> memory, because there is
            no direct fpu->mm register copy instruction }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,left.location.register,location.register,
            mms_movescalar);
        end;
    end;
  procedure tx86addnode.second_cmpfloatvector;
    { Emits a scalar SSE/AVX floating point comparison ((V)COMISS for single,
      (V)COMISD for double) and leaves the result in the cpu flags
      (LOC_FLAGS). A left operand that is neither single nor double raises
      internalerror 200402222. }
    var
      cmpop : tasmop;

    { Emits "cmpop src,dstreg", where src is either a memory reference or an
      mm register; any other kind of location raises internalerror(errcode). }
    procedure emit_compare(var src : tlocation; dstreg : tregister; errcode : longint);
      begin
        case src.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,src.reference,dstreg));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,src.register,dstreg));
          else
            internalerror(errcode);
        end;
      end;

    begin
      { choose the compare instruction from the operand type and AVX usage }
      if is_single(left.resultdef) then
        begin
          if UseAVX then
            cmpop:=A_VCOMISS
          else
            cmpop:=A_COMISS;
        end
      else if is_double(left.resultdef) then
        begin
          if UseAVX then
            cmpop:=A_VCOMISD
          else
            cmpop:=A_COMISD;
        end
      else
        internalerror(200402222);

      pass_left_right;

      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu
        operands to memory (not to mm registers because one of the memory
        locations can be used directly in compare instruction, yielding
        shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is already in an mm register: compare left against it and
            record that the operands are used in swapped roles }
          emit_compare(left.location,right.location.register,200402221);
          toggleflag(nf_swapped);
        end
      else
        begin
          { otherwise load left into an mm register and compare right
            (memory or register) against it }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          emit_compare(right.location,left.location.register,200402223);
        end;

      location.resflags:=getresflags(true);
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  procedure tx86addnode.second_opvector;
    { Emits code for addn/muln/subn/slashn on operands whose type fits into
      an mm register; the result is returned in an MM register.
      Raises internalerror 200610071 for any other node type and
      internalerror 200610072 when the operand type does not fit into an mm
      register (not yet supported). }
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is the size of one element of the operand
            array. Both branches below must use the element type: the
            commutative branch used to typecast the operand def itself to
            tfloatdef, which disagreed with the other branch although both
            handle the same left.resultdef. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              { reuse right's register as destination: result:=right op left }
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              { result is computed in left's register: result:=left op right }
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  procedure tx86addnode.second_addfloat;
    { Emits code for a floating point addn/muln/subn/slashn node.
      Types handled by the vector fpu are delegated to the AVX or SSE
      implementation; everything else is computed on the x87 stack and the
      result is left in ST (LOC_FPUREGISTER). Any other node type raises
      internalerror 2003042214. }
    var
      fpuop : TAsmOp;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      pass_left_right;

      { pick the popping x87 instruction for the operation }
      case nodetype of
        addn :
          fpuop:=A_FADDP;
        muln :
          fpuop:=A_FMULP;
        subn :
          fpuop:=A_FSUBP;
        slashn :
          fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      force_left_and_right_fpureg;

      { If the tree nodes were swapped, the non-commutative operations need
        their reverse form. nf_swapped is checked only after the operands
        have been forced onto the fpu stack, exactly as before. }
      if nf_swapped in flags then
        case nodetype of
          subn :
            fpuop:=A_FSUBRP;
          slashn :
            fpuop:=A_FDIVRP;
          else
            { addn/muln: keep the instruction chosen above }
            ;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      { the popping instruction consumed one fpu stack entry }
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  procedure tx86addnode.second_cmpfloat;
    { Emits a floating point comparison whose result ends up in the cpu
      flags (LOC_FLAGS). Operands handled by the vector fpu are delegated to
      second_cmpfloatvector; otherwise both operands are compared on the x87
      stack and removed from it. }
{$ifdef i8086}
    var
      statusword: treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { target cpu older than Pentium II: compare with FCOMPP, which
            pops both operands, then move the fpu status word into the cpu
            flags }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { target cpu older than the 286: store the status word to a
                2-byte temp and load its high byte into AH for SAHF }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
              emit_ref(A_FSTSW,S_NO,statusword);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(statusword.offset);
              emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
              dec(statusword.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusword);
            end
          else
{$endif i8086}
            begin
              { store the status word directly into AX, then SAHF }
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          { FCOMIP sets the cpu flags directly }
          emit_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0);
          { fcomip pops only one fpu register }
          emit_reg(A_FSTP,S_NO,NR_ST0);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  1110. {*****************************************************************************
  1111. Add64bit
  1112. *****************************************************************************}
  procedure tx86addnode.second_add64bit;
    { 64 bit arithmetic: on targets with a 64 bit alu this is ordinary
      ordinal arithmetic; all other targets have to provide their own
      implementation and get internalerror 200402042 here. }
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  procedure tx86addnode.second_cmp64bit;
    { 64 bit comparison: on targets with a 64 bit alu this is an ordinary
      ordinal comparison; all other targets have to provide their own
      implementation and get internalerror 200402043 here. }
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1131. {*****************************************************************************
  1132. AddOrdinal
  1133. *****************************************************************************}
  procedure tx86addnode.second_cmpordinal;
    { Emits a CMP for two ordinal operands and returns the result in the cpu
      flags (LOC_FLAGS). The comparison counts as unsigned as soon as one of
      the operands is not signed. }
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      unsigned_cmp : boolean;
      const_vs_mem : boolean;
    begin
      unsigned_cmp:=not(is_signed(left.resultdef) and
                        is_signed(right.resultdef));
      { the operand size is taken from the left operand }
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      { A constant can be compared directly against a memory operand. On
        x86_64 a 64 bit compare is only done this way if the constant lies
        within the longint range. }
      const_vs_mem:=(right.location.loc=LOC_CONSTANT) and
         (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
{$ifdef x86_64}
         and ((not (cmpsize in [OS_64,OS_S64])) or (
               (right.location.value>=low(longint)) and (right.location.value<=high(longint))
             ))
{$endif x86_64}
         ;

      if not const_vs_mem then
        begin
          { general case: left goes into a register, then the generic
            operand handling emits the compare }
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,unsigned_cmp,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned_cmp);
    end;
{ unit initialization: assign the x86 add node class to caddnode }
begin
  caddnode:=tx86addnode;
end.