2
0

nx86add.pas 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Add/compare node for the x86 family. It derives from the generic
      code generator add node and overrides the second-pass methods that
      need x86 specific instruction selection. }
    tx86addnode = class(tcgaddnode)
    protected
      { translates the comparison held in nodetype (taking nf_swapped into
        account) into the matching condition flags }
      function getresflags(unsigned : boolean) : tresflags;
      { makes sure left.location is a LOC_REGISTER, swapping the operand
        locations instead of loading when that is allowed and possible }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      { optionally forces both operands into fpu registers and keeps
        nf_swapped in sync with the resulting operand order }
      procedure check_left_and_right_fpureg(force_fpureg: boolean);
      { emits "op right,left" for every supported location of right }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      { emits an integer operation with left as register operand, including
        the optional overflow check }
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
      { floating point comparison using COMISS/COMISD }
      procedure second_cmpfloatsse;
      { floating point arithmetic using mm registers }
      procedure second_addfloatsse;
    public
      procedure second_addfloat;override;
      procedure second_addsmallset;override;
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
  {$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
  {$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  48. implementation
  49. uses
  50. globtype,globals,
  51. verbose,cutils,
  52. cpuinfo,
  53. aasmbase,aasmtai,aasmdata,aasmcpu,
  54. symconst,symdef,
  55. cgobj,hlcgobj,cgx86,cga,cgutils,
  56. paramgr,tgobj,ncgutil,
  57. ncon,nset,ninl,
  58. defutil;
  59. {*****************************************************************************
  60. Helpers
  61. *****************************************************************************}
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    { Emits the instruction(s) for an integer operation whose left operand
      is already in a register (left.location.loc=LOC_REGISTER).

      op         - instruction to emit
      opsize     - operand size of the operation
      unsigned   - selects the flag tested by the overflow check
      extra_not  - complement the second operand before applying op
      mboverflow - the operation may overflow; an FPC_OVERFLOW call is
                   generated when overflow checking is switched on }
    var
      shiftcount : longint;
      skip_overflow : tasmlabel;
      tmpreg : Tregister;
      swapped_sub,
      right_is_const,
      overflow_checks : boolean;
    begin
      { neither of these changes before it is consulted below }
      swapped_sub:=(nodetype=subn) and (nf_swapped in flags);
      overflow_checks:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { the result is computed in right's register; exchange the
                locations afterwards so left designates the result again }
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these operations the locations are exchanged before
                the instruction is emitted }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not in a register: load it into a scratch register,
            operate on the scratch and copy the result to left's register }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,hlcg.tcgsize2orddef(opsize),right.location,tmpreg);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,tmpreg);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,tmpreg,left.location.register);
        end
      else
        begin
          right_is_const:=right.location.loc=LOC_CONSTANT;
          if (op=A_CMP) and
             (nodetype in [equaln,unequaln]) and
             right_is_const and
             (right.location.value=0) then
            { (in)equality test against zero -> TEST reg,reg }
            emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register)
          else if (op=A_ADD) and
             right_is_const and
             (right.location.value=1) and
             not(overflow_checks) then
            { +1 without overflow checking -> INC }
            emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register)
          else if (op=A_SUB) and
             right_is_const and
             (right.location.value=1) and
             not(overflow_checks) then
            { -1 without overflow checking -> DEC }
            emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register)
          else if (op=A_IMUL) and
             right_is_const and
             (ispowerof2(int64(right.location.value),shiftcount)) and
             not(overflow_checks) then
            { multiplication by a power of two without overflow checking -> SHL }
            emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shiftcount,left.location.register)
          else if extra_not then
            begin
              { complement a copy of right and AND it into left }
              tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,hlcg.tcgsize2orddef(opsize),right.location,tmpreg);
              emit_reg(A_NOT,TCGSize2Opsize[opsize],tmpreg);
              emit_reg_reg(A_AND,TCGSize2Opsize[opsize],tmpreg,left.location.register);
            end
          else
            emit_op_right_left(op,opsize);
        end;

      { The overflow check has to be generated right here because only this
        routine knows (through "unsigned") which flag must be tested. }
      if mboverflow and overflow_checks then
        begin
          current_asmdata.getjumplabel(skip_overflow);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,skip_overflow)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,skip_overflow);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,skip_overflow);
        end;
    end;
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { Ensures that left.location is a LOC_REGISTER.

      opdef  - type the operands are converted to when they are loaded
      opsize - operand size of the operation
      noswap - when true the operand locations are never exchanged }
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            { Load left. For comparisons the register is not modified, so
              a constant register may be reused. }
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,
              (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]))
          else
            begin
              { right already sits in a register: exchanging the locations
                is enough, remember it in nf_swapped }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { bring non-constant operands whose size differs from the operation
        size to opdef }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    { Keeps nf_swapped consistent with the operand order on the fpu stack
      and, when force_fpureg is set, loads operands that are not yet in an
      fpu register. }
    begin
      { both operands already on the fpu stack: they are always in the
        wrong order there }
      if (right.location.loc=LOC_FPUREGISTER) and
         (left.location.loc=LOC_FPUREGISTER) then
        begin
          toggleflag(nf_swapped);
          exit;
        end;

      { nothing is loaded unless the caller asks for it }
      if not(force_fpureg) then
        exit;

      if right.location.loc<>LOC_FPUREGISTER then
        begin
          location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
          if left.location.loc<>LOC_FPUREGISTER then
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false)
          else
            { left was on the stack => swap }
            toggleflag(nf_swapped);
        end
      else
        { only left is missing; loading it puts the nominator in st0 }
        location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
    end;
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    { Emits "op <right>,<left register>". left.location.register is used
      as destination operand, so left has to be in a register already.
      Any other location of right than register, reference or constant
      (after subset locations were loaded) raises an internal error. }
  {$ifdef x86_64}
    var
      constreg : tregister;
  {$endif x86_64}
    begin
      { subset locations are loaded into a plain register first }
      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
  {$ifdef x86_64}
            { 64 bit operations only take signed 32 bit immediates; larger
              constants go through a scratch register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
  {$endif x86_64}
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    { Returns the condition flags that correspond to the comparison stored
      in nodetype. When nf_swapped is set the operands were exchanged, so
      the mirrored relation is used. "unsigned" selects the above/below
      family instead of greater/less. The result is only assigned for
      equaln, unequaln, ltn, lten, gtn and gten. }
    var
      cmp : tnodetype;
    begin
      cmp:=nodetype;
      { exchanged operands: a<b becomes b>a and so on; (in)equality is
        symmetric and stays as it is }
      if nf_swapped in flags then
        case cmp of
          ltn  : cmp:=gtn;
          lten : cmp:=gten;
          gtn  : cmp:=ltn;
          gten : cmp:=lten;
        end;

      case cmp of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          if unsigned then
            getresflags:=F_B
          else
            getresflags:=F_L;
        lten :
          if unsigned then
            getresflags:=F_BE
          else
            getresflags:=F_LE;
        gtn :
          if unsigned then
            getresflags:=F_A
          else
            getresflags:=F_G;
        gten :
          if unsigned then
            getresflags:=F_AE
          else
            getresflags:=F_GE;
      end;
    end;
  304. {*****************************************************************************
  305. AddSmallSet
  306. *****************************************************************************}
  procedure tx86addnode.second_addsmallset;
    { Generates code for set operations on sets that are handled as a
      single integer value: union, difference, intersection, symmetric
      difference and the inclusion of a single element. The result ends
      up in a register which becomes the node's location. }
    var
      setbase : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      extra_not,
      keep_order,
      complement_only : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      extra_not:=false;
      complement_only:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { take the set base from whichever operand is the set }
      if left.resultdef.typ=setdef then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union }
              op:=A_OR
            else
              begin
                { a single element is included with BTS;
                  ranges are not supported here }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported, so 8 bit sets are widened }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                keep_order:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b) }
            op:=A_AND;
            if nf_swapped in flags then
              begin
                { operands are exchanged: right is the minuend }
                complement_only:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                complement_only:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if complement_only then
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      else
        begin
          { left must be a register; it receives the result }
          left_must_be_reg(opdef,opsize,keep_order);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location:=left.location;
        end;

      { undo the widening that was needed because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,hlcg.tcgsize2orddef(int_cgsize(resultdef.size)),false);
    end;
  procedure tx86addnode.second_cmpsmallset;
    { Generates code for comparing two sets that are handled as a single
      integer value. (In)equality is a plain CMP; subset tests are done by
      AND-ing the operands and comparing the result. The outcome is
      returned in the flags. }
    var
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          op:=A_CMP;
        lten,
        gten :
          begin
            { exchange the operands for a not swapped "<=" and for a
              swapped ">=" }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(op,opsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  444. {*****************************************************************************
  445. AddMMX
  446. *****************************************************************************}
  447. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmx;
    { Generates code for element wise MMX operations (add, subtract,
      multiply, xor, or, and). The instruction is chosen from the node type,
      the element type of the operands and, for add/subtract, from the
      saturation switch. The result is left in an MMX register.

      Changes compared to the previous version:
        - saturated subtraction of signed 16 bit elements used the byte
          instruction PSUBSB; it now uses PSUBSW like the addition case
          uses PADDSW
        - the sanity check in the swapped subtraction path tested left
          (which is always an MMX register at that point) instead of right,
          the operand that is actually loaded from memory there }
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { no comparison operation is handled here, so cmpop stays false }
      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized elements need the word instruction }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              { right already is a scratch MMX register: use it as left }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc is LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { right is the minuend: copy it into a new register and
                subtract left from that copy }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg)
              else
                begin
                  { right is the operand read from memory here, so it is
                    right that has to be a reference }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                end;
              emit_reg_reg(op,S_NO,left.location.register,hreg);
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { compute into right's register, then make it the new left }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  616. {$endif SUPPORT_MMX}
  617. {*****************************************************************************
  618. AddFloat
  619. *****************************************************************************}
  procedure tx86addnode.second_addfloatsse;
    { Generates floating point add/sub/mul/div code that computes its
      result in an mm register.

      With at least SSE3 selected, sqr(a)+sqr(b) and sqr(a)-sqr(b) are
      special-cased: both arguments are packed into one register, squared
      with a single packed multiply and combined with a horizontal
      add/subtract. }
    var
      op : topcg;
      sqr_sum : boolean;
      arg : tnode;
    begin
      sqr_sum:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sqr_sum then
        begin
          { replace both sqr() nodes by their arguments; the argument is
            detached first so freeing the sqr() node leaves it alone }
          arg:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=arg;

          arg:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=arg;
        end;

      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch       }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,true);
          location:=left.location;
          if is_double(resultdef) then
            begin
              { pack both doubles into one register, square them, then
                combine the two products horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              { interleave the two singles in the low part of the register }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all elements, then combine neighbours horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end;
        end
      { right can only serve as destination if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=right.location.register;
          { An operand living in an fpu register is written to memory
            instead of being forced into an mm register: there is no direct
            fpu->mm register copy instruction, so this probably gives
            shorter code. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          location.register:=left.location.register;
          { same reasoning as above: fpu register -> memory, not -> mm register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  procedure tx86addnode.second_cmpfloatsse;
    { Generates a floating point comparison with COMISS (single) or COMISD
      (double); any other operand type raises an internal error. The
      result is returned in the flags. }
    var
      cmpinstr : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpinstr:=A_COMISS
      else if is_double(left.resultdef) then
        cmpinstr:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc=LOC_MMREGISTER then
        begin
          { right is already in an mm register: compare with right as the
            register operand and record the reversed order in nf_swapped }

          { An operand living in an fpu register is written to memory
            instead of being forced into an mm register: there is no direct
            fpu->mm register copy instruction, so this probably gives
            shorter code. }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          case left.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register));
            else
              internalerror(200402221);
          end;
          toggleflag(nf_swapped);
        end
      else
        begin
          location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,false);
          { same reasoning as above: fpu register -> memory, not -> mm register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          case right.location.loc of
            LOC_REFERENCE,
            LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
              end;
            LOC_MMREGISTER,
            LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register));
            else
              internalerror(200402223);
          end;
        end;

      { the result is read with the above/below family of conditions }
      location.resflags:=getresflags(true);
    end;
  { Generates code for an arithmetic node (addn, muln, subn, slashn) whose
    operands are vectors.

    Only operands for which fits_in_mm_register() holds are handled; the
    result is then produced in an MM register (location becomes
    LOC_MMREGISTER). Any other operand type stops with
    internalerror(200610072), any other node type with
    internalerror(200610071). }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

          { Size of a single vector element. left.resultdef is handled as an
            array def and the float type is taken from its element def.
            Previously the commutative path below cast left.resultdef itself
            to tfloatdef, which disagreed with the other path for the very
            same operand.
            NOTE(review): assumes fits_in_mm_register only accepts array
            defs with float elements - confirm against its implementation. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              { right already lives in an MM register: use it as destination }
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              { otherwise left is brought into an MM register and becomes
                the destination }
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for addn/muln/subn/slashn on floating point operands.

    When use_vectorfpu(resultdef) holds the work is handed to
    second_addfloatsse. Otherwise a popping x87 instruction operating on
    ST and ST1 is emitted and the result location is the FPU register
    NR_ST. Any other node type stops with internalerror(2003042214). }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      if use_vectorfpu(resultdef) then
        second_addfloatsse
      else
        begin
          pass_left_right;

          { pick the popping x87 instruction for this node type }
          case nodetype of
            addn   : fpuop:=A_FADDP;
            muln   : fpuop:=A_FMULP;
            subn   : fpuop:=A_FSUBP;
            slashn : fpuop:=A_FDIVP;
            else
              internalerror(2003042214);
          end;

          check_left_and_right_fpureg(true);

          { if the tree nodes were swapped, the non-commutative operations
            need the reverse operator; nf_swapped is deliberately tested
            only after check_left_and_right_fpureg, as in the original
            statement order }
          if nf_swapped in flags then
            begin
              if (nodetype=subn) then
                fpuop:=A_FSUBRP
              else if (nodetype=slashn) then
                fpuop:=A_FDIVRP;
            end;

          emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
          { the popping form leaves one operand less on the fpu stack }
          tcgx86(cg).dec_fpu_stack;

          location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
          location.register:=NR_ST;
        end;
    end;
  { Generates code for a comparison of two floating point operands; the
    result location is LOC_FLAGS.

    When use_vectorfpu holds for either operand the work is handed to
    second_cmpfloatsse. Otherwise both operands are compared on the x87
    stack:
      - on non-x86_64 targets with a cpu type below cpu_Pentium2 via FCOMPP
        followed by FSTSW AX / SAHF,
      - in all other cases via FCOMIP followed by an FSTP of ST0.
    Both variants account for two operands leaving the fpu stack. }
  procedure tx86addnode.second_cmpfloat;
    var
      resflags : tresflags;
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags: status word goes through AX into the cpu flags }
          cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
          emit_reg(A_FSTSW,S_NO,NR_AX);
          emit_none(A_SAHF,S_NO);
          cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so drop the second one explicitly }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { Both instruction sequences above use the same mapping from node type
        to result flags, so it is selected once here. For swapped operands
        the ordering conditions are mirrored (below <-> above); equality
        and inequality are unaffected. }
      if nf_swapped in flags then
        begin
          case nodetype of
            equaln   : resflags:=F_E;
            unequaln : resflags:=F_NE;
            ltn      : resflags:=F_A;
            lten     : resflags:=F_AE;
            gtn      : resflags:=F_B;
            gten     : resflags:=F_BE;
          end;
        end
      else
        begin
          case nodetype of
            equaln   : resflags:=F_E;
            unequaln : resflags:=F_NE;
            ltn      : resflags:=F_B;
            lten     : resflags:=F_BE;
            gtn      : resflags:=F_A;
            gten     : resflags:=F_AE;
          end;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=resflags;
    end;
  956. {*****************************************************************************
  957. Add64bit
  958. *****************************************************************************}
  { 64 bit addition node. When cpu64bitalu is defined the operation is
    forwarded to second_addordinal; without it this version stops with
    internalerror(200402042). }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison node. When cpu64bitalu is defined the operation is
    forwarded to second_cmpordinal; without it this version stops with
    internalerror(200402043). }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  977. {*****************************************************************************
  978. AddOrdinal
  979. *****************************************************************************}
  { Generates code for a comparison of two ordinal operands: emits a CMP
    via emit_generic_code and leaves the result in the cpu flags
    (location becomes LOC_FLAGS). The operand def and size are taken from
    the left operand. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned : boolean;
    begin
      { the comparison is unsigned as soon as one operand is not signed }
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      { the operands are no longer needed once the compare is emitted }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: assign tx86addnode to caddnode }
begin
  caddnode:=tx86addnode;
end.