nx86add.pas 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { x86 flavour of the add node: extends tcgaddnode with the helper emitters
    and the second_* overrides implemented in this unit. }
  tx86addnode = class(tcgaddnode)
  protected
    { maps the comparison held in nodetype (honouring nf_swapped) onto the
      result flags to test }
    function getresflags(unsigned : boolean) : tresflags;
    { brings left.location into a register, swapping the operand locations
      instead when that is allowed and right already is a register }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { optionally forces the operands onto the FPU stack and records in
      nf_swapped when they ended up in reversed order }
    procedure check_left_and_right_fpureg(force_fpureg: boolean);
    { emits "op right,left" for whichever location kind right has }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the integer operation followed by the optional overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    { SSE and AVX variants of the float compare / float arithmetic paths }
    procedure second_cmpfloatsse;
    procedure second_cmpfloatavx;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    procedure second_addfloat;override;
    {$ifndef i8086}
    procedure second_addsmallset;override;
    {$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
    {$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
    {$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  52. implementation
  53. uses
  54. globtype,globals,
  55. verbose,cutils,
  56. cpuinfo,
  57. aasmbase,aasmtai,aasmdata,aasmcpu,
  58. symconst,symdef,
  59. cgobj,hlcgobj,cgx86,cga,cgutils,
  60. paramgr,tgobj,ncgutil,
  61. ncon,nset,ninl,
  62. defutil;
  63. {*****************************************************************************
  64. Helpers
  65. *****************************************************************************}
  { Emits the integer instruction "op" that combines right.location into
    left.location.register and, when requested, the overflow check.

      op         - x86 opcode to emit
      opsize     - operand size of the operation
      unsigned   - selects which flag the overflow check tests
                   (F_AE when true, F_NO when false)
      extra_not  - one operand is complemented with NOT before the operation
                   (requested by the set-difference code)
      mboverflow - the operation may overflow; the check itself is only
                   emitted when cs_check_overflow is active

    On entry left.location.loc is expected to be LOC_REGISTER. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4   : tasmlabel;
      r     : Tregister;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { the result now lives in the former right register: swap the
                locations back and update the swapped flag accordingly }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for the commutative opcodes the locations are exchanged
                before emitting the instruction }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute right-left in a scratch register, then move the
                result back into left's register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { comparing against zero for (in)equality: TEST reg,reg }
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else
                { INC is only used when UseIncDec allows it, exactly like the
                  DEC case below }
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
                else
                  if (op=A_SUB) and
                     (right.location.loc=LOC_CONSTANT) and
                     (right.location.value=1) and
                     not(cs_check_overflow in current_settings.localswitches) and
                     UseIncDec then
                    begin
                      emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                    end
                  else
                    if (op=A_IMUL) and
                       (right.location.loc=LOC_CONSTANT) and
                       (ispowerof2(int64(right.location.value),power)) and
                       not(cs_check_overflow in current_settings.localswitches) then
                      begin
                        { multiplication by a power of two becomes a shift }
                        emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                      end
                    else
                      begin
                        if extra_not then
                          begin
                            { load right into a scratch register, complement
                              it and AND it into left }
                            r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                            emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                            emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                          end
                        else
                          begin
                            emit_op_right_left(op,opsize);
                          end;
                      end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl4);
              { skip the FPC_OVERFLOW call when the relevant flag is clear }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Makes sure the left operand ends up in a register.

      opdef  - definition the operands are converted to
      opsize - operand size the locations have to match
      noswap - when true the operand locations are never exchanged

    Afterwards any non-constant operand whose size differs from opsize
    (ignoring signedness) is forced into a register of type opdef. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_compare : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            begin
              { a comparison does not modify its operand, so in that case a
                constant register may be reused }
              is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare);
            end
          else
            begin
              { right already sits in a register: simply swap the locations }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { size fix-ups: right first, then left }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Inspects which operands are LOC_FPUREGISTER, loads the missing ones when
    force_fpureg is set and toggles nf_swapped whenever the operands are in
    reversed order on the FPU stack. }
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    var
      left_in_fpu,
      right_in_fpu : boolean;
    begin
      left_in_fpu:=(left.location.loc=LOC_FPUREGISTER);
      right_in_fpu:=(right.location.loc=LOC_FPUREGISTER);

      if right_in_fpu then
        begin
          if left_in_fpu then
            begin
              { fpu operands are always in the wrong order on the stack }
              toggleflag(nf_swapped);
            end
          else
            begin
              { the nominator in st0 }
              if force_fpureg then
                location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
            end;
        end
      else
        begin
          if force_fpureg then
            begin
              location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
              if left_in_fpu then
                begin
                  { left was on the stack => swap }
                  toggleflag(nf_swapped);
                end
              else
                location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
            end;
        end;
    end;
  { Emits "op right,left" where left.location.register is the destination.
    right may be a register, a memory reference or a constant; subset
    locations are loaded into a register first. Any other location kind
    raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      tmpreg : tregister;
{$endif x86_64}
    begin
      if right.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register))
      else if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        begin
          tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
        end
      else if right.location.loc=LOC_CONSTANT then
        begin
{$ifdef x86_64}
          { x86_64 only supports signed 32 bits constants directly, anything
            outside that range goes through a temporary register }
          if (opsize in [OS_S64,OS_64]) and
             not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
            begin
              tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
            end
          else
{$endif x86_64}
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
        end
      else
        internalerror(200203232);
    end;
  { Returns the result flags that correspond to the comparison stored in
    nodetype.

      unsigned - choose the unsigned (below/above) instead of the signed
                 (less/greater) flags

    When nf_swapped is set the ordering comparisons are mirrored first.
    The result is left unassigned for node types other than the six
    comparison operators. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      cmp : tnodetype;
    begin
      { normalise: with swapped operands "<" behaves like ">" and so on }
      cmp:=nodetype;
      if nf_swapped in flags then
        case nodetype of
          ltn  : cmp:=gtn;
          lten : cmp:=gten;
          gtn  : cmp:=ltn;
          gten : cmp:=lten;
        end;

      case cmp of
        equaln :
          getresflags:=F_E;
        unequaln :
          getresflags:=F_NE;
        ltn :
          if unsigned then
            getresflags:=F_B
          else
            getresflags:=F_L;
        lten :
          if unsigned then
            getresflags:=F_BE
          else
            getresflags:=F_LE;
        gtn :
          if unsigned then
            getresflags:=F_A
          else
            getresflags:=F_G;
        gten :
          if unsigned then
            getresflags:=F_AE
          else
            getresflags:=F_GE;
      end;
    end;
  309. {*****************************************************************************
  310. AddSmallSet
  311. *****************************************************************************}
  312. {$ifndef i8086}
  { Generates code for the small-set operators: union / element inclusion
    (addn), symmetric difference, intersection, difference (subn) and the
    bitwise xor/or/and node types. The result is left in a register. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opdef   : tdef;
      opsize  : TCGSize;
      op      : TAsmOp;
      swapped,
      extra_not,
      noswap,
      all_member_optimization : boolean;
    begin
      pass_left_right;

      all_member_optimization:=false;
      extra_not:=false;
      noswap:=false;

      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { take the set base from whichever operand actually is a set }
      if left.resultdef.typ<>setdef then
        setbase:=tsetdef(right.resultdef).setbase
      else
        setbase:=tsetdef(left.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { we are adding a single set element }
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported, widen to 32 bit }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b) }
            op:=A_AND;
            swapped:=nf_swapped in flags;
            { minuend is the constant with all bits set? }
            if ((not swapped) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
               (swapped and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
              all_member_optimization:=true;
            { a constant subtrahend is complemented right here, otherwise
              a NOT instruction has to be emitted later }
            if (not swapped) and (right.location.loc=LOC_CONSTANT) then
              right.location.value:=not(right.location.value)
            else if swapped and (left.location.loc=LOC_CONSTANT) then
              left.location.value:=not(left.location.value)
            else
              extra_not:=true;
          end;
        else
          internalerror(2003042215);
      end;

      if not all_member_optimization then
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { undo the swap and update the swapped flag accordingly }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  415. {$endif not i8086}
  { Generates the comparison of two small sets; the outcome is returned as
    LOC_FLAGS. Subset tests (lten/gten) are turned into an AND followed by
    an equality compare. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef  : tdef;
      opsize : TCGSize;
      op     : TAsmOp;
    begin
      pass_left_right;

      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          op:=A_CMP;
        lten,
        gten :
          begin
            { swap for "<=" with unswapped operands and for ">=" with
              swapped operands }
            if (nodetype=lten) xor (nf_swapped in flags) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(op,opsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      { the result of the compare lives in the CPU flags }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  451. {*****************************************************************************
  452. AddMMX
  453. *****************************************************************************}
  454. {$ifdef SUPPORT_MMX}
  { Generates code for the MMX vector operators (add, multiply, subtract,
    xor, or, and). The opcode is chosen from the MMX element type of the left
    operand and, for add/subtract, from the cs_mmx_saturation switch. The
    result is returned in an MMX register.

    NOTE(review): the "case mmxbase" statements have no else branch, so op
    stays unassigned for element types that are not listed (for instance
    32 bit elements with saturation enabled) - confirm that earlier passes
    reject those combinations. }
  procedure tx86addnode.second_opmmx;
    var
      op        : TAsmOp;
      mmxbase   : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { 16 bit signed elements need the word opcode, mirroring
                    A_PADDSW in the addn branch }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right that gets loaded from memory here; left is
                    already an MMX register at this point }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  623. {$endif SUPPORT_MMX}
  624. {*****************************************************************************
  625. AddFloat
  626. *****************************************************************************}
  { Generates scalar SSE code for float add, multiply, subtract and divide.
    With an fpu type of at least fpu_sse3 the pattern sqr(a) +/- sqr(b) is
    handled specially: both values are packed into one register, squared
    with a single packed multiply and combined with a horizontal add or
    subtract. }
  procedure tx86addnode.second_addfloatsse;
    var
      op      : topcg;
      sqr_sum : boolean;
      inner   : tnode;
    begin
      sqr_sum:=false;
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          sqr_sum:=true;
          { strip the sqr() wrapper from the left operand ... }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;
          { ... and from the right operand }
          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;
      check_left_and_right_fpureg(false);

      if (nf_swapped in flags) then
        begin
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          location:=left.location;

          if not is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the data is now in bits 0..31 and 64..95 }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end
          else
            begin
              { pack both doubles into one register, square, then combine }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end;
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          { an operand held in an fpu register is written to memory rather
            than moved to an mm register: there is no direct fpu->mm register
            copy instruction, so going through memory is probably shorter }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { load left into the result register; an fpu register operand is
            spilled to memory first (no direct fpu->mm register copy) }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          { same treatment for right before it is combined into the result }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates code for a scalar floating point add/sub/mul/div whose result
    ends up in an MM register (LOC_MMREGISTER). It is called from
    second_addfloat when the vector FPU is used and UseAVX is true, and it
    relies on the three-operand cg.a_opmm_*_reg_reg helpers so that the
    result gets a freshly allocated register.

    Raises internalerror(200312231) for any node type other than
    addn/muln/subn/slashn. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop         : topcg;
      hop          : tasmop;
      fuse_squares : boolean;
      inner        : tnode;
      list         : TAsmList;
    begin
      fuse_squares:=false;
{$ifdef dummy}
      { disabled code path: recognise sqr(a) +/- sqr(b) and strip the two
        sqr() inline nodes so that only their arguments are evaluated }
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;

          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;
      check_left_and_right_fpureg(false);
      list:=current_asmdata.CurrAsmList;

      if nf_swapped in flags then
        begin
          { swapleftright is useless when both operands live on the fpu stack:
            both are "R_ST", so nothing would change -> exchange them manually }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      { translate the node type into the generic code generator operation }
      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(list,right.location,right.resultdef,true);
          { the result is built up in left's register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              list.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              list.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  hop:=A_HADDPD;
                subn:
                  hop:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
              list.concat(taicpu.op_reg_reg(hop,S_NO,location.register,location.register));
            end
          else
            begin
              list.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              list.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { the data is now in bits 0..32 and 64..95 }
              list.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  hop:=A_HADDPS;
                subn:
                  hop:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
              list.concat(taicpu.op_reg_reg(hop,S_NO,location.register,location.register));
            end;
        end
      { left*2: emitted as left+left }
      else if (nodetype=muln) and
              is_constrealnode(right) and
              is_number_float(trealconstnode(right).value_real) and
              (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(list,left.location.size);
          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(list,OP_ADD,location.size,
            left.location.register,left.location.register,
            location.register,mms_movescalar);
        end
      { 2*right: emitted as right+right }
      else if (nodetype=muln) and
              is_constrealnode(left) and
              is_number_float(trealconstnode(left).value_real) and
              (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(list,right.location.size);
          hlcg.location_force_mmregscalar(list,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(list,OP_ADD,location.size,
            right.location.register,right.location.register,
            location.register,mms_movescalar);
        end
      { right may only take the place of the register operand if the
        operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(list,left.location.size);
          { An operand in an x87 register is written to memory instead of
            being forced into an mm register: there is no direct fpu->mm
            register copy instruction, so this probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(list,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(list,cgop,location.size,
            left.location,right.location.register,
            location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(list,left.location.size);
          { same reasoning as above: spill an x87 operand to memory rather
            than moving it into an mm register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(list,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(list,cgop,location.size,
            right.location,left.location.register,
            location.register,mms_movescalar);
        end;
    end;
  { Generates a scalar floating point comparison with COMISS (single) or
    COMISD (double) and leaves the result in the flags (LOC_FLAGS).

    Raises internalerror(200402222) when left is neither single nor double,
    and internalerror(200402221)/(200402223) when the non-register operand
    is neither a memory reference nor an MM register. }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmpop : tasmop;
      list  : TAsmList;

    { Emits "cmpop operand,reg". An operand held in an x87 register is first
      written to memory: there is no direct fpu->mm register copy
      instruction, so going through memory probably gives shorter code than
      forcing it into an mm register. errcode is the internal error raised
      for an unsupported operand location. }
    procedure emit_compare(var operand : tlocation; operanddef : tdef; reg : tregister; errcode : longint);
      begin
        if operand.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(list,operand,operanddef);
        case operand.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(list,operand.reference);
              list.concat(taicpu.op_ref_reg(cmpop,S_NO,operand.reference,reg));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            list.concat(taicpu.op_reg_reg(cmpop,S_NO,operand.register,reg));
          else
            internalerror(errcode);
        end;
      end;

    begin
      if is_single(left.resultdef) then
        cmpop:=A_COMISS
      else if is_double(left.resultdef) then
        cmpop:=A_COMISD
      else
        internalerror(200402222);

      pass_left_right;
      list:=current_asmdata.CurrAsmList;

      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc=LOC_MMREGISTER then
        begin
          { right already sits in an mm register: use it as the register
            operand and compare left against it ... }
          emit_compare(left.location,left.resultdef,right.location.register,200402221);
          { ... and toggle nf_swapped to record that the operands were
            exchanged }
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end
      else
        begin
          { left becomes the register operand, right is compared against it }
          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,false);
          emit_compare(right.location,right.resultdef,left.location.register,200402223);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates a scalar floating point comparison with VCOMISS (single) or
    VCOMISD (double) and leaves the result in the flags (LOC_FLAGS).

    Raises internalerror(200402222) when left is neither single nor double,
    and internalerror(200402221)/(200402223) when the non-register operand
    is neither a memory reference nor an MM register. }
  procedure tx86addnode.second_cmpfloatavx;
    var
      cmpop : tasmop;
      list  : TAsmList;

    { Emits "cmpop operand,reg". An operand held in an x87 register is first
      written to memory: there is no direct fpu->mm register copy
      instruction, so going through memory probably gives shorter code than
      forcing it into an mm register. errcode is the internal error raised
      for an unsupported operand location. }
    procedure emit_compare(var operand : tlocation; operanddef : tdef; reg : tregister; errcode : longint);
      begin
        if operand.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(list,operand,operanddef);
        case operand.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(list,operand.reference);
              list.concat(taicpu.op_ref_reg(cmpop,S_NO,operand.reference,reg));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            list.concat(taicpu.op_reg_reg(cmpop,S_NO,operand.register,reg));
          else
            internalerror(errcode);
        end;
      end;

    begin
      if is_single(left.resultdef) then
        cmpop:=A_VCOMISS
      else if is_double(left.resultdef) then
        cmpop:=A_VCOMISD
      else
        internalerror(200402222);

      pass_left_right;
      list:=current_asmdata.CurrAsmList;

      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc=LOC_MMREGISTER then
        begin
          { right already sits in an mm register: use it as the register
            operand and compare left against it ... }
          emit_compare(left.location,left.resultdef,right.location.register,200402221);
          { ... and toggle nf_swapped to record that the operands were
            exchanged }
          if not(nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end
      else
        begin
          { left becomes the register operand, right is compared against it }
          hlcg.location_force_mmregscalar(list,left.location,left.resultdef,true);
          emit_compare(right.location,right.resultdef,left.location.register,200402223);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates code for an element-wise add/sub/mul/div on vector operands
    that fit into an MM register; the result is left in an MM register
    (LOC_MMREGISTER).

    Raises internalerror(200610071) for any node type other than
    addn/muln/subn/slashn, and internalerror(200610072) when the operand
    type does not fit into an MM register (not yet supported). }
  procedure tx86addnode.second_opvector;
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is the size of one vector element. It has to
            be taken from the array's element def: left.resultdef itself is
            the array def, so casting it directly to tfloatdef (as the
            commutative branch used to do) reads an unrelated field. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { right may only take the place of the destination operand if the
            operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point add/sub/mul/div.

    When the result type is handled by the vector FPU the work is delegated
    to second_addfloatavx (UseAVX) or second_addfloatsse. Otherwise the
    operation is carried out with the x87 "pop" instructions on ST/ST1 and
    the result is reported in NR_ST (LOC_FPUREGISTER).

    Raises internalerror(2003042214) for any node type other than
    addn/muln/subn/slashn. }
  procedure tx86addnode.second_addfloat;
    var
      fpuop : TAsmOp;
    begin
      { vector fpu: hand over to the SSE/AVX specific routine }
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      pass_left_right;

      case nodetype of
        addn :
          fpuop:=A_FADDP;
        muln :
          fpuop:=A_FMULP;
        subn :
          fpuop:=A_FSUBP;
        slashn :
          fpuop:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      check_left_and_right_fpureg(true);

      { if the tree nodes were swapped, the non-commutative operations need
        their reversed instruction; add and mul are left as they are }
      if nf_swapped in flags then
        case nodetype of
          slashn :
            fpuop:=A_FDIVRP;
          subn :
            fpuop:=A_FSUBRP;
        end;

      emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
      { the instruction pops one operand; keep the tracked fpu stack in sync }
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison and leaves the result in
    the flags (LOC_FLAGS).

    When either operand type is handled by the vector FPU the work is
    delegated to second_cmpfloatavx (UseAVX) or second_cmpfloatsse.
    Otherwise both operands are compared on the x87 stack:
      - non-x86_64 targets with cputype < cpu_Pentium2 use FCOMPP and copy
        the fpu status word into the cpu flags via AX and SAHF (through a
        temporary memory word on i8086 when cputype < cpu_286),
      - everything else uses FCOMIP followed by FSTP.

    Raises internalerror(2014011601) when nodetype is not one of the six
    comparison node types; without this check resflags would be used
    uninitialised. }
  procedure tx86addnode.second_cmpfloat;
    var
      resflags : tresflags;
{$ifdef i8086}
      tmpref: treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          if UseAVX then
            second_cmpfloatavx
          else
            second_cmpfloatsse;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { fcompp pops both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word to a temp and load it into AX from there }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
              emit_ref(A_FNSTSW,S_NO,tmpref);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_ref_reg(A_MOV,S_W,tmpref,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
            end
          else
{$endif i8086}
            begin
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      { Select the result flags. The mapping is the same for both code paths
        above, so it is done once here. The ordered comparisons use the
        mirrored condition when the operands were swapped. }
      if nf_swapped in flags then
        begin
          case nodetype of
            equaln   : resflags:=F_E;
            unequaln : resflags:=F_NE;
            ltn      : resflags:=F_A;
            lten     : resflags:=F_AE;
            gtn      : resflags:=F_B;
            gten     : resflags:=F_BE;
            else
              internalerror(2014011601);
          end;
        end
      else
        begin
          case nodetype of
            equaln   : resflags:=F_E;
            unequaln : resflags:=F_NE;
            ltn      : resflags:=F_B;
            lten     : resflags:=F_BE;
            gtn      : resflags:=F_A;
            gten     : resflags:=F_AE;
            else
              internalerror(2014011601);
          end;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=resflags;
    end;
  1205. {*****************************************************************************
  1206. Add64bit
  1207. *****************************************************************************}
  { 64 bit arithmetic. On targets with a 64 bit ALU (cpu64bitalu defined)
    this is ordinary ordinal arithmetic and is forwarded to
    second_addordinal; on all other targets it raises
    internalerror(200402042). }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison. On targets with a 64 bit ALU (cpu64bitalu defined)
    this is an ordinary ordinal comparison and is forwarded to
    second_cmpordinal; on all other targets it raises
    internalerror(200402043). }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1226. {*****************************************************************************
  1227. AddOrdinal
  1228. *****************************************************************************}
  { Generates an ordinal comparison with CMP and leaves the result in the
    flags (LOC_FLAGS). The operand size is taken from left's result type;
    the comparison is treated as unsigned as soon as one of the two operand
    types is not signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef      : tdef;
      cmpsize     : tcgsize;
      is_unsigned : boolean;
    begin
      { signed only if both operand types are signed }
      is_unsigned:=not(is_signed(left.resultdef) and
                       is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);

      { the operands are no longer needed once the flags are set }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  initialization
    { make tx86addnode the class referenced by the caddnode class variable }
    caddnode:=tx86addnode;
  end.