nx86add.pas 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add-node code generator shared by the x86 family targets.
    It derives from tcgaddnode and overrides its second_* hooks. }
  tx86addnode = class(tcgaddnode)
  protected
    { --- helpers used by the second_* implementations --- }

    { maps the comparison in nodetype (and the swapped flag) to a flags condition }
    function getresflags(unsigned : boolean) : tresflags;
    { makes sure left.location ends up in a register of size opsize }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { brings the operands onto the FPU stack (optionally) and tracks their order }
    procedure check_left_and_right_fpureg(force_fpureg: boolean);
    { emits "op right,left" for whatever location right currently has }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emits the integer/set operation including the optional overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

    { --- floating point back ends --- }
    procedure second_cmpfloatsse;
    procedure second_cmpfloatavx;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    procedure second_addfloat;override;
{$ifndef i8086}
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  52. implementation
  53. uses
  54. globtype,globals,
  55. verbose,cutils,
  56. cpuinfo,
  57. aasmbase,aasmtai,aasmdata,aasmcpu,
  58. symconst,symdef,
  59. cgobj,hlcgobj,cgx86,cga,cgutils,
  60. paramgr,tgobj,ncgutil,
  61. ncon,nset,ninl,
  62. defutil;
  63. {*****************************************************************************
  64. Helpers
  65. *****************************************************************************}
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    { Emits the machine instruction(s) for an integer or small-set operation.
      The caller must already have placed left.location in a register; on exit
      left.location.register holds the result.

      op         - x86 opcode to apply
      opsize     - operand size used for every emitted instruction
      unsigned   - selects the jump condition of the overflow check
                   (F_AE when set, F_NO otherwise)
      extra_not  - emit a NOT on the source operand before the operation; in
                   the non-register path the operation emitted is then always AND
      mboverflow - when set and cs_check_overflow is active, append a
                   conditional call to FPC_OVERFLOW }
    var
      power : longint;
      hl4   : tasmlabel;
      r     : Tregister;
      href  : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { the result now lives in the former right register, so exchange
                the locations and record the swap }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, operate on it and move
                the result back into the left register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { compare against zero -> TEST reg,reg }
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else if (op=A_ADD) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 not(cs_check_overflow in current_settings.localswitches) and
                 { gate INC on UseIncDec exactly like the DEC case below }
                 UseIncDec then
                begin
                  emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_SUB) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 not(cs_check_overflow in current_settings.localswitches) and
                 UseIncDec then
                begin
                  emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (ispowerof2(int64(right.location.value),power)) and
                 not(cs_check_overflow in current_settings.localswitches) then
                begin
                  { multiplication by 2^power -> shift left }
                  emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not(cs_check_overflow in current_settings.localswitches) then
                begin
                  { multiplication by 3, 5 or 9 -> LEA new,[reg+reg*(value-1)] }
                  reference_reset_base(href,left.location.register,0,0);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { right must not be modified in place: copy, invert, then AND }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl4);
              { skip the call when no carry (unsigned) / no overflow (signed) occurred }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { Guarantees that left.location is a register and that both operands
      match opsize (ignoring signedness).

      opdef  - type definition the operands are converted to when forced
      opsize - required operand size
      noswap - when set, the operands may not be exchanged even if right
               already is a register }
    var
      is_comparison : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            begin
              { a comparison does not modify its operand, so the register of
                a constant location may be reused in that case }
              is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
            end
          else
            begin
              { right already is a register: simply exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { fix up operands whose size differs from the operation size }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  procedure tx86addnode.check_left_and_right_fpureg(force_fpureg: boolean);
    { Records (via nf_swapped) the operand order on the FPU stack and, when
      force_fpureg is set, loads operands that are not yet FPU registers. }
    begin
      if (right.location.loc=LOC_FPUREGISTER) and
         (left.location.loc=LOC_FPUREGISTER) then
        begin
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        end
      else if not force_fpureg then
        begin
          { at least one operand is not on the stack and we may not load it:
            nothing to do }
        end
      else if right.location.loc<>LOC_FPUREGISTER then
        begin
          location_force_fpureg(current_asmdata.CurrAsmList,right.location,false);
          if left.location.loc=LOC_FPUREGISTER then
            { left was on the stack => swap }
            toggleflag(nf_swapped)
          else
            location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end
      else
        begin
          { right is on the stack, left is not: load left }
          location_force_fpureg(current_asmdata.CurrAsmList,left.location,false);
        end;
    end;
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    { Emits "op right,left" where left.location must already be a register
      and right may be a register, a memory reference or a constant.
      Any other right location raises internalerror 200203232. }
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      { the case below has no branch for subset locations: load them first }
      if right.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly, so
              anything outside the longint range goes through a register }
            if (opsize in [OS_64,OS_S64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    { Returns the flags condition that represents the comparison stored in
      nodetype. For the ordering comparisons the condition is mirrored when
      nf_swapped is set, and the unsigned variants (A/AE/B/BE) are chosen
      instead of the signed ones (G/GE/L/LE) when unsigned is true. }
    var
      operands_swapped : boolean;
    begin
      operands_swapped:=nf_swapped in flags;
      if nodetype=equaln then
        getresflags:=F_E
      else if nodetype=unequaln then
        getresflags:=F_NE
      else if unsigned then
        begin
          if operands_swapped then
            case nodetype of
              ltn  : getresflags:=F_A;
              lten : getresflags:=F_AE;
              gtn  : getresflags:=F_B;
              gten : getresflags:=F_BE;
              else
                internalerror(2013120107);
            end
          else
            case nodetype of
              ltn  : getresflags:=F_B;
              lten : getresflags:=F_BE;
              gtn  : getresflags:=F_A;
              gten : getresflags:=F_AE;
              else
                internalerror(2013120108);
            end;
        end
      else
        begin
          if operands_swapped then
            case nodetype of
              ltn  : getresflags:=F_G;
              lten : getresflags:=F_GE;
              gtn  : getresflags:=F_L;
              gten : getresflags:=F_LE;
              else
                internalerror(2013120105);
            end
          else
            case nodetype of
              ltn  : getresflags:=F_L;
              lten : getresflags:=F_LE;
              gtn  : getresflags:=F_G;
              gten : getresflags:=F_GE;
              else
                internalerror(2013120106);
            end;
        end;
    end;
  330. {*****************************************************************************
  331. AddSmallSet
  332. *****************************************************************************}
  333. {$ifndef i8086}
  procedure tx86addnode.second_addsmallset;
    { Generates code for operations on small sets: union, adding a single
      element (BTS), symmetric difference, intersection and difference.
      The result ends up in a register stored in location. }
    var
      setbase : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      extra_not,
      noswap,
      is_swapped,
      all_member_optimization : boolean;
    begin
      pass_left_right;

      noswap:=false;
      extra_not:=false;
      all_member_optimization:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { take the set base from whichever operand actually is the set }
      if left.resultdef.typ=setdef then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            { are we adding set elements ? }
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b) }
            op:=A_AND;
            is_swapped:=nf_swapped in flags;
            { minuend is the constant "all bits set"? }
            if ((not is_swapped) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
               (is_swapped and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
              all_member_optimization:=true;
            { a constant subtrahend can be inverted right here, anything
              else needs a NOT instruction }
            if (not is_swapped) and
               (right.location.loc=LOC_CONSTANT) then
              right.location.value := not(right.location.value)
            else if is_swapped and
               (left.location.loc=LOC_CONSTANT) then
              left.location.value := not(left.location.value)
            else
              extra_not:=true;
          end;
        else
          internalerror(2003042215);
      end;

      if not all_member_optimization then
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  436. {$endif not i8086}
  procedure tx86addnode.second_cmpsmallset;
    { Generates a small-set comparison (=, <>, <=, >=) and leaves the result
      in the CPU flags (location is LOC_FLAGS). }
    var
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        lten,
        gten :
          begin
            { only lten and gten reach this branch, so the operands need
              exchanging exactly when "swapped" and "is gten" agree }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            { left:=left and right; the CMP emitted below then compares
              this intersection with right }
            emit_op_right_left(A_AND,opsize);
            op:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        equaln,
        unequaln :
          op:=A_CMP;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(op,opsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  472. {*****************************************************************************
  473. AddMMX
  474. *****************************************************************************}
  475. {$ifdef SUPPORT_MMX}
  procedure tx86addnode.second_opmmx;
    { Generates code for MMX vector operations (add, sub, mul, xor, or, and).
      The opcode is chosen from the node type, the MMX element type of the
      left operand and, for add/sub, the cs_mmx_saturation local switch.
      The result register is stored in location.register. }
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { never set to true in this routine, so left is not freed at the end }
      cmpop:=false;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word-sized signed saturating subtract (was A_PSUBSB,
                      which operates on bytes) }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: copy right into a fresh register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right that is read through its reference below;
                    left is an MMX register here, so checking left (as the
                    code used to) always raised the internal error }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  644. {$endif SUPPORT_MMX}
  645. {*****************************************************************************
  646. AddFloat
  647. *****************************************************************************}
  procedure tx86addnode.second_addfloatsse;
    { Generates scalar floating point add/sub/mul/div into an MM register.
      With fputype>=fpu_sse3, sqr(a)+sqr(b) and sqr(a)-sqr(b) are handled
      specially: both operands are packed into one register, squared with a
      single packed multiply and combined with a horizontal add/subtract. }
    var
      op : topcg;
      sqr_sum : boolean;
      operand : tnode;
    begin
      sqr_sum:=false;
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          sqr_sum:=true;

          { strip the sqr() node from the left operand, keeping its argument }
          operand:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=operand;

          { same for the right operand }
          operand:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=operand;
        end;

      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch       }
          if (left.location.loc=LOC_FPUREGISTER) and
             (right.location.loc=LOC_FPUREGISTER) then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        subn :
          op:=OP_SUB;
        muln :
          op:=OP_MUL;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          location:=left.location;
          if is_double(resultdef) then
            begin
              { pack left into the low and right into the high quadword }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              { square both lanes at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              { interleave the low singles of left and right }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { copy the low quadword into the high one so that bits 64..127
                contain valid values as well }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all lanes at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                  end;
                subn:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                  end;
                else
                  internalerror(201108163);
              end;
            end;
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          { an operand living in an fpu register is written to memory instead
            of being forced into an mm register: there is no direct
            fpu->mm register copy instruction, so this is probably shorter }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { fpu register operands go through memory (no direct fpu->mm
            register copy instruction exists) }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          { same treatment for the right operand }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates code for a scalar floating point add/sub/mul/div whose result
    is placed in a freshly allocated MM register (LOC_MMREGISTER), using the
    three-operand a_opmm_*_reg_reg code generator entry points. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop : topcg;
      fuse_squares : boolean;
      operand : tnode;

    { True when n is a real constant node holding a valid float number that
      equals 2. }
    function is_real_const_two(n : tnode) : boolean;
      begin
        is_real_const_two:=false;
        if is_constrealnode(n) then
          if is_number_float(trealconstnode(n).value_real) then
            is_real_const_two:=(trealconstnode(n).value_real=2);
      end;

    { Force a floating point register location to be written to memory.
      It is not forced into an mm register because writing to memory
      probably allows shorter code: there is no direct fpu->mm register
      copy instruction. }
    procedure fpu_location_to_mem(n : tnode);
      begin
        if n.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,n.location,n.resultdef);
      end;

    begin
      fuse_squares:=false;
{$ifdef dummy}
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;
          { drop both sqr() wrappers and keep their arguments as operands }
          operand:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=operand;
          operand:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=operand;
        end;
{$endif dummy}

      pass_left_right;
      check_left_and_right_fpureg(false);

      if nf_swapped in flags then
        begin
          { swapleftright cannot be used when both operands are on the fpu
            stack: both are "R_ST", so nothing would change. In that case
            the stack entries are exchanged manually. }
          if (left.location.loc<>LOC_FPUREGISTER) or
             (right.location.loc<>LOC_FPUREGISTER) then
            swapleftright
          else
            emit_none(A_FXCH,S_NO);
        end;

      { map the node type onto the generic code generator operation }
      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built in place in left's register }
          location:=left.location;
          if not is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square every element, then combine horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register))
              else
                internalerror(201108163);
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register))
              else
                internalerror(201108162);
            end;
        end
      { left*2: emitted as left+left }
      else if (nodetype=muln) and is_real_const_two(right) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,left.location.register,location.register,
            mms_movescalar);
        end
      { 2*right: emitted as right+right }
      else if (nodetype=muln) and is_real_const_two(left) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,right.location.register,location.register,
            mms_movescalar);
        end
      { right can only take the place of the left operand if the operation
        is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          fpu_location_to_mem(left);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,right.location.register,location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          fpu_location_to_mem(right);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,left.location.register,location.register,
            mms_movescalar);
        end;
    end;
  { Generates a scalar SSE floating point comparison (COMISS/COMISD) and
    leaves the result in the CPU flags (LOC_FLAGS). }
  procedure tx86addnode.second_cmpfloatsse;
    var
      cmp_op : tasmop;

    { Emits "cmp_op src,dst.register". src may be a memory reference or an
      MM register; when it is an fpu register it is first written to
      memory, because there is no direct fpu->mm register copy instruction
      and going through memory probably gives shorter code. Any other
      location raises internalerror(errnr). }
    procedure emit_compare(src,dst : tnode;errnr : longint);
      begin
        if src.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,src.location,src.resultdef);
        case src.location.loc of
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmp_op,S_NO,src.location.register,dst.location.register));
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmp_op,S_NO,src.location.reference,dst.location.register));
            end;
          else
            internalerror(errnr);
        end;
      end;

    begin
      { the opcode depends only on the operand precision }
      if is_double(left.resultdef) then
        cmp_op:=A_COMISD
      else if is_single(left.resultdef) then
        cmp_op:=A_COMISS
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          { regular order: left goes into an MM register and right is the
            source operand of the compare }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          emit_compare(right,left,200402223);
        end
      else
        begin
          { right is already in an MM register, so it is used as the
            register operand and left as the source. The operands are thus
            passed in the opposite order, which is recorded by toggling
            nf_swapped before the result flags are determined. }
          emit_compare(left,right,200402221);
          if not (nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates a scalar AVX floating point comparison (VCOMISS/VCOMISD) and
    leaves the result in the CPU flags (LOC_FLAGS). }
  procedure tx86addnode.second_cmpfloatavx;
    var
      cmp_op : tasmop;

    { Emits "cmp_op src,dst.register". src may be a memory reference or an
      MM register; when it is an fpu register it is first written to
      memory, because there is no direct fpu->mm register copy instruction
      and going through memory probably gives shorter code. Any other
      location raises internalerror(errnr). }
    procedure emit_compare(src,dst : tnode;errnr : longint);
      begin
        if src.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,src.location,src.resultdef);
        case src.location.loc of
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmp_op,S_NO,src.location.register,dst.location.register));
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmp_op,S_NO,src.location.reference,dst.location.register));
            end;
          else
            internalerror(errnr);
        end;
      end;

    begin
      { the opcode depends only on the operand precision }
      if is_double(left.resultdef) then
        cmp_op:=A_VCOMISD
      else if is_single(left.resultdef) then
        cmp_op:=A_VCOMISS
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,def_cgsize(resultdef));

      if right.location.loc<>LOC_MMREGISTER then
        begin
          { regular order: left goes into an MM register and right is the
            source operand of the compare }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          emit_compare(right,left,200402223);
        end
      else
        begin
          { right is already in an MM register, so it is used as the
            register operand and left as the source. The operands are thus
            passed in the opposite order, which is recorded by toggling
            nf_swapped before the result flags are determined. }
          emit_compare(left,right,200402221);
          if not (nf_swapped in flags) then
            include(flags,nf_swapped)
          else
            exclude(flags,nf_swapped);
        end;

      location.resflags:=getresflags(true);
    end;
  { Generates code for an add/sub/mul/div on operands that fit in a single
    MM register; the result location is LOC_MMREGISTER. Operand types that
    do not fit in an MM register are not supported yet and raise
    internalerror(200610072). }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      { map the node type onto the generic code generator operation }
      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is that of the vector's element type.
            Previously the commutative branch cast left.resultdef itself
            to tfloatdef, although the sibling branch (under the very same
            guard) treats it as a tarraydef; both branches now agree on the
            element type. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              { accumulate into right's register }
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              { accumulate into left's register, loading it first if needed }
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point add/sub/mul/div. Types handled by
    the vector fpu are delegated to the SSE or AVX variant; everything else
    is done on the x87 stack with the result left in ST. }
  procedure tx86addnode.second_addfloat;
    var
      fpu_op : TAsmOp;
    begin
      { vector fpu types are handled by dedicated routines }
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      pass_left_right;

      { pick the popping x87 instruction for this node type }
      case nodetype of
        addn :
          fpu_op:=A_FADDP;
        subn :
          fpu_op:=A_FSUBP;
        muln :
          fpu_op:=A_FMULP;
        slashn :
          fpu_op:=A_FDIVP;
        else
          internalerror(2003042214);
      end;

      check_left_and_right_fpureg(true);

      { if we swapped the tree nodes, then use the reverse operator;
        addition and multiplication need no adjustment }
      if nf_swapped in flags then
        begin
          if nodetype=subn then
            fpu_op:=A_FSUBRP
          else if nodetype=slashn then
            fpu_op:=A_FDIVRP;
        end;

      emit_reg_reg(fpu_op,S_NO,NR_ST,NR_ST1);
      { the popping form leaves one entry less on the fpu stack }
      tcgx86(cg).dec_fpu_stack;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison whose result is left in
    the CPU flags. Types handled by the vector fpu are delegated to the SSE
    or AVX variant; otherwise both operands are compared on the x87 stack
    and popped. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      swref : treference;
{$endif i8086}
    begin
      { vector fpu types are handled by dedicated routines }
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          if not UseAVX then
            second_cmpfloatsse
          else
            second_cmpfloatavx;
          exit;
        end;

      pass_left_right;
      check_left_and_right_fpureg(true);

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands, then bring the fpu status word
            into the cpu flags by hand }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word in a 2 byte temp and load its high
                byte (offset+1) into AH for SAHF }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,swref);
              emit_ref(A_FSTSW,S_NO,swref);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(swref.offset);
              emit_ref_reg(A_MOV,S_B,swref,NR_AH);
              dec(swref.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,swref);
            end
          else
{$endif i8086}
            begin
              { store the status word directly in AX, then SAHF }
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so the second one is
            removed with an explicit fstp }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  {*****************************************************************************
                                    Add64bit
  *****************************************************************************}
  { 64 bit add: with a 64 bit ALU this is the ordinal add. Without one,
    this generic version raises an internal error. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit compare: with a 64 bit ALU this is the ordinal compare. Without
    one, this generic version raises an internal error. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  {*****************************************************************************
                                    AddOrdinal
  *****************************************************************************}
  { Generates an ordinal comparison with CMP and leaves the result in the
    CPU flags (LOC_FLAGS). }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned_cmp : boolean;
    begin
      { the comparison is unsigned as soon as one operand is not signed }
      is_unsigned_cmp:=not(is_signed(left.resultdef) and
                           is_signed(right.resultdef));
      { the operand size is taken from the left operand's type }
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,is_unsigned_cmp,false,false);

      { the operands are no longer needed once the flags are set }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned_cmp);
    end;
  { unit initialization: store tx86addnode in the caddnode class variable
    (presumably the class used when add nodes are created - confirm against
    the declaration of caddnode) }
  begin
    caddnode:=tx86addnode;
  end.