nx86add.pas 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add-node code generator shared by the x86 family of targets.
    The public methods override the second-pass hooks inherited from
    tcgaddnode; the protected methods are helpers used by those overrides. }
  tx86addnode = class(tcgaddnode)
  protected
    { translate the comparison node type (taking nf_swapped into account)
      into integer condition flags }
    function getresflags(unsigned : boolean) : tresflags;
    { same translation, but yielding the F_Fxx floating point flags }
    function getfpuresflags : tresflags;
    { ensure left.location is a LOC_REGISTER, swapping with right if allowed }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    { load both operands into FPU registers }
    procedure force_left_and_right_fpureg;
    { load operands for an x87 instruction; an OS_F32/OS_F64 memory operand
      may be handed back in refnode instead of being loaded }
    procedure prepare_x87_locations(out refnode: tnode);
    { emit "op right,left" where left is already a register }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    { emit an integer operation plus the optional overflow check }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    procedure second_cmpfloatvector;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    function use_fma : boolean;override;
    procedure second_addfloat;override;
{$ifndef i8086}
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  54. implementation
  55. uses
  56. globtype,globals,systems,
  57. verbose,cutils,
  58. cpuinfo,
  59. aasmbase,aasmtai,aasmdata,aasmcpu,
  60. symconst,symdef,
  61. cgobj,hlcgobj,cgx86,cga,cgutils,
  62. paramgr,tgobj,ncgutil,
  63. ncon,nset,ninl,
  64. defutil;
  65. {*****************************************************************************
  66. Helpers
  67. *****************************************************************************}
  { Emits the two-operand integer instruction "op" for this node.

    Precondition: left.location is a LOC_REGISTER (see left_must_be_reg).
    When the procedure returns, left.location.register holds the result;
    left.location and right.location may have been exchanged to achieve this.

    op         - x86 opcode to emit
    opsize     - operand size of the operation
    unsigned   - selects which flag the overflow check tests
                 (F_AE for unsigned, F_NO for signed)
    extra_not  - one operand is complemented with NOT before it is combined;
                 the generic (non register) path then always emits an AND,
                 which matches the only user in this unit (set difference in
                 second_addsmallset, called with op=A_AND)
    mboverflow - the operation may overflow: emit a call to FPC_OVERFLOW when
                 overflow checking is enabled in the current local switches }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              { result is computed into the right register ... }
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { ... so exchange the locations to have the result in left again;
                newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the locations are exchanged before emitting,
                so the result ends up in the register that was right's }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, compute "r := r op left"
                and move the result back into the left register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
{$ifdef i386}
                  emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
                end
              else
                { "add $1,reg" -> "inc reg"; like the DEC case below this is
                  only done when the target settings allow INC/DEC (UseIncDec) }
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                { "sub $1,reg" -> "dec reg" }
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                { multiplication by 2^power -> shift left by power }
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else
                { multiplication by 2^power+1 with power in 1..3 (i.e. 3, 5 or 9)
                  -> lea (reg,reg,2^power),newreg }
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                   (power in [1..3]) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    reference_reset_base(href,left.location.register,0,0);
                    href.index:=left.location.register;
                    href.scalefactor:=int64(right.location.value)-1;
                    { the result is placed in a freshly allocated register }
                    left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                  end
              else
                begin
                  if extra_not then
                    begin
                      { load right into a scratch register, complement it
                        and AND it into left }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              { jump over the FPC_OVERFLOW call when no overflow happened }
              current_asmdata.getjumplabel(hl4);
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Guarantees that left.location is a LOC_REGISTER.

    opdef/opsize - definition and cg size the operation is carried out in
    noswap       - when true, left and right may not be exchanged

    If left is not in a register but right is (and swapping is permitted) the
    two locations are exchanged and nf_swapped is toggled; otherwise left is
    loaded into a register. Afterwards any non-constant operand whose size
    differs (ignoring signedness) from opsize is forced into a register of
    type opdef. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if (right.location.loc=LOC_REGISTER) and not(noswap) then
            begin
              { right already lives in a register: simply exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { a comparison does not modify its operand, so in that case a
                register holding a constant value may be reused }
              hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,
                nodetype in [equaln,unequaln,ltn,lten,gtn,gten]);
            end;
        end;

      { bring operands of a different size to the operation size }
      if not(right.location.loc=LOC_CONSTANT) and
         not(tcgsize2unsigned[right.location.size]=tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if not(left.location.loc=LOC_CONSTANT) and
         not(tcgsize2unsigned[left.location.size]=tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers: right first (if it is not there
    yet), then left. Whenever left was already a LOC_FPUREGISTER on entry,
    nf_swapped is toggled instead of loading it, because the operands then
    sit on the FPU stack in the opposite order. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      { step 1: make sure right is on the fpu stack }
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      { step 2: left either gets loaded now (ending up on top of the stack),
        or it was on the stack before => operand order is reversed }
      if left.location.loc=LOC_FPUREGISTER then
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Makes both sides suitable for executing an x87 instruction:
    if either side is an OS_F32/OS_F64-sized LOC_(C)REFERENCE, that node is
    returned in 'refnode' and stays in memory; everything else is loaded
    onto the FPU stack. 'refnode' is nil when both sides ended up on the
    stack. nf_swapped is toggled as needed to reflect the operand order. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_fpu_stack : longint;
    begin
      refnode:=nil;

      { later on, no mm registers are allowed, so transfer everything to memory
        here; below it is loaded into an fpu register if needed }
      if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { number of operands which already are on the fpu stack }
      on_fpu_stack:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);
      case on_fpu_stack of
        0:
          begin
            { right always gets loaded, left may remain a memory operand }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        1:
          begin
            { the side which is not on the stack is the memory candidate }
            if left.location.loc<>LOC_FPUREGISTER then
              refnode:=left
            else
              refnode:=right;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable as memory operand }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { size not usable as memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        else
          InternalError(2013090802);
      end;
    end;
  { Emits "op right,left" (AT&T operand order: right is the source,
    left.location.register the destination). left must already be a register.
    right may be a register, a memory reference or a constant; a subset
    (bit-packed) location is first loaded into a register.
    Any other right location raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      if right.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly, anything
              outside that range must go through a temporary register }
            if (opsize in [OS_64,OS_S64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Returns the condition flags which represent the result of the integer
    comparison described by nodetype.

    unsigned - true selects the above/below family (F_A, F_B, ...),
               false the greater/less family (F_G, F_L, ...)

    Equality tests do not depend on signedness or operand order. For the
    ordering tests the condition is mirrored when nf_swapped is set.
    A nodetype that is not a comparison raises an internalerror. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    begin
      if nodetype=equaln then
        getresflags:=F_E
      else if nodetype=unequaln then
        getresflags:=F_NE
      else if unsigned then
        begin
          if not(nf_swapped in flags) then
            case nodetype of
              ltn  : getresflags:=F_B;
              lten : getresflags:=F_BE;
              gtn  : getresflags:=F_A;
              gten : getresflags:=F_AE;
              else
                internalerror(2013120108);
            end
          else
            { operands were exchanged: mirror the condition }
            case nodetype of
              ltn  : getresflags:=F_A;
              lten : getresflags:=F_AE;
              gtn  : getresflags:=F_B;
              gten : getresflags:=F_BE;
              else
                internalerror(2013120107);
            end;
        end
      else
        begin
          if not(nf_swapped in flags) then
            case nodetype of
              ltn  : getresflags:=F_L;
              lten : getresflags:=F_LE;
              gtn  : getresflags:=F_G;
              gten : getresflags:=F_GE;
              else
                internalerror(2013120106);
            end
          else
            { operands were exchanged: mirror the condition }
            case nodetype of
              ltn  : getresflags:=F_G;
              lten : getresflags:=F_GE;
              gtn  : getresflags:=F_L;
              gten : getresflags:=F_LE;
              else
                internalerror(2013120105);
            end;
        end;
    end;
  { Returns the floating point condition flags (F_Fxx) for the comparison
    described by nodetype. The ordering conditions are mirrored when
    nf_swapped is set; a nodetype that is not a comparison raises an
    internalerror. }
  function tx86addnode.getfpuresflags : tresflags;
    begin
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        else
          begin
            if not(nf_swapped in flags) then
              case nodetype of
                ltn  : result:=F_FB;
                lten : result:=F_FBE;
                gtn  : result:=F_FA;
                gten : result:=F_FAE;
                else
                  internalerror(2014031403);
              end
            else
              { operands were exchanged: mirror the condition }
              case nodetype of
                ltn  : result:=F_FA;
                lten : result:=F_FAE;
                gtn  : result:=F_FB;
                gten : result:=F_FBE;
                else
                  internalerror(2014031402);
              end;
          end;
      end;
    end;
  414. {*****************************************************************************
  415. AddSmallSet
  416. *****************************************************************************}
  {$ifndef i8086}
  { Generates code for the small set operators: union (addn/orn, or adding a
    single element with BTS), symmetric difference (symdifn/xorn),
    intersection (muln/andn) and difference (subn, implemented as AND with
    the complemented subtrahend). The result is left in a register. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      extra_not,
      noswap : boolean;
      full_set_minus : boolean;
    begin
      pass_left_right;

      full_set_minus:=false;
      extra_not:=false;
      noswap:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { take the set base from whichever operand is the set }
      if left.resultdef.typ<>setdef then
        setbase:=tsetdef(right.resultdef).setbase
      else
        setbase:=tsetdef(left.resultdef).setbase;

      case nodetype of
        addn :
          begin
            op:=A_OR;
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            { are we adding set elements ? }
            if right.nodetype=setelementn then
              begin
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            op:=A_AND;
            if nf_swapped in flags then
              begin
                { minuend is in right, subtrahend in left }
                full_set_minus:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                { minuend is in left, subtrahend in right }
                full_set_minus:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not full_set_minus then
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  {$endif not i8086}
  { Generates code for comparing two small sets; the result is returned in
    the cpu flags.

    equaln/unequaln are a plain CMP. The subset tests lten/gten are turned
    into "(a AND b) = b": the operands are arranged, the AND is emitted and
    the node is then handled like an equality comparison. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef : tdef;
      opsize : TCGSize;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      if nodetype in [lten,gten] then
        begin
          { exchange the operands for an unswapped lten or a swapped gten }
          if (nf_swapped in flags)=(nodetype=gten) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
          emit_op_right_left(A_AND,opsize);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      { the comparison result lives in the flags }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  556. {*****************************************************************************
  557. AddMMX
  558. *****************************************************************************}
  {$ifdef SUPPORT_MMX}
  { Generates code for a binary operator on MMX operands.

    The opcode is selected from nodetype and the MMX element type of the left
    operand; for addn/subn the saturating variants are used when
    cs_mmx_saturation is enabled. Combinations without a matching opcode
    leave op at A_NOP and raise internalerror 201408201.

    The result is returned in an MMX register (location.register). For a
    subtraction with swapped operands the operation is carried out into the
    register holding (a copy of) the right operand so the operand order
    stays correct. }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      op:=A_NOP;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { signed 16 bit elements need the word variant (PSUBSW),
                    mirroring PADDSW in the addn case above }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { no opcode available for this nodetype/element type combination }
      if op = A_NOP then
        internalerror(201408201);

      { left and right no register? }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { copy right into a new register and compute "hreg := hreg op left" }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is the right operand which must be a memory reference
                    here; left is already an MMX register at this point }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { result is computed into right's register, then the locations
                are exchanged so that left holds the result }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  {$endif SUPPORT_MMX}
  732. {*****************************************************************************
  733. AddFloat
  734. *****************************************************************************}
  { Emits scalar SSE code for a floating point addn/subn/muln/slashn node; the
    result location is always reset to LOC_MMREGISTER.

    When fputype>=fpu_sse3 and the result type uses the vector fpu, the patterns
    sqr(a)+sqr(b) and sqr(a)-sqr(b) are special-cased: both operands are packed
    into one register, squared with a single packed multiply and combined with
    a horizontal add/sub. }
  procedure tx86addnode.second_addfloatsse;

    { Forces an operand that lives in an x87 register to memory. It is not
      forced into an mm register because writing to memory probably allows
      shorter code: there is no direct fpu->mm register copy instruction.
      Operands in any other location are left untouched. }
    procedure spill_x87_operand(operand : tnode);
      begin
        if operand.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,operand.location,operand.resultdef);
      end;

    var
      op : topcg;
      sqr_sum,
      both_on_x87 : boolean;
      inner : tnode;
    begin
      { sqr(a) +/- sqr(b) with SSE3 available? }
      sqr_sum:=(current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sqr_sum then
        begin
          { replace each sqr() node by its argument; the argument is detached
            first so that freeing the inline node does not free it as well }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { can't use swapleftright if both are on the fpu stack, since then
            both are "R_ST" -> nothing would change -> manually switch }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        subn :
          op:=OP_SUB;
        muln :
          op:=OP_MUL;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built up in left's register }
          location:=left.location;

          if not is_double(resultdef) then
            begin
              { interleave the low singles of left and right }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values by duplicating
                the low quadword }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all lanes at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register))
              else
                internalerror(201108163);
            end
          else
            begin
              { pack both doubles into one register and square them together }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register))
              else
                internalerror(201108162);
            end;
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          { copy right into a fresh result register, then combine with left }
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          spill_x87_operand(left);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { load left into a fresh result register ... }
          spill_x87_operand(left);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          { ... and apply the operation with right as second operand }
          spill_x87_operand(right);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Emits scalar AVX code (three operand form) for a floating point
    addn/subn/muln/slashn node; the result location is always reset to
    LOC_MMREGISTER.

    The sqr(a) +/- sqr(b) detection is only compiled in when "dummy" is
    defined; otherwise sqr_sum stays false and that code path is never taken. }
  procedure tx86addnode.second_addfloatavx;

    { True if n is a real constant node holding the ordinary number 2. }
    function is_real_two(n : tnode) : boolean;
      begin
        is_real_two:=is_constrealnode(n) and
          is_number_float(trealconstnode(n).value_real) and
          (trealconstnode(n).value_real=2);
      end;

    { Computes operand+operand into a newly allocated result register; used to
      replace a multiplication by 2. }
    procedure emit_doubling(operand : tnode);
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,operand.location.size);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,operand.location,operand.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
          operand.location.register,
          operand.location.register,
          location.register,
          mms_movescalar);
      end;

    { Forces an operand that lives in an x87 register to memory. It is not
      forced into an mm register because writing to memory probably allows
      shorter code: there is no direct fpu->mm register copy instruction. }
    procedure spill_x87_operand(operand : tnode);
      begin
        if operand.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,operand.location,operand.resultdef);
      end;

    var
      op : topcg;
      sqr_sum,
      both_on_x87 : boolean;
{$ifdef dummy}
      inner : tnode;
{$endif dummy}
    begin
{$ifdef dummy}
      { sqr(a) +/- sqr(b) with SSE3 available? }
      sqr_sum:=(current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sqr_sum then
        begin
          { replace each sqr() node by its argument; the argument is detached
            first so that freeing the inline node does not free it as well }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$else dummy}
      sqr_sum:=false;
{$endif dummy}

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { can't use swapleftright if both are on the fpu stack, since then
            both are "R_ST" -> nothing would change -> manually switch }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        subn :
          op:=OP_SUB;
        muln :
          op:=OP_MUL;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built up in left's register }
          location:=left.location;

          if not is_double(resultdef) then
            begin
              { interleave the low singles of left and right }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values by duplicating
                the low quadword }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all lanes at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register))
              else
                internalerror(201108163);
            end
          else
            begin
              { pack both doubles into one register and square them together }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              if nodetype=addn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register))
              else if nodetype=subn then
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register))
              else
                internalerror(201108162);
            end;
        end
      { left*2 ? }
      else if (nodetype=muln) and is_real_two(right) then
        emit_doubling(left)
      { right*2 ? }
      else if (nodetype=muln) and is_real_two(left) then
        emit_doubling(right)
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_x87_operand(left);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { left goes into an mm register, right may stay in memory }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_x87_operand(right);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Tells whether this node may be turned into a fused multiply-add.
    On i8086 the decision is left to the inherited implementation. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifdef i8086}
      Result:=inherited use_fma;
{$else i8086}
      { only worthwhile if the result stays in an xmm register; fiddling with
        fpu registers and fma makes no sense }
      Result:=use_vectorfpu(resultdef) and
        ((CPUX86_HAS_FMA in cpu_capabilities[current_settings.cputype]) or
         (CPUX86_HAS_FMA4 in cpu_capabilities[current_settings.cputype]));
{$endif i8086}
    end;
  { Compares two single or double operands with (V)COMISS/(V)COMISD and leaves
    the result in the cpu flags (LOC_FLAGS). Any other operand type raises
    internalerror 200402222. }
  procedure tx86addnode.second_cmpfloatvector;
    var
      cmpinstr : tasmop;
    begin
      { select the compare instruction from operand size and AVX usage }
      if is_single(left.resultdef) then
        begin
          if UseAVX then
            cmpinstr:=A_VCOMISS
          else
            cmpinstr:=A_COMISS;
        end
      else if is_double(left.resultdef) then
        begin
          if UseAVX then
            cmpinstr:=A_VCOMISD
          else
            cmpinstr:=A_COMISD;
        end
      else
        internalerror(200402222);

      pass_left_right;

      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu operands to
        memory (not to mm registers because one of the memory locations can be used
        directly in compare instruction, yielding shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right already sits in a register: compare against it with left as
            the other operand and record that the operands were exchanged }
          if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
            end
          else if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register))
          else
            internalerror(200402221);
          toggleflag(nf_swapped);
        end
      else
        begin
          { otherwise bring left into a register and compare right against it }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
            end
          else if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register))
          else
            internalerror(200402223);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for addn/muln/subn/slashn on vector operands that fit into
    an mm register; the result ends up in a LOC_MMREGISTER location.

    Raises internalerror 200610071 for any other node type and internalerror
    200610072 if the operands do not fit into an mm register (not yet
    supported). }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is that of the vector's element type. The
            commutative branch used to cast left.resultdef itself to tfloatdef,
            which reads floattype from the wrong kind of def; both branches now
            go through the array's elementdef, as the non-commutative branch
            always did. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point addn/muln/subn/slashn node.

    Result types handled by the vector fpu are delegated to
    second_addfloatavx/second_addfloatsse. Otherwise x87 code is emitted and
    the result is left in ST (LOC_FPUREGISTER). Any other node type raises
    internalerror 2003042214. }
  procedure tx86addnode.second_addfloat;
    var
      stack_op,          { popping register form, used without a memory operand }
      mem_op : TAsmOp;   { form taking a memory operand }
      refnode : tnode;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(refnode);

      case nodetype of
        addn :
          begin
            stack_op:=A_FADDP;
            mem_op:=A_FADD;
          end;
        muln :
          begin
            stack_op:=A_FMULP;
            mem_op:=A_FMUL;
          end;
        subn :
          { swapped operands need the reversed instruction }
          if nf_swapped in flags then
            begin
              stack_op:=A_FSUBRP;
              mem_op:=A_FSUBR;
            end
          else
            begin
              stack_op:=A_FSUBP;
              mem_op:=A_FSUB;
            end;
        slashn :
          { swapped operands need the reversed instruction }
          if nf_swapped in flags then
            begin
              stack_op:=A_FDIVRP;
              mem_op:=A_FDIVR;
            end
          else
            begin
              stack_op:=A_FDIVP;
              mem_op:=A_FDIV;
            end;
        else
          internalerror(2003042214);
      end;

      if not assigned(refnode) then
        begin
          { both operands are on the fpu stack: the popping form removes one }
          emit_reg_reg(stack_op,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(mem_op,tcgsize2opsize[refnode.location.size],refnode.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison; the result is left in the
    cpu flags (LOC_FLAGS).

    If either operand type is handled by the vector fpu the work is delegated
    to second_cmpfloatvector. Otherwise both operands are forced onto the x87
    stack and compared there. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      statusword : treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        second_cmpfloatvector
      else
        begin
          pass_left_right;
          force_left_and_right_fpureg;

{$ifndef x86_64}
          if current_settings.cputype<cpu_Pentium2 then
            begin
              { compare and pop both operands }
              emit_none(A_FCOMPP,S_NO);
              tcgx86(cg).dec_fpu_stack;
              tcgx86(cg).dec_fpu_stack;

              { load fpu flags }
{$ifdef i8086}
              if current_settings.cputype < cpu_286 then
                begin
                  { store the status word to a 2 byte temp and load its upper
                    byte into AH for SAHF }
                  tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
                  emit_ref(A_FSTSW,S_NO,statusword);
                  cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
                  inc(statusword.offset);
                  emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
                  dec(statusword.offset);
                  emit_none(A_SAHF,S_NO);
                  cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
                  tg.ungettemp(current_asmdata.CurrAsmList,statusword);
                end
              else
{$endif i8086}
                begin
                  { status word -> AX -> cpu flags }
                  cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
                  emit_reg(A_FNSTSW,S_NO,NR_AX);
                  emit_none(A_SAHF,S_NO);
                  cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
                end;
            end
          else
{$endif x86_64}
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
              { fcomip pops only one fpu register }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
              tcgx86(cg).dec_fpu_stack;
              tcgx86(cg).dec_fpu_stack;
            end;

          location_reset(location,LOC_FLAGS,OS_NO);
          location.resflags:=getfpuresflags;
        end;
    end;
  1227. {*****************************************************************************
  1228. Add64bit
  1229. *****************************************************************************}
  { 64 bit arithmetic: with a 64 bit ALU this is plain ordinal arithmetic,
    otherwise it must be implemented separately and reaching this method
    raises internalerror 200402042. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: with a 64 bit ALU this is a plain ordinal comparison,
    otherwise it must be implemented separately and reaching this method
    raises internalerror 200402043. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1248. {*****************************************************************************
  1249. AddOrdinal
  1250. *****************************************************************************}
  { Generates a CMP for two ordinal operands and leaves the result in the cpu
    flags (LOC_FLAGS). The comparison counts as unsigned as soon as one of the
    operand types is not signed. }
  procedure tx86addnode.second_cmpordinal;
    var
      opdef : tdef;
      opsize : tcgsize;
      unsigned,
      imm_cmp : boolean;
    begin
      unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      opdef:=left.resultdef;
      opsize:=def_cgsize(opdef);

      pass_left_right;

      { a constant can be compared directly against a memory operand ... }
      imm_cmp:=(right.location.loc=LOC_CONSTANT) and
        (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]);
{$ifdef x86_64}
      { ... but for 64 bit operands only if the value lies in the longint range }
      if imm_cmp and (opsize in [OS_64,OS_S64]) then
        imm_cmp:=(right.location.value>=low(longint)) and
          (right.location.value<=high(longint));
{$endif x86_64}

      if imm_cmp then
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          left_must_be_reg(opdef,opsize,false);
          emit_generic_code(A_CMP,opsize,unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned);
    end;
  { Unit initialization: assigns tx86addnode to the caddnode class reference. }
  initialization
    caddnode:=tx86addnode;
  end.