nx86add.pas 54 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Add node shared by the x86 family targets (per the unit header: common
      code generation for add nodes on the i386 and x86). Descends from
      tcgaddnode and overrides its second_* code generation methods. }
    tx86addnode = class(tcgaddnode)
    protected
      { Condition flag to test after an integer compare for the current
        nodetype; 'unsigned' selects unsigned conditions, nf_swapped mirrors
        the ordering conditions. }
      function getresflags(unsigned : boolean) : tresflags;
      { Same mapping as getresflags, but to the F_F* flag values. }
      function getfpuresflags : tresflags;
      { Ensures left.location is a register (swapping with right when that is
        allowed by 'noswap') and that both non-constant operands match opsize. }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      { Loads both operands into fpu registers, toggling nf_swapped when the
        resulting order requires it. }
      procedure force_left_and_right_fpureg;
      { Prepares the operands for an x87 instruction; an operand that may stay
        in memory is returned in refnode (nil otherwise). }
      procedure prepare_x87_locations(out refnode: tnode);
      { Emits 'op right,left' for a right operand that is a register,
        reference or constant; left must already be a register. }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      { Emits the integer operation of the node including the optional
        overflow check. }
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
      procedure second_cmpfloatvector;
      procedure second_addfloatsse;
      procedure second_addfloatavx;
    public
      function use_fma : boolean;override;
      procedure second_addfloat;override;
      {$ifndef i8086}
      { small set arithmetic; not compiled for the i8086 target }
      procedure second_addsmallset;override;
      {$endif not i8086}
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      { small set comparison, result is delivered in the flags }
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
      {$ifdef SUPPORT_MMX}
      { MMX vector operations; only compiled when SUPPORT_MMX is defined }
      procedure second_opmmx;override;
      {$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  54. implementation
  55. uses
  56. globtype,globals,systems,
  57. verbose,cutils,
  58. cpuinfo,
  59. aasmbase,aasmtai,aasmdata,aasmcpu,
  60. symconst,symdef,
  61. cgobj,hlcgobj,cgx86,cga,cgutils,
  62. paramgr,tgobj,ncgutil,
  63. ncon,nset,ninl,
  64. defutil;
  65. {*****************************************************************************
  66. Helpers
  67. *****************************************************************************}
  { Emits the integer/logical instruction 'op' for this node. The left operand
    is expected to be in a register already and receives the result.

      op         - instruction to emit
      opsize     - operand size of the operation
      unsigned   - selects the overflow test: F_AE when true, F_NO when false
      extra_not  - one operand must be complemented (NOT) before 'op' is applied
      mboverflow - the operation may overflow: when cs_check_overflow is active
                   a conditional call to FPC_OVERFLOW is appended

    NOTE(review): with extra_not and a non-register right operand (outside the
    swapped subn case) A_AND is emitted regardless of 'op'. The callers visible
    in this unit only pass extra_not together with A_AND - confirm for others. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              { the result ends up in right's register ... }
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { ... so exchange the locations to have it in left again;
                newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these operations the locations are exchanged before emitting }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, apply the operation there
                and copy the result back into left's register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  if (target_info.cpu=cpu_i386) then
                    emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
                  else
                    emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else
                { add 1 -> inc; guarded by UseIncDec exactly like the
                  sub 1 -> dec case that follows }
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not(cs_check_overflow in current_settings.localswitches) and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                { multiplication by a power of two -> shift left }
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                   (power in [1..3]) and
                   not(cs_check_overflow in current_settings.localswitches) then
                  begin
                    { multiplication by 3, 5 or 9 -> lea (reg,reg,scale),newreg
                      with scale = constant-1 (2, 4 or 8) }
                    reference_reset_base(href,left.location.register,0,0);
                    href.index:=left.location.register;
                    href.scalefactor:=int64(right.location.value)-1;
                    left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                  end
              else
                begin
                  if extra_not then
                    begin
                      { complement a copy of right in a scratch register, then AND it into left }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              { jump over the FPC_OVERFLOW call when no overflow occurred }
              current_asmdata.getjumplabel(hl4);
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Guarantees that the left operand lives in a register.

      opdef  - type definition the operands are converted to
      opsize - operand size matching opdef
      noswap - when true, left and right may not be exchanged

    If left is not in a register, but right is and swapping is allowed, the
    two locations are exchanged (nf_swapped is toggled). Otherwise left is
    loaded into a register. Afterwards every non-constant operand whose size
    differs from opsize (ignoring signedness) is converted, right first. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or (right.location.loc<>LOC_REGISTER) then
            { a comparison does not modify the register, so a constant
              register may be reused in that case }
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,
              (nodetype in [ltn,lten,gtn,gten,equaln,unequaln]))
          else
            begin
              { right already is a register: simply exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
        end;

      { size adjustment of the right operand }
      if not((right.location.loc=LOC_CONSTANT) or
             (tcgsize2unsigned[right.location.size]=tcgsize2unsigned[opsize])) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);

      { size adjustment of the left operand }
      if not((left.location.loc=LOC_CONSTANT) or
             (tcgsize2unsigned[left.location.size]=tcgsize2unsigned[opsize])) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into fpu registers. The right operand is loaded
    first when it is not in an fpu register yet. If left already was in an
    fpu register, nothing is loaded for it and nf_swapped is toggled
    instead, because the operands then are in the wrong order on the stack. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc=LOC_FPUREGISTER then
        { left was on the stack already => the operand order is reversed }
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Brings both operands into a shape an x87 instruction can work with.
    An operand that is an OS_F32/OS_F64-sized LOC_REFERENCE/LOC_CREFERENCE may
    stay in memory; it is handed back in 'refnode'. All other operands are
    loaded onto the FPU stack and 'refnode' is nil if nothing stays in memory.
    nf_swapped is toggled whenever the resulting operand order requires it. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    begin
      refnode:=nil;

      { mm registers are not allowed later on, so move such operands to memory
        here; they are loaded into an fpu register below if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { dispatch on the number of operands already on the FPU stack }
      case ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER) of
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        1:
          begin
            { the candidate for staying in memory is the operand which is
              not on the stack }
            if (right.location.loc=LOC_FPUREGISTER) then
              refnode:=left
            else
              refnode:=right;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if (refnode.location.size in [OS_F32,OS_F64]) then
              begin
                { stays in memory }
                if (refnode=left) then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { unsuitable size: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if (refnode=right) then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        0:
          begin
            { right always goes onto the stack, left may stay in memory }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        else
          InternalError(2013090802);
      end;
    end;
  { Emits "op right,left" with left.location.register as destination.
    The right operand may be a register, a memory reference or a constant;
    subset locations are loaded into a register first. Any other location
    of right raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      if (right.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      { the destination (left) has to be a register at this point }
      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { only signed 32 bit immediates can be encoded on x86_64, so a
              64 bit constant outside of that range goes through a register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Maps the comparison node type to the condition flag to be tested after an
    integer compare.

      unsigned - true selects the unsigned conditions (A/AE/B/BE), false the
                 signed ones (G/GE/L/LE)

    Equality tests map to F_E/F_NE. If nf_swapped is set, the ordering
    conditions are mirrored. Any other node type raises an internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      mirrored : boolean;
    begin
      mirrored:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_E;
        unequaln :
          result:=F_NE;
        ltn :
          begin
            if unsigned then
              begin
                if mirrored then
                  result:=F_A
                else
                  result:=F_B;
              end
            else
              begin
                if mirrored then
                  result:=F_G
                else
                  result:=F_L;
              end;
          end;
        lten :
          begin
            if unsigned then
              begin
                if mirrored then
                  result:=F_AE
                else
                  result:=F_BE;
              end
            else
              begin
                if mirrored then
                  result:=F_GE
                else
                  result:=F_LE;
              end;
          end;
        gtn :
          begin
            if unsigned then
              begin
                if mirrored then
                  result:=F_B
                else
                  result:=F_A;
              end
            else
              begin
                if mirrored then
                  result:=F_L
                else
                  result:=F_G;
              end;
          end;
        gten :
          begin
            if unsigned then
              begin
                if mirrored then
                  result:=F_BE
                else
                  result:=F_AE;
              end
            else
              begin
                if mirrored then
                  result:=F_LE
                else
                  result:=F_GE;
              end;
          end;
        else
          begin
            { not a comparison: the error number identifies the variant }
            if unsigned then
              begin
                if mirrored then
                  internalerror(2013120107)
                else
                  internalerror(2013120108);
              end
            else
              begin
                if mirrored then
                  internalerror(2013120105)
                else
                  internalerror(2013120106);
              end;
          end;
      end;
    end;
  { Maps the comparison node type to the F_F* condition flag value.
    Equality tests map to F_FE/F_FNE. The ordering conditions are mirrored
    when nf_swapped is set. Any other node type raises an internal error. }
  function tx86addnode.getfpuresflags : tresflags;
    var
      mirrored : boolean;
    begin
      mirrored:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        ltn :
          begin
            if mirrored then
              result:=F_FA
            else
              result:=F_FB;
          end;
        lten :
          begin
            if mirrored then
              result:=F_FAE
            else
              result:=F_FBE;
          end;
        gtn :
          begin
            if mirrored then
              result:=F_FB
            else
              result:=F_FA;
          end;
        gten :
          begin
            if mirrored then
              result:=F_FBE
            else
              result:=F_FAE;
          end;
        else
          begin
            if mirrored then
              internalerror(2014031402)
            else
              internalerror(2014031403);
          end;
      end;
    end;
  413. {*****************************************************************************
  414. AddSmallSet
  415. *****************************************************************************}
  416. {$ifndef i8086}
  { Generates code for an operation on small sets. The result is left in a
    register (location of the left or, for the "all members" shortcut, the
    right operand).

    Mapping of node types to instructions:
      addn            - A_BTS when a single set element is added, else A_OR
      symdifn, xorn   - A_XOR
      muln, andn      - A_AND
      subn            - A_AND with the complement of the subtrahend
      orn             - A_OR
    Any other node type raises internalerror 2003042215. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      extra_not,
      noswap : boolean;
      all_member_optimization:boolean;
    begin
      pass_left_right;
      noswap:=false;
      extra_not:=false;
      all_member_optimization:=false;
      { by default the operation is carried out in the size of the result }
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);
      { the set base is taken from whichever operand is the set }
      if (left.resultdef.typ=setdef) then
        setbase:=tsetdef(left.resultdef).setbase
      else
        setbase:=tsetdef(right.resultdef).setbase;
      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            { are we adding set elements ? }
            if right.nodetype=setelementn then
              begin
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported: widen the operation to 32 bit, this
                  is undone at the end of the procedure }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,setbase);
                op:=A_BTS;
                { set and element index may not be exchanged }
                noswap:=true;
              end
            else
              op:=A_OR;
          end;
        symdifn :
          op:=A_XOR;
        muln :
          op:=A_AND;
        subn :
          begin
            { a-b is computed as a and (not b) }
            op:=A_AND;
            { minuend is the constant -1 (all bits set): a single NOT of the
              other operand is enough, see below }
            if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
               ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
              all_member_optimization:=true;
            { a constant subtrahend is complemented right here, otherwise
              emit_generic_code has to emit the NOT (extra_not) }
            if (not(nf_swapped in flags)) and
               (right.location.loc=LOC_CONSTANT) then
              right.location.value := not(right.location.value)
            else if (nf_swapped in flags) and
                    (left.location.loc=LOC_CONSTANT) then
              left.location.value := not(left.location.value)
            else
              extra_not:=true;
          end;
        xorn :
          op:=A_XOR;
        orn :
          op:=A_OR;
        andn :
          op:=A_AND;
        else
          internalerror(2003042215);
      end;
      if all_member_optimization then
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          { right now is the operand to complement; it also holds the result }
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      else
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          { unsigned operation, no overflow check }
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end;
      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  519. {$endif not i8086}
  { Compares two small sets and delivers the result in the flags.
    equaln/unequaln are a plain A_CMP. For lten/gten the operands are
    exchanged if necessary, left is ANDed with right and the outcome is then
    compared against right as if the node were an equality test. Any other
    node type raises internalerror 2003042215. }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmpdef : tdef;
      cmpsize : TCGSize;
      cmpinstr : TAsmOp;
    begin
      pass_left_right;
      cmpdef:=left.resultdef;
      cmpsize:=int_cgsize(cmpdef.size);

      case nodetype of
        lten,gten:
          begin
            { the operands have to be exchanged for a non-swapped lten and for
              a swapped gten, i.e. when "swapped" and "is gten" agree }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmpdef,false);
            emit_op_right_left(A_AND,cmpsize);
            cmpinstr:=A_CMP;
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        equaln,
        unequaln :
          cmpinstr:=A_CMP;
        else
          internalerror(2003042215);
      end;

      { the compare itself needs left in a register }
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(cmpinstr,cmpsize,true,false,false);

      { both operands are no longer needed, only the flags are }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  555. {*****************************************************************************
  556. AddMMX
  557. *****************************************************************************}
  558. {$ifdef SUPPORT_MMX}
  { Generates code for an MMX vector operation; the result is returned in an
    MMX register (LOC_MMXREGISTER).

    The instruction is selected from the node type and the element type of
    the left operand (mmx_type). For addn/subn the saturating forms are used
    when cs_mmx_saturation is active. Element types that have no instruction
    for the requested operation leave op at A_NOP and raise internalerror
    201408201; unsupported node types raise internalerror 2003042214.

    Left is brought into an MMX register first (by exchanging the locations
    when right already is one). A swapped subtraction is computed into the
    register holding the right operand, because it is not commutative. }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;
      { no comparison is handled here, so this stays false }
      cmpop:=false;
      op:=A_NOP;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { signed 16 bit elements need the word form (PSUBSW), in
                    line with PADDSW used for the addition above }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { no instruction available for this element type }
      if op = A_NOP then
        internalerror(201408201);

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  { copy it, the register variable must not be modified }
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a new register and
                subtract left from it, the new register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is right's reference which is used below, so right is
                    the operand that has to be checked here (left is an MMX
                    register at this point) }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { the result is computed in right's register, exchange the
                locations afterwards so that left holds it }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  730. {$endif SUPPORT_MMX}
  731. {*****************************************************************************
  732. AddFloat
  733. *****************************************************************************}
  { Emits SSE code for a scalar floating point addn/subn/muln/slashn node.
    The result location is always reset to LOC_MMREGISTER.

    Three code shapes are produced:
      - sqr(a)+sqr(b) or sqr(a)-sqr(b) when the fpu type is at least SSE3:
        both arguments are packed into one register, squared with a packed
        multiply and combined with a horizontal add/sub,
      - a commutative operation whose right operand already lives in an mm
        register: right is copied into a fresh register and left is applied,
      - everything else: left is loaded into a fresh register and right is
        applied to it.

    Raises internalerror(200312231) for any other node type. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop        : topcg;
      fold_sqr    : boolean;
      both_on_x87 : boolean;
      arg         : tnode;

    { true if n is an inline node computing sqr() of a real value }
    function is_sqr_call(n : tnode) : boolean;
      begin
        is_sqr_call:=(n.nodetype=inlinen) and
                     (tinlinenode(n).inlinenumber=in_sqr_real);
      end;

    { Forces an operand held in an x87 register to memory.  Writing to memory
      instead of forcing an mm register allows probably shorter code because
      there is no direct fpu->mm register copy instruction. }
    procedure spill_x87_operand(n : tnode);
      begin
        if n.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,n.location,n.resultdef);
      end;

    { appends "opc result,result" for the node's result register }
    procedure emit_on_result(opc : tasmop);
      begin
        current_asmdata.CurrAsmList.concat(
          taicpu.op_reg_reg(opc,S_NO,location.register,location.register));
      end;

    begin
      fold_sqr:=(current_settings.fputype>=fpu_sse3) and
                use_vectorfpu(resultdef) and
                (nodetype in [addn,subn]) and
                is_sqr_call(left) and
                is_sqr_call(right);
      if fold_sqr then
        begin
          { replace each sqr() wrapper by its argument; the wrapper is
            detached from the argument before it is freed }
          arg:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=arg;

          arg:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=arg;
        end;

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
                   (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless if both operands are on the fpu stack:
            both are "R_ST", so nothing would change -> exchange manually }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fold_sqr then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { pack left and right into one register, square both lanes,
                then combine them horizontally }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              emit_on_result(A_MULPD);
              case nodetype of
                addn:
                  emit_on_result(A_HADDPD);
                subn:
                  emit_on_result(A_HSUBPD);
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { both halves of the register now carry the operand pair }
              emit_on_result(A_MULPS);
              case nodetype of
                addn:
                  emit_on_result(A_HADDPS);
                subn:
                  emit_on_result(A_HSUBPS);
                else
                  internalerror(201108163);
              end;
            end;
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,
            right.location.register,location.register,mms_movescalar);
          spill_x87_operand(left);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          spill_x87_operand(left);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,
            left.location,location.register,mms_movescalar);

          spill_x87_operand(right);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,location.register,mms_movescalar);
        end;
    end;
  { Emits AVX code for a scalar floating point addn/subn/muln/slashn node.
    The result location is always reset to LOC_MMREGISTER and a fresh mm
    register is allocated for it (three operand form).

    The sqr(a)+/-sqr(b) folding is only compiled in when the symbol "dummy"
    is defined; without it fold_sqr stays false and that branch is never
    taken.  A multiplication by the real constant 2 is emitted as an
    addition of the other operand to itself.

    Raises internalerror(200312231) for any other node type. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop        : topcg;
      fold_sqr    : boolean;
      both_on_x87 : boolean;
      arg         : tnode;

    { Forces an operand held in an x87 register to memory.  Writing to memory
      instead of forcing an mm register allows probably shorter code because
      there is no direct fpu->mm register copy instruction. }
    procedure spill_x87_operand(n : tnode);
      begin
        if n.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,n.location,n.resultdef);
      end;

    { appends "opc result,result" for the node's result register }
    procedure emit_on_result(opc : tasmop);
      begin
        current_asmdata.CurrAsmList.concat(
          taicpu.op_reg_reg(opc,S_NO,location.register,location.register));
      end;

    { true if n is a real constant node holding the number 2 }
    function is_const_two(n : tnode) : boolean;
      begin
        is_const_two:=is_constrealnode(n) and
                      is_number_float(trealconstnode(n).value_real) and
                      (trealconstnode(n).value_real=2);
      end;

    { computes n+n into a freshly allocated result register }
    procedure emit_doubling(n : tnode);
      begin
        location.register:=cg.getmmregister(current_asmdata.CurrAsmList,n.location.size);
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,n.location,n.resultdef,true);
        cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
          n.location.register,
          n.location.register,
          location.register,
          mms_movescalar);
      end;

    begin
      fold_sqr:=false;
{$ifdef dummy}
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fold_sqr:=true;
          { replace each sqr() wrapper by its argument; the wrapper is
            detached from the argument before it is freed }
          arg:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=arg;

          arg:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=arg;
        end;
{$endif dummy}

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
                   (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless if both operands are on the fpu stack:
            both are "R_ST", so nothing would change -> exchange manually }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        muln :
          cgop:=OP_MUL;
        subn :
          cgop:=OP_SUB;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fold_sqr then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              emit_on_result(A_MULPD);
              case nodetype of
                addn:
                  emit_on_result(A_HADDPD);
                subn:
                  emit_on_result(A_HSUBPD);
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { both halves of the register now carry the operand pair }
              emit_on_result(A_MULPS);
              case nodetype of
                addn:
                  emit_on_result(A_HADDPS);
                subn:
                  emit_on_result(A_HSUBPS);
                else
                  internalerror(201108163);
              end;
            end;
        end
      { left*2 ? }
      else if (nodetype=muln) and is_const_two(right) then
        emit_doubling(left)
      { right*2 ? }
      else if (nodetype=muln) and is_const_two(left) then
        emit_doubling(right)
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_x87_operand(left);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          spill_x87_operand(right);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Tells whether a fused multiply-add may be used for this node.

    On i8086 the decision is delegated to the inherited implementation.
    Elsewhere the answer is true only if the result is handled by the vector
    fpu and the selected cpu type advertises FMA or FMA4. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifdef i8086}
      Result:=inherited use_fma;
{$else i8086}
      { test if the result stays in an xmm register; fiddling with fpu
        registers and fma makes no sense }
      Result:=false;
      if use_vectorfpu(resultdef) then
        Result:=(cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[];
{$endif i8086}
    end;
  { Emits a scalar single/double comparison with (V)COMISS or (V)COMISD and
    leaves the result in the cpu flags (LOC_FLAGS).

    If right already lives in an mm register it is used as the register
    operand and nf_swapped is toggled; otherwise left is forced into an mm
    register and compared against right.

    Raises internalerror(200402222) if left is neither single nor double,
    and internalerror(200402221) / internalerror(200402223) if the
    non-register operand is neither a reference nor an mm register. }
  procedure tx86addnode.second_cmpfloatvector;
    var
      cmp_op : tasmop;

    { forces an operand held in an x87 register to memory }
    procedure spill_x87_operand(n : tnode);
      begin
        if n.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
          hlcg.location_force_mem(current_asmdata.CurrAsmList,n.location,n.resultdef);
      end;

    { Appends "cmp_op src,dst".  dst must be in an mm register; src may be a
      memory reference or an mm register, anything else raises
      internalerror(errcode). }
    procedure emit_compare(src,dst : tnode;errcode : longint);
      begin
        case src.location.loc of
          LOC_REFERENCE,LOC_CREFERENCE:
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,src.location.reference);
              current_asmdata.CurrAsmList.concat(
                taicpu.op_ref_reg(cmp_op,S_NO,src.location.reference,dst.location.register));
            end;
          LOC_MMREGISTER,LOC_CMMREGISTER:
            current_asmdata.CurrAsmList.concat(
              taicpu.op_reg_reg(cmp_op,S_NO,src.location.register,dst.location.register));
          else
            internalerror(errcode);
        end;
      end;

    begin
      { pick the opcode from the operand type and the AVX setting }
      if is_single(left.resultdef) then
        begin
          if UseAVX then
            cmp_op:=A_VCOMISS
          else
            cmp_op:=A_COMISS;
        end
      else if is_double(left.resultdef) then
        begin
          if UseAVX then
            cmp_op:=A_VCOMISD
          else
            cmp_op:=A_COMISD;
        end
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu
        operands to memory (not to mm registers because one of the memory
        locations can be used directly in the compare instruction, yielding
        shorter code) }
      spill_x87_operand(left);
      spill_x87_operand(right);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right serves as the register operand, so the comparison is
            emitted with the operands exchanged }
          emit_compare(left,right,200402221);
          toggleflag(nf_swapped);
        end
      else
        begin
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          emit_compare(right,left,200402223);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Emits a packed floating point addn/subn/muln/slashn on vector operands
    that fit into a single mm register; the result is left in an mm
    register.

    For commutative operations with right already in an mm register, right
    is reused as the destination; otherwise left is forced into an mm
    register and becomes the destination.

    Raises internalerror(200610071) for unsupported node types and
    internalerror(200610072) if the operand does not fit into an mm
    register (not yet supported). }
  procedure tx86addnode.second_opvector;
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

          { The operand is a vector, so the float type has to be taken from
            the array's element def.  The commutative branch used to cast
            left.resultdef itself to tfloatdef, which reads the float type
            from the wrong kind of def; both branches now share the same
            element size.
            NOTE(review): assumes fits_in_mm_register only accepts arrays of
            floats, as the non-commutative branch already did - confirm. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
                left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
                right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Emits code for a floating point addn/subn/muln/slashn node.

    Results handled by the vector fpu are delegated to the AVX or SSE
    generator.  Otherwise x87 code is produced: if prepare_x87_locations
    hands back a node, the operation is done directly with that node's
    memory reference; if not, the popping form on st/st(1) is used and the
    tracked fpu stack depth is decreased by one.  The result is in ST.

    Raises internalerror(2003042214) for any other node type. }
  procedure tx86addnode.second_addfloat;
    var
      x87op   : TAsmOp;
      memnode : tnode;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(memnode);

      if assigned(memnode) then
        begin
          { memory operand available: non-popping forms; sub and div use the
            reversed opcode when the operands are swapped }
          case nodetype of
            addn :
              x87op:=A_FADD;
            muln :
              x87op:=A_FMUL;
            subn :
              if (nf_swapped in flags) then
                x87op:=A_FSUBR
              else
                x87op:=A_FSUB;
            slashn :
              if (nf_swapped in flags) then
                x87op:=A_FDIVR
              else
                x87op:=A_FDIV;
            else
              internalerror(2003042214);
          end;
          emit_ref(x87op,tcgsize2opsize[memnode.location.size],memnode.location.reference);
        end
      else
        begin
          { both operands on the fpu stack: popping forms }
          case nodetype of
            addn :
              x87op:=A_FADDP;
            muln :
              x87op:=A_FMULP;
            subn :
              if (nf_swapped in flags) then
                x87op:=A_FSUBRP
              else
                x87op:=A_FSUBP;
            slashn :
              if (nf_swapped in flags) then
                x87op:=A_FDIVRP
              else
                x87op:=A_FDIVP;
            else
              internalerror(2003042214);
          end;
          emit_reg_reg(x87op,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Emits a floating point comparison and leaves the result in the cpu
    flags (LOC_FLAGS).

    Operands handled by the vector fpu are delegated to
    second_cmpfloatvector.  Otherwise both operands are forced onto the x87
    stack.  On non-x86_64 targets with a cpu type below Pentium2 the
    comparison uses FCOMPP and the fpu status word is transferred into the
    flags with SAHF; in all other cases FCOMIP is used. }
  procedure tx86addnode.second_cmpfloat;

{$ifndef x86_64}
{$ifdef i8086}
    { cpu types below the 286: store the status word into a 2 byte temp,
      load its high byte into AH and transfer it with SAHF }
    procedure load_fpu_flags_via_memory;
      var
        statusword : treference;
      begin
        tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
        emit_ref(A_FSTSW,S_NO,statusword);
        cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
        { address the high byte for the load, then restore the offset so
          that the temp is released with its original reference }
        inc(statusword.offset);
        emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
        dec(statusword.offset);
        emit_none(A_SAHF,S_NO);
        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
        tg.ungettemp(current_asmdata.CurrAsmList,statusword);
      end;
{$endif i8086}

    { store the status word straight into AX and transfer it with SAHF }
    procedure load_fpu_flags_via_ax;
      begin
        cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
        emit_reg(A_FNSTSW,S_NO,NR_AX);
        emit_none(A_SAHF,S_NO);
        cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
      end;
{$endif x86_64}

    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          emit_none(A_FCOMPP,S_NO);
          { account for both operands leaving the fpu stack }
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            load_fpu_flags_via_memory
          else
{$endif i8086}
            load_fpu_flags_via_ax;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1224. {*****************************************************************************
  1225. Add64bit
  1226. *****************************************************************************}
  { 64 bit arithmetic.  With a 64 bit ALU this is ordinary ordinal
    arithmetic; without one this generic x86 node raises
    internalerror(200402042). }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison.  With a 64 bit ALU this is an ordinary ordinal
    comparison; without one this generic x86 node raises
    internalerror(200402043). }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1245. {*****************************************************************************
  1246. AddOrdinal
  1247. *****************************************************************************}
  { Emits a CMP for two ordinal operands and leaves the result in the cpu
    flags (LOC_FLAGS).

    The comparison is treated as unsigned as soon as one operand type is
    not signed.  A constant right operand is compared directly against a
    left operand in memory; on x86_64 this is only done for 64 bit operands
    if the constant lies within the longint range.  In all other cases left
    is brought into a register first. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef      : tdef;
      opsize      : tcgsize;
      is_unsigned : boolean;

    { true if "cmp const,mem" can be emitted for the current operands }
    function can_cmp_const_with_mem : boolean;
      begin
        can_cmp_const_with_mem:=
          (right.location.loc=LOC_CONSTANT) and
          (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
{$ifdef x86_64}
          and ((not (opsize in [OS_64,OS_S64])) or (
                 (right.location.value>=low(longint)) and (right.location.value<=high(longint))
               ))
{$endif x86_64}
          ;
      end;

    begin
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      opsize:=def_cgsize(cmpdef);

      pass_left_right;

      if can_cmp_const_with_mem then
        emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference)
      else
        begin
          left_must_be_reg(cmpdef,opsize,false);
          emit_generic_code(A_CMP,opsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end;
      { left is released on both paths, after right where right is released }
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  begin
    { unit initialization: make caddnode refer to the x86 add node class }
    caddnode:=tx86addnode;
  end.