nx86add.pas 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Add node for the x86 family. It derives from tcgaddnode and replaces a
      number of its second-pass handlers (every method marked 'override'
      below); the protected methods are shared helpers used by those handlers. }
    tx86addnode = class(tcgaddnode)
    protected
      { instruction emitters }
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);

      { operand location preparation }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      procedure force_left_and_right_fpureg;
      procedure prepare_x87_locations(out refnode: tnode);

      { mapping from the node type (and nf_swapped) to a result flag }
      function getresflags(unsigned : boolean) : tresflags;
      function getfpuresflags : tresflags;

      { floating point / vector helpers }
      procedure second_cmpfloatvector;
      procedure second_addfloatsse;
      procedure second_addfloatavx;
    public
      { arithmetic handlers }
      procedure second_addfloat;override;
{$ifndef i8086}
      procedure second_addsmallset;override;
{$endif not i8086}
      procedure second_add64bit;override;

      { comparison handlers }
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;

      { vector handlers }
{$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  53. implementation
  54. uses
  55. globtype,globals,systems,
  56. verbose,cutils,
  57. cpuinfo,
  58. aasmbase,aasmtai,aasmdata,aasmcpu,
  59. symconst,symdef,
  60. cgobj,hlcgobj,cgx86,cga,cgutils,
  61. paramgr,tgobj,ncgutil,
  62. ncon,nset,ninl,
  63. defutil;
  64. {*****************************************************************************
  65. Helpers
  66. *****************************************************************************}
  { Emits the integer instruction for this node. The caller must already have
    placed left.location in a register; it is the destination of the
    operation, right.location is the source.

      op         - x86 opcode to emit
      opsize     - operand size (translated through TCGSize2Opsize)
      unsigned   - selects the overflow test: F_AE when true, F_NO otherwise
      extra_not  - one operand is complemented before the operation (which one
                   depends on the operand locations and on nf_swapped)
      mboverflow - when true and cs_check_overflow is active, a conditional
                   jump around a call to FPC_OVERFLOW is appended

    Several constant right operands are special-cased (compare against 0,
    add/subtract 1, multiply by 2^n or by 2^n+1 with n in 1..3). }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
    begin
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              { result ends up in right's register ... }
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { ... so exchange the locations; newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { these opcodes are exchanged with their operands swapped }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, operate on it and copy the
                result back into left's register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  if (target_info.cpu=cpu_i386) then
                    emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
                  else
                    emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
                end
              else if (op=A_ADD) and
                      (right.location.loc=LOC_CONSTANT) and
                      (right.location.value=1) and
                      not(cs_check_overflow in current_settings.localswitches) and
                      { same gate as the DEC shortcut below: only use INC/DEC
                        when UseIncDec allows it }
                      UseIncDec then
                begin
                  emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_SUB) and
                      (right.location.loc=LOC_CONSTANT) and
                      (right.location.value=1) and
                      not(cs_check_overflow in current_settings.localswitches) and
                      UseIncDec then
                begin
                  emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_IMUL) and
                      (right.location.loc=LOC_CONSTANT) and
                      (ispowerof2(int64(right.location.value),power)) and
                      not(cs_check_overflow in current_settings.localswitches) then
                begin
                  { multiplication by 2^power -> shift left by power }
                  emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                end
              else if (op=A_IMUL) and
                      (right.location.loc=LOC_CONSTANT) and
                      (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                      (power in [1..3]) and
                      not(cs_check_overflow in current_settings.localswitches) then
                begin
                  { multiplication by 3, 5 or 9 -> lea (reg,reg,2/4/8),newreg;
                    the result lives in a freshly allocated register }
                  reference_reset_base(href,left.location.register,0,0);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { complement a copy of right and AND it into left }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl4);
              { skip the FPC_OVERFLOW call when no overflow/carry occurred }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Makes sure the left operand ends up in a register.
      opdef  - definition the operands are converted to when they are forced
               into a register
      opsize - operand size the locations are checked against
      noswap - when true, left and right may not be exchanged
    Afterwards any non-constant operand whose size (ignoring signedness)
    differs from opsize is forced into a register of type opdef as well. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      wanted_size : TCGSize;
    begin
      if left.location.loc<>LOC_REGISTER then
        if (right.location.loc=LOC_REGISTER) and not(noswap) then
          begin
            { right already lives in a register: exchanging the two
              locations is enough }
            location_swap(left.location,right.location);
            toggleflag(nf_swapped);
          end
        else
          { a comparison does not modify the register, so in that case a
            constant register may be reused (last argument) }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,
            nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);

      { size adjustments: right first, then left }
      wanted_size:=tcgsize2unsigned[opsize];
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>wanted_size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>wanted_size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers. An operand that is not yet a
    LOC_FPUREGISTER is forced there (right before left); if left was already
    an FPU register on entry, nf_swapped is toggled instead. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      { right goes first when it still has to be loaded }
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc=LOC_FPUREGISTER then
        { left was on the stack already => the operands are in the wrong
          order, record the swap }
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Brings both operands into a shape an x87 instruction accepts.
    On return 'refnode' is either nil (both operands are on the FPU stack) or
    the operand node that stays in memory as an OS_F32/OS_F64-sized
    LOC_REFERENCE/LOC_CREFERENCE; everything else is loaded onto the FPU
    stack. nf_swapped is toggled where the resulting operand order needs it. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_fpu_stack : longint;
    begin
      refnode:=nil;
      { number of operands that already are FPU registers }
      on_fpu_stack:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);
      case on_fpu_stack of
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        1:
          begin
            { the operand that is not on the stack is the memory candidate }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable as memory operand }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { wrong size for a memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        0:
          begin
            { nothing loaded yet: right always goes onto the stack }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        else
          InternalError(2013090802);
      end;
    end;
  { Emits 'op right,left' with left.location.register as destination.
    The right operand may be a register, a memory reference or a constant;
    subset locations are first forced into a register. Any other right
    location raises internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      insize : topsize;
{$ifdef x86_64}
      tmpreg : tregister;
{$endif x86_64}
    begin
      if right.location.loc in [LOC_SUBSETREG,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);
      insize:=TCGSize2Opsize[opsize];
      { left must be a register }
      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly:
              anything larger goes through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,insize,tmpreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,insize,right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,insize,right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,insize,right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Returns the result flag that corresponds to the comparison in nodetype.
    equaln/unequaln map to F_E/F_NE; for the ordering comparisons the signed
    (L/LE/G/GE) or unsigned (B/BE/A/AE) flag is chosen according to
    'unsigned', mirrored when nf_swapped is set. Any other node type raises
    an internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      operands_swapped : boolean;
    begin
      operands_swapped:=nf_swapped in flags;
      if nodetype=equaln then
        result:=F_E
      else if nodetype=unequaln then
        result:=F_NE
      else if unsigned then
        begin
          if operands_swapped then
            begin
              case nodetype of
                ltn : result:=F_A;
                lten : result:=F_AE;
                gtn : result:=F_B;
                gten : result:=F_BE;
                else
                  internalerror(2013120107);
              end;
            end
          else
            begin
              case nodetype of
                ltn : result:=F_B;
                lten : result:=F_BE;
                gtn : result:=F_A;
                gten : result:=F_AE;
                else
                  internalerror(2013120108);
              end;
            end;
        end
      else
        begin
          if operands_swapped then
            begin
              case nodetype of
                ltn : result:=F_G;
                lten : result:=F_GE;
                gtn : result:=F_L;
                gten : result:=F_LE;
                else
                  internalerror(2013120105);
              end;
            end
          else
            begin
              case nodetype of
                ltn : result:=F_L;
                lten : result:=F_LE;
                gtn : result:=F_G;
                gten : result:=F_GE;
                else
                  internalerror(2013120106);
              end;
            end;
        end;
    end;
  { Returns the floating point result flag for the comparison in nodetype.
    equaln/unequaln map to F_FE/F_FNE; ltn/lten/gtn/gten map to
    F_FB/F_FBE/F_FA/F_FAE, mirrored when nf_swapped is set. Any other node
    type raises an internal error. }
  function tx86addnode.getfpuresflags : tresflags;
    begin
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        else
          begin
            if not(nf_swapped in flags) then
              begin
                case nodetype of
                  ltn : result:=F_FB;
                  lten : result:=F_FBE;
                  gtn : result:=F_FA;
                  gten : result:=F_FAE;
                  else
                    internalerror(2014031403);
                end;
              end
            else
              begin
                case nodetype of
                  ltn : result:=F_FA;
                  lten : result:=F_FAE;
                  gtn : result:=F_FB;
                  gten : result:=F_FBE;
                  else
                    internalerror(2014031402);
                end;
              end;
          end;
      end;
    end;
  406. {*****************************************************************************
  407. AddSmallSet
  408. *****************************************************************************}
  {$ifndef i8086}
  { Generates code for an operation on small sets: union / element insertion
    (addn), symmetric difference, intersection, difference (subn) and the
    bitwise xor/or/and node types. The result is left in a register that is
    stored in 'location'. }
  procedure tx86addnode.second_addsmallset;
    var
      base : aint;
      workdef : tdef;
      worksize : TCGSize;
      asmop : TAsmOp;
      invert_operand,
      keep_order,
      complement_only : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      invert_operand:=false;
      complement_only:=false;
      workdef:=resultdef;
      worksize:=int_cgsize(workdef.size);

      { the set base comes from whichever operand is the set }
      if left.resultdef.typ<>setdef then
        base:=tsetdef(right.resultdef).setbase
      else
        base:=tsetdef(left.resultdef).setbase;

      case nodetype of
        symdifn,
        xorn :
          asmop:=A_XOR;
        muln,
        andn :
          asmop:=A_AND;
        orn :
          asmop:=A_OR;
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              asmop:=A_OR
            else
              begin
                { adding a single element; no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported, so widen byte-sized sets }
                if worksize=OS_8 then
                  begin
                    worksize:=OS_32;
                    workdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,workdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,workdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,right.location,base);
                asmop:=A_BTS;
                keep_order:=true;
              end;
          end;
        subn :
          begin
            { a-b is computed as a and not(b); which location holds the
              minuend/subtrahend depends on nf_swapped }
            asmop:=A_AND;
            if nf_swapped in flags then
              begin
                complement_only:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value:=not(left.location.value)
                else
                  invert_operand:=true;
              end
            else
              begin
                complement_only:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value:=not(right.location.value)
                else
                  invert_operand:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if not complement_only then
        begin
          { left must be a register }
          left_must_be_reg(workdef,worksize,keep_order);
          emit_generic_code(asmop,worksize,true,invert_operand,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end
      else
        begin
          { A set expression [0..31]-x can be implemented with a simple NOT. }
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,workdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[worksize],right.location.register);
          location:=right.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if worksize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,workdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  {$endif not i8086}
  { Generates code for comparing two small sets (equaln, unequaln, lten,
    gten). lten/gten are implemented as an AND of both operands followed by
    a compare for equality. The result is returned as LOC_FLAGS. }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmpdef : tdef;
      cmpsize : TCGSize;
    begin
      pass_left_right;
      cmpdef:=left.resultdef;
      cmpsize:=int_cgsize(cmpdef.size);

      if nodetype in [lten,gten] then
        begin
          { swap for 'lten' when not yet swapped and for 'gten' when already
            swapped }
          if (nf_swapped in flags)=(nodetype=gten) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmpdef,false);
          emit_op_right_left(A_AND,cmpsize);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { every supported node type ends in a plain compare;
        left must be a register for it }
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  548. {*****************************************************************************
  549. AddMMX
  550. *****************************************************************************}
  {$ifdef SUPPORT_MMX}
  { Generates code for an MMX vector operation: addn, muln, subn (optionally
    saturating, depending on cs_mmx_saturation) and the bitwise xorn/orn/andn.
    The MMX opcode is selected from the element type of the left operand; the
    result is an LOC_MMXREGISTER stored in 'location'.

    NOTE(review): the inner 'case mmxbase' statements have no else branch, so
    'op' stays unassigned for element types they do not list (e.g. a 32 bit
    element type with saturation) - confirm that earlier passes reject those. }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word-sized signed saturating subtract (was the byte
                      opcode A_PSUBSB, which operates on 8 bit lanes) }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it; the fresh register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is the right operand that is read from memory here;
                    left is already an LOC_MMXREGISTER, so checking left (as
                    the code did before) always raised the internal error }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { result is produced in right's register, then the locations
                are exchanged so that left holds it }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  {$endif SUPPORT_MMX}
  721. {*****************************************************************************
  722. AddFloat
  723. *****************************************************************************}
  { Generates code for a scalar floating point add/sub/mul/div node using the
    (non-AVX) SSE code generator interface.  The result location is always a
    LOC_MMREGISTER of size def_cgsize(resultdef).

    When the target fpu type is at least fpu_sse3 and the node is an addn or
    subn whose two operands are both in_sqr_real inline nodes, the operands of
    those inline nodes are combined with a packed multiply followed by a
    horizontal add/subtract instead of two multiplies and an add/sub. }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop        : topcg;
      horiz_op    : tasmop;
      fuse_sqrs   : boolean;
      both_on_x87 : boolean;
      inner       : tnode;
    begin
      fuse_sqrs:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_sqrs then
        begin
          { replace each inline node by its own operand; the operand is
            detached first so that freeing the inline node leaves it alone }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
                   (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless when both operands live on the fpu stack
            (both are "R_ST", nothing would change), so exchange them by hand }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_sqrs then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { pack right next to left in the result register, then square
                both lanes with a single packed multiply }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPD;
                subn:
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values by copying the
                lower half of the register into the upper half }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { both halves of the register now hold the left/right pair }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPS;
                subn:
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { combine the squared lanes horizontally }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
          exit;
        end;

      { we can use only right as left operand if the operation is commutative }
      if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          { an operand in an fpu register is spilled to memory rather than
            forced into an mm register: there is no direct fpu->mm register
            copy instruction, so going through memory probably allows
            shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,left.location,location.register,mms_movescalar);
          exit;
        end;

      { general case: load left into a fresh mm register, then apply right }
      if nf_swapped in flags then
        swapleftright;

      { spill fpu register operands to memory, see the remark above }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
      cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,location.register,mms_movescalar);
    end;
  { Generates code for a scalar floating point add/sub/mul/div node using the
    three-operand (AVX) code generator interface.  The result location is
    always a LOC_MMREGISTER of size def_cgsize(resultdef).

    The detection of a sum/difference of two in_sqr_real inline nodes is only
    compiled in when the symbol "dummy" is defined; without it the fused path
    below is never taken.  A multiplication by the real constant 2 is emitted
    as an addition of the other operand to itself. }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop        : topcg;
      horiz_op    : tasmop;
      fuse_sqrs   : boolean;
      both_on_x87 : boolean;
      inner       : tnode;
    begin
      fuse_sqrs:=false;
{$ifdef dummy}
      fuse_sqrs:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_sqrs then
        begin
          { replace each inline node by its own operand; the operand is
            detached first so that freeing the inline node leaves it alone }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=(left.location.loc=LOC_FPUREGISTER) and
                   (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless when both operands live on the fpu stack
            (both are "R_ST", nothing would change), so exchange them by hand }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_sqrs then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { pack right next to left in the result register, then square
                both lanes with a single packed multiply }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPD;
                subn:
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values by copying the
                lower half of the register into the upper half }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { both halves of the register now hold the left/right pair }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPS;
                subn:
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { combine the squared lanes horizontally }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
          exit;
        end;

      { left*2 ?  emit left+left instead }
      if (nodetype=muln) and is_constrealnode(right) and is_number_float(trealconstnode(right).value_real) and (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,
            left.location.register,
            location.register,
            mms_movescalar);
        end
      { right*2 ?  emit right+right instead }
      else if (nodetype=muln) and is_constrealnode(left) and is_number_float(trealconstnode(left).value_real) and (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an operand in an fpu register is spilled to memory rather than
            forced into an mm register: there is no direct fpu->mm register
            copy instruction, so going through memory probably allows
            shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          { general case: left in an mm register, right from wherever it is }
          if (nf_swapped in flags) then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { spill an fpu register operand to memory, see the remark above }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Compares two single or double operands with a (V)COMISS/(V)COMISD
    instruction and leaves the result in the cpu flags (LOC_FLAGS).
    Raises internalerror 200402222 when left is neither single nor double. }
  procedure tx86addnode.second_cmpfloatvector;
    const
      { both tables are indexed by UseAVX }
      single_cmp: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
      double_cmp: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
    var
      cmpop : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpop:=single_cmp[UseAVX]
      else if is_double(left.resultdef) then
        cmpop:=double_cmp[UseAVX]
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu operands to
        memory (not to mm registers because one of the memory locations can be used
        directly in compare instruction, yielding shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if not (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
        begin
          { right is not in an mm register: bring left into one and compare
            right (memory or register) against it }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          case right.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
              end;
            else
              internalerror(200402223);
          end;
        end
      else
        begin
          { right already is in an mm register: use it as the register
            operand and compare left against it; the operand roles are
            exchanged this way, which is recorded by toggling nf_swapped }
          case left.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
              end;
            else
              internalerror(200402221);
          end;
          toggleflag(nf_swapped);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for an add/sub/mul/div node whose operands are vectors
    that fit into a single mm register; the result is a LOC_MMREGISTER of
    size def_cgsize(resultdef).

    Raises internalerror 200610071 for any other node type and internalerror
    200610072 when left.resultdef does not fit into an mm register (not yet
    supported). }
  procedure tx86addnode.second_opvector;
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is that of the vector's element type.
            left.resultdef is the array def here, so it must be unwrapped via
            tarraydef(...).elementdef before it may be treated as a float def.
            Previously the commutative branch cast the array def itself to
            tfloatdef, unlike the non-commutative branch. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Entry point for floating point add/sub/mul/div.

    When use_vectorfpu(resultdef) holds, the work is delegated to the AVX or
    SSE variant depending on UseAVX.  Otherwise an x87 instruction is emitted
    and the result is left in NR_ST (LOC_FPUREGISTER).

    Raises internalerror 2003042214 for any other node type. }
  procedure tx86addnode.second_addfloat;
    const
      { Indexed by "one operand is a memory reference": index false selects
        the popping register form, index true the memory-operand form.  The
        r* tables hold the reversed-operand variants. }
      add_ops  : array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      mul_ops  : array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      sub_ops  : array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
      rsub_ops : array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
      div_ops  : array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
      rdiv_ops : array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
    var
      x87op       : TAsmOp;
      memnode     : tnode;
      use_mem     : boolean;
      is_reversed : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      pass_left_right;
      { memnode is assigned when one operand can be used as memory operand }
      prepare_x87_locations(memnode);
      use_mem:=assigned(memnode);
      is_reversed:=nf_swapped in flags;

      case nodetype of
        addn :
          x87op:=add_ops[use_mem];
        muln :
          x87op:=mul_ops[use_mem];
        subn :
          if is_reversed then
            x87op:=rsub_ops[use_mem]
          else
            x87op:=sub_ops[use_mem];
        slashn :
          if is_reversed then
            x87op:=rdiv_ops[use_mem]
          else
            x87op:=div_ops[use_mem];
        else
          internalerror(2003042214);
      end;

      if not use_mem then
        begin
          { register form: the instruction pops one entry off the fpu stack,
            so the stack bookkeeping is adjusted accordingly }
          emit_reg_reg(x87op,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(x87op,tcgsize2opsize[memnode.location.size],memnode.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Compares two floating point operands and leaves the result in the cpu
    flags (LOC_FLAGS).

    When either operand uses the vector fpu the work is delegated to
    second_cmpfloatvector.  Otherwise both operands are forced onto the x87
    stack and compared either with FCOMIP or, on non-x86_64 targets with a
    cpu type below cpu_Pentium2, with FCOMPP followed by a transfer of the
    fpu status word into the cpu flags. }
  procedure tx86addnode.second_cmpfloat;

{$ifndef x86_64}
    { FCOMPP based compare for cpu types below cpu_Pentium2: both operands
      are popped, then the fpu status word is moved into the cpu flags }
    procedure compare_via_status_word;
{$ifdef i8086}
      var
        swref: treference;
{$endif i8086}
      begin
        emit_none(A_FCOMPP,S_NO);
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
        { load fpu flags }
{$ifdef i8086}
        if current_settings.cputype < cpu_286 then
          begin
            { below cpu_286 the status word is stored into a 2 byte temp and
              its upper byte is loaded into AH from there }
            tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,swref);
            emit_ref(A_FSTSW,S_NO,swref);
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            inc(swref.offset);
            emit_ref_reg(A_MOV,S_B,swref,NR_AH);
            dec(swref.offset);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            tg.ungettemp(current_asmdata.CurrAsmList,swref);
          end
        else
{$endif i8086}
          begin
            cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
            emit_reg(A_FNSTSW,S_NO,NR_AX);
            emit_none(A_SAHF,S_NO);
            cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
          end;
      end;
{$endif x86_64}

    { FCOMIP based compare }
    procedure compare_via_fcomip;
      begin
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
        { fcomip pops only one fpu register }
        current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
        tcgx86(cg).dec_fpu_stack;
        tcgx86(cg).dec_fpu_stack;
      end;

    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        compare_via_status_word
      else
{$endif x86_64}
        compare_via_fcomip;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1204. {*****************************************************************************
  1205. Add64bit
  1206. *****************************************************************************}
  { 64 bit add-type operations.  With cpu64bitalu defined they are handled
    by second_addordinal; otherwise this generic x86 version raises
    internalerror 200402042. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparisons.  With cpu64bitalu defined they are handled by
    second_cmpordinal; otherwise this generic x86 version raises
    internalerror 200402043. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1225. {*****************************************************************************
  1226. AddOrdinal
  1227. *****************************************************************************}
  { Compares two ordinal operands with CMP and leaves the result in the cpu
    flags (LOC_FLAGS).  The comparison is treated as unsigned as soon as one
    of the operands is not signed.  The operand size is taken from the type
    of left. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef       : tdef;
      cmpsize      : tcgsize;
      is_unsigned  : boolean;
      const_vs_mem : boolean;
    begin
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      { a constant on the right can be compared directly against a left
        operand residing in memory }
      const_vs_mem:=(right.location.loc=LOC_CONSTANT) and
                    (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE]);
{$ifdef x86_64}
      { for 64 bit operands this is only done when the constant lies within
        the longint range }
      if const_vs_mem and (cmpsize in [OS_64,OS_S64]) then
        const_vs_mem:=(right.location.value>=low(longint)) and
                      (right.location.value<=high(longint));
{$endif x86_64}

      if not const_vs_mem then
        begin
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { Unit initialization: stores the tx86addnode class in caddnode.
    NOTE(review): caddnode is declared outside this chunk; presumably it is
    the class reference through which add nodes get created - confirm. }
  begin
    caddnode:=tx86addnode;
  end.