nx86add.pas 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Code generator node for binary operators shared by the x86 family
      targets; it specialises the generic tcgaddnode. }
    tx86addnode = class(tcgaddnode)
    protected
      { maps the comparison node type (and nf_swapped) to an integer
        condition code; 'unsigned' selects the above/below family }
      function  getresflags(unsigned : boolean) : tresflags;
      { same mapping, but for the floating point condition codes }
      function  getfpuresflags : tresflags;

      { operand preparation helpers }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      procedure force_left_and_right_fpureg;
      procedure prepare_x87_locations(out refnode: tnode);

      { instruction emission helpers }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

      { floating point / vector back ends }
      procedure second_cmpfloatvector;
      procedure second_addfloatsse;
      procedure second_addfloatavx;
    public
      function  use_fma : boolean;override;
      procedure second_addfloat;override;
{$ifndef i8086}
      procedure second_addsmallset;override;
{$endif not i8086}
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
      procedure second_addordinal;override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  55. implementation
  56. uses
  57. globtype,globals,
  58. verbose,cutils,compinnr,
  59. cpuinfo,
  60. aasmbase,aasmdata,aasmcpu,
  61. symconst,symdef,
  62. cgobj,hlcgobj,cgx86,cga,cgutils,
  63. tgobj,ncgutil,
  64. ncon,nset,ninl,
  65. defutil;
  66. { Range check must be disabled explicitly as the code serves
  67. on three different architecture sizes }
  68. {$R-}
  69. {*****************************************************************************
  70. Helpers
  71. *****************************************************************************}
  { Emits the two-operand instruction 'op' that combines the right operand
    with left.location.register and, if requested, the overflow check that
    follows it.

    The caller must already have brought the left operand into a register
    (left.location.loc is expected to be LOC_REGISTER, see left_must_be_reg).

    Parameters:
      op         - x86 opcode to emit
      opsize     - operand size of the operation
      unsigned   - selects the overflow test: F_AE (carry based) when true,
                   F_NO (overflow flag based) when false
      extra_not  - one operand has to be complemented before 'op' is applied;
                   the visible caller in this unit only sets it together with
                   A_AND (set difference)
      mboverflow - the operation may overflow, so an overflow check is
                   emitted when overflow checking is enabled for this node

    For a swapped subtraction the result ends up in the former right
    operand; the locations are swapped back so that left.location holds the
    result on exit.

    The INC/DEC/SHL/LEA shortcuts for constant operands are only taken when
    no overflow check is needed, because those instructions do not leave the
    flags in the state the check below expects. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
      overflowcheck: boolean;
    begin
      overflowcheck:=needoverflowcheck;
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the operand order does not matter, so the
                locations are exchanged before emitting the instruction }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute into a scratch register holding the right operand,
                then move the result back into the left register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef i386}
                  emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
                end
              else if (op=A_ADD) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 not overflowcheck and
                 UseIncDec then
                begin
                  emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_SUB) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=1) and
                 { DEC leaves the carry flag untouched, so it must not be
                   used when the overflow check below has to be emitted }
                 not overflowcheck and
                 UseIncDec then
                begin
                  emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (ispowerof2(int64(right.location.value),power)) and
                 { a shift does not report multiplication overflow the way
                   IMUL does, so only use it without overflow checking }
                 not overflowcheck then
                begin
                  emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not overflowcheck then
                begin
                  { multiplication by 3, 5 or 9: lea (reg,reg,2/4/8),newreg }
                  reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { complement a copy of the right operand and AND it
                        into the left register }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if overflowcheck then
            begin
              current_asmdata.getjumplabel(hl4);
              { skip the call to the overflow handler when no overflow happened }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Makes sure the left operand lives in a register, swapping the operands
    instead of loading when that is permitted ('noswap' is false) and the
    right operand already is in a register.  Afterwards any non-constant
    operand whose size differs from 'opsize' (ignoring signedness) is forced
    into a register of type 'opdef'. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_comparison,
      right_was_creg : boolean;
    begin
      { maybe we can reuse a constant register when the
        operation is a comparison that doesn't change the
        value of the register }
      is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];

      if left.location.loc<>LOC_REGISTER then
        begin
          if (not noswap) and
             (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
            begin
              { the right side is in a register: exchange the operands }
              right_was_creg:=(right.location.loc=LOC_CREGISTER);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
              if right_was_creg then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
                  location:=left.location;
                end;
            end
          else
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
        end;

      { adapt operand sizes to the size of the operation }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers, right operand first, and
    records in nf_swapped when the left operand was already an FPU register. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      { load the right operand first if it is not on the FPU stack yet }
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc<>LOC_FPUREGISTER then
        { the nominator in st0 }
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
      else
        { left was already on the stack: the fpu operands are in the
          wrong order => swap }
        toggleflag(nf_swapped);
    end;
  { Makes sides suitable for executing an x87 instruction:
    if either side is an OS_F32/OS_F64-sized LOC_REFERENCE, it is returned in
    'refnode'; everything else is loaded to the FPU stack. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      fpu_operands : longint;
    begin
      refnode:=nil;

      { later on, no mm registers are allowed, so transfer everything to memory here;
        below it is loaded into an fpu register if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { number of operands that already are FPU registers }
      fpu_operands:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);
      case fpu_operands of
        0:
          begin
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                { left can be used as memory operand }
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        1:
          begin
            { the operand that is not on the stack is the candidate for
              the memory operand }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { size not usable as memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        else
          InternalError(2013090802);
      end;
    end;
  { Emits 'op <right operand>,<left register>' where the right operand may
    be a register, a memory reference or a constant.  Subset locations are
    loaded into a register first.  The left operand must already be in a
    register. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      instrsize : topsize;
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
    begin
      instrsize:=TCGSize2Opsize[opsize];

      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(
            taicpu.op_reg_reg(op,instrsize,right.location.register,left.location.register));
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,instrsize,right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly;
              anything outside that range goes through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,instrsize,constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(
                taicpu.op_const_reg(op,instrsize,right.location.value,left.location.register));
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the condition code matching the comparison in 'nodetype'.
    (In)equality does not depend on operand order or signedness; for the
    ordering comparisons the condition is mirrored when the operands were
    swapped (nf_swapped) and the above/below family is used for unsigned
    comparisons. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      operands_swapped : boolean;
    begin
      case nodetype of
        equaln :
          result:=F_E;
        unequaln :
          result:=F_NE;
        else
          begin
            operands_swapped:=nf_swapped in flags;
            if unsigned then
              begin
                if operands_swapped then
                  begin
                    case nodetype of
                      ltn  : result:=F_A;
                      lten : result:=F_AE;
                      gtn  : result:=F_B;
                      gten : result:=F_BE;
                      else
                        internalerror(2013120107);
                    end;
                  end
                else
                  begin
                    case nodetype of
                      ltn  : result:=F_B;
                      lten : result:=F_BE;
                      gtn  : result:=F_A;
                      gten : result:=F_AE;
                      else
                        internalerror(2013120108);
                    end;
                  end;
              end
            else
              begin
                if operands_swapped then
                  begin
                    case nodetype of
                      ltn  : result:=F_G;
                      lten : result:=F_GE;
                      gtn  : result:=F_L;
                      gten : result:=F_LE;
                      else
                        internalerror(2013120105);
                    end;
                  end
                else
                  begin
                    case nodetype of
                      ltn  : result:=F_L;
                      lten : result:=F_LE;
                      gtn  : result:=F_G;
                      gten : result:=F_GE;
                      else
                        internalerror(2013120106);
                    end;
                  end;
              end;
          end;
      end;
    end;
  { Returns the floating point condition code matching the comparison in
    'nodetype'; ordering comparisons are mirrored when the operands were
    swapped (nf_swapped). }
  function tx86addnode.getfpuresflags : tresflags;
    begin
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        else
          begin
            if nf_swapped in flags then
              begin
                case nodetype of
                  ltn  : result:=F_FA;
                  lten : result:=F_FAE;
                  gtn  : result:=F_FB;
                  gten : result:=F_FBE;
                  else
                    internalerror(2014031402);
                end;
              end
            else
              begin
                case nodetype of
                  ltn  : result:=F_FB;
                  lten : result:=F_FBE;
                  gtn  : result:=F_FA;
                  gten : result:=F_FAE;
                  else
                    internalerror(2014031403);
                end;
              end;
          end;
      end;
    end;
  433. {*****************************************************************************
  434. AddSmallSet
  435. *****************************************************************************}
  436. {$ifndef i8086}
  437. procedure tx86addnode.second_addsmallset;
  438. var
  439. setbase : aint;
  440. opdef : tdef;
  441. opsize : TCGSize;
  442. op : TAsmOp;
  443. extra_not,
  444. noswap : boolean;
  445. all_member_optimization:boolean;
  446. begin
  447. pass_left_right;
  448. noswap:=false;
  449. extra_not:=false;
  450. all_member_optimization:=false;
  451. opdef:=resultdef;
  452. opsize:=int_cgsize(opdef.size);
  453. if (left.resultdef.typ=setdef) then
  454. setbase:=tsetdef(left.resultdef).setbase
  455. else
  456. setbase:=tsetdef(right.resultdef).setbase;
  457. case nodetype of
  458. addn :
  459. begin
  460. { adding elements is not commutative }
  461. if (nf_swapped in flags) and (left.nodetype=setelementn) then
  462. swapleftright;
  463. { are we adding set elements ? }
  464. if right.nodetype=setelementn then
  465. begin
  466. { no range support for smallsets! }
  467. if assigned(tsetelementnode(right).right) then
  468. internalerror(43244);
  469. { btsb isn't supported }
  470. if opsize=OS_8 then
  471. begin
  472. opsize:=OS_32;
  473. opdef:=u32inttype;
  474. end;
  475. { bts requires both elements to be registers }
  476. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
  477. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
  478. register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);
  479. op:=A_BTS;
  480. noswap:=true;
  481. end
  482. else
  483. op:=A_OR;
  484. end;
  485. symdifn :
  486. op:=A_XOR;
  487. muln :
  488. op:=A_AND;
  489. subn :
  490. begin
  491. op:=A_AND;
  492. if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
  493. ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
  494. all_member_optimization:=true;
  495. if (not(nf_swapped in flags)) and
  496. (right.location.loc=LOC_CONSTANT) then
  497. right.location.value := not(right.location.value)
  498. else if (nf_swapped in flags) and
  499. (left.location.loc=LOC_CONSTANT) then
  500. left.location.value := not(left.location.value)
  501. else
  502. extra_not:=true;
  503. end;
  504. xorn :
  505. op:=A_XOR;
  506. orn :
  507. op:=A_OR;
  508. andn :
  509. op:=A_AND;
  510. else
  511. internalerror(2003042215);
  512. end;
  513. if all_member_optimization then
  514. begin
  515. {A set expression [0..31]-x can be implemented with a simple NOT.}
  516. if nf_swapped in flags then
  517. begin
  518. { newly swapped also set swapped flag }
  519. location_swap(left.location,right.location);
  520. toggleflag(nf_swapped);
  521. end;
  522. hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
  523. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  524. location:=right.location;
  525. end
  526. else
  527. begin
  528. { can we use the BMI1 instruction andn? }
  529. if (op=A_AND) and extra_not and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
  530. (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
  531. begin
  532. location_reset(location,LOC_REGISTER,left.location.size);
  533. location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);
  534. if nf_swapped in flags then
  535. begin
  536. location_swap(left.location,right.location);
  537. toggleflag(nf_swapped);
  538. end;
  539. hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,true);
  540. if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
  541. hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,opdef,true);
  542. case left.location.loc of
  543. LOC_CREGISTER,LOC_REGISTER:
  544. emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.register,right.location.register,location.register);
  545. LOC_CREFERENCE,LOC_REFERENCE:
  546. emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.reference,right.location.register,location.register);
  547. else
  548. Internalerror(2018040201);
  549. end;
  550. end
  551. else
  552. begin
  553. { left must be a register }
  554. left_must_be_reg(opdef,opsize,noswap);
  555. emit_generic_code(op,opsize,true,extra_not,false);
  556. location_freetemp(current_asmdata.CurrAsmList,right.location);
  557. { left is always a register and contains the result }
  558. location:=left.location;
  559. end;
  560. end;
  561. { fix the changed opsize we did above because of the missing btsb }
  562. if opsize<>int_cgsize(resultdef.size) then
  563. hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
  564. end;
  565. {$endif not i8086}
  { Generates code for comparing two small sets.  (In)equality is a plain
    CMP; the subset tests (<=, >=) are reduced to "(a AND b) = a", so the
    node type is rewritten to equaln.  The result is returned in the CPU
    flags. }
  procedure tx86addnode.second_cmpsmallset;
    var
      opdef : tdef;
      opsize : TCGSize;
    begin
      pass_left_right;
      opdef:=left.resultdef;
      opsize:=int_cgsize(opdef.size);

      case nodetype of
        equaln,
        unequaln :
          { nothing to prepare, a plain compare is enough };
        lten,
        gten :
          begin
            { swap for "<=" on unswapped operands and for ">=" on swapped ones }
            if (nodetype=lten)<>(nf_swapped in flags) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
            emit_op_right_left(A_AND,opsize);
            { warning: ugly hack, we need a JE so change the node to equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042215);
      end;

      { left must be a register }
      left_must_be_reg(opdef,opsize,false);
      emit_generic_code(A_CMP,opsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  601. {*****************************************************************************
  602. AddMMX
  603. *****************************************************************************}
  604. {$ifdef SUPPORT_MMX}
  605. procedure tx86addnode.second_opmmx;
  606. var
  607. op : TAsmOp;
  608. cmpop : boolean;
  609. mmxbase : tmmxtype;
  610. hreg,
  611. hregister : tregister;
  612. begin
  613. pass_left_right;
  614. cmpop:=false;
  615. op:=A_NOP;
  616. mmxbase:=mmx_type(left.resultdef);
  617. location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
  618. case nodetype of
  619. addn :
  620. begin
  621. if (cs_mmx_saturation in current_settings.localswitches) then
  622. begin
  623. case mmxbase of
  624. mmxs8bit:
  625. op:=A_PADDSB;
  626. mmxu8bit:
  627. op:=A_PADDUSB;
  628. mmxs16bit,mmxfixed16:
  629. op:=A_PADDSW;
  630. mmxu16bit:
  631. op:=A_PADDUSW;
  632. else
  633. ;
  634. end;
  635. end
  636. else
  637. begin
  638. case mmxbase of
  639. mmxs8bit,mmxu8bit:
  640. op:=A_PADDB;
  641. mmxs16bit,mmxu16bit,mmxfixed16:
  642. op:=A_PADDW;
  643. mmxs32bit,mmxu32bit:
  644. op:=A_PADDD;
  645. else
  646. ;
  647. end;
  648. end;
  649. end;
  650. muln :
  651. begin
  652. case mmxbase of
  653. mmxs16bit,mmxu16bit:
  654. op:=A_PMULLW;
  655. mmxfixed16:
  656. op:=A_PMULHW;
  657. else
  658. ;
  659. end;
  660. end;
  661. subn :
  662. begin
  663. if (cs_mmx_saturation in current_settings.localswitches) then
  664. begin
  665. case mmxbase of
  666. mmxs8bit:
  667. op:=A_PSUBSB;
  668. mmxu8bit:
  669. op:=A_PSUBUSB;
  670. mmxs16bit,mmxfixed16:
  671. op:=A_PSUBSB;
  672. mmxu16bit:
  673. op:=A_PSUBUSW;
  674. else
  675. ;
  676. end;
  677. end
  678. else
  679. begin
  680. case mmxbase of
  681. mmxs8bit,mmxu8bit:
  682. op:=A_PSUBB;
  683. mmxs16bit,mmxu16bit,mmxfixed16:
  684. op:=A_PSUBW;
  685. mmxs32bit,mmxu32bit:
  686. op:=A_PSUBD;
  687. else
  688. ;
  689. end;
  690. end;
  691. end;
  692. xorn:
  693. op:=A_PXOR;
  694. orn:
  695. op:=A_POR;
  696. andn:
  697. op:=A_PAND;
  698. else
  699. internalerror(2003042214);
  700. end;
  701. if op = A_NOP then
  702. internalerror(201408201);
  703. { left and right no register? }
  704. { then one must be demanded }
  705. if (left.location.loc<>LOC_MMXREGISTER) then
  706. begin
  707. if (right.location.loc=LOC_MMXREGISTER) then
  708. begin
  709. location_swap(left.location,right.location);
  710. toggleflag(nf_swapped);
  711. end
  712. else
  713. begin
  714. { register variable ? }
  715. if (left.location.loc=LOC_CMMXREGISTER) then
  716. begin
  717. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  718. emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
  719. end
  720. else
  721. begin
  722. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  723. internalerror(200203245);
  724. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  725. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  726. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
  727. end;
  728. location_reset(left.location,LOC_MMXREGISTER,OS_NO);
  729. left.location.register:=hregister;
  730. end;
  731. end;
  732. { at this point, left.location.loc should be LOC_MMXREGISTER }
  733. if right.location.loc<>LOC_MMXREGISTER then
  734. begin
  735. if (nodetype=subn) and (nf_swapped in flags) then
  736. begin
  737. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  738. if right.location.loc=LOC_CMMXREGISTER then
  739. begin
  740. emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
  741. emit_reg_reg(op,S_NO,left.location.register,hreg);
  742. end
  743. else
  744. begin
  745. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  746. internalerror(200203247);
  747. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  748. emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
  749. emit_reg_reg(op,S_NO,left.location.register,hreg);
  750. end;
  751. location.register:=hreg;
  752. end
  753. else
  754. begin
  755. if (right.location.loc=LOC_CMMXREGISTER) then
  756. emit_reg_reg(op,S_NO,right.location.register,left.location.register)
  757. else
  758. begin
  759. if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  760. internalerror(200203246);
  761. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  762. emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
  763. end;
  764. location.register:=left.location.register;
  765. end;
  766. end
  767. else
  768. begin
  769. { right.location=LOC_MMXREGISTER }
  770. if (nodetype=subn) and (nf_swapped in flags) then
  771. begin
  772. emit_reg_reg(op,S_NO,left.location.register,right.location.register);
  773. location_swap(left.location,right.location);
  774. toggleflag(nf_swapped);
  775. end
  776. else
  777. begin
  778. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  779. end;
  780. location.register:=left.location.register;
  781. end;
  782. location_freetemp(current_asmdata.CurrAsmList,right.location);
  783. if cmpop then
  784. location_freetemp(current_asmdata.CurrAsmList,left.location);
  785. end;
  786. {$endif SUPPORT_MMX}
  787. {*****************************************************************************
  788. AddFloat
  789. *****************************************************************************}
  { Generates SSE code for a scalar floating point add/sub/mul/div node.
    The result is always left in a freshly described LOC_MMREGISTER location. }
  procedure tx86addnode.second_addfloatsse;
    var
      mmop : topcg;
      horiz_op : tasmop;
      fuse_squares,
      both_on_fpustack : boolean;
      inner : tnode;
    begin
      { sqr(x)+sqr(y) and sqr(x)-sqr(y) get a dedicated packed code sequence
        when at least SSE3 is selected as fpu type }
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_squares then
        begin
          { replace both sqr() nodes by their arguments; the argument is
            unhooked from the inline node before that node is freed }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;

      both_on_fpustack:=
        (left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpustack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is of no use when both operands are on the fpu
            stack: both are "R_ST", so nothing would change -> switch manually }
          if both_on_fpustack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      { map the node type to the generic code generator operation }
      case nodetype of
        addn :
          mmop:=OP_ADD;
        muln :
          mmop:=OP_MUL;
        subn :
          mmop:=OP_SUB;
        slashn :
          mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built up in the register of the left operand }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { combine the low doubles of both operands in one register and
                multiply the register with itself }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPD;
                subn:
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              { interleave the low singles of both operands }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { multiply the register with itself }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPS;
                subn:
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { horizontal add/sub of the squared values }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (mmop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);

          { an operand in an fpu register is written to memory instead of being
            forced into an mm register: there is no direct fpu->mm register copy
            instruction, so going through memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,mmop,location.size,left.location,location.register,mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { fpu register operands go through memory (no direct fpu->mm
            register copy instruction exists) }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          { result := left }
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

          { same treatment for a right operand in an fpu register }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

          { result := result <op> right }
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,mmop,location.size,right.location,location.register,mms_movescalar);
        end;
    end;
  { Generates AVX code (three operand form) for a scalar floating point
    add/sub/mul/div node.  The detection of sqr(x)+-sqr(y) is only compiled
    when "dummy" is defined; otherwise fuse_squares stays false and the
    corresponding branch below is never taken. }
  procedure tx86addnode.second_addfloatavx;
    var
      mmop : topcg;
      horiz_op : tasmop;
      fuse_squares,
      both_on_fpustack : boolean;
{$ifdef dummy}
      inner : tnode;
{$endif dummy}
    begin
      fuse_squares:=false;
{$ifdef dummy}
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;

          { replace both sqr() nodes by their arguments }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;

      both_on_fpustack:=
        (left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpustack then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is of no use when both operands are on the fpu
            stack: both are "R_ST", so nothing would change -> switch manually }
          if both_on_fpustack then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      { map the node type to the generic code generator operation }
      case nodetype of
        addn :
          mmop:=OP_ADD;
        muln :
          mmop:=OP_MUL;
        subn :
          mmop:=OP_SUB;
        slashn :
          mmop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the result is built up in the register of the left operand }
          location:=left.location;

          if is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPD;
                subn:
                  horiz_op:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { multiply the register with itself }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horiz_op:=A_HADDPS;
                subn:
                  horiz_op:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { horizontal add/sub of the squared values }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horiz_op,S_NO,location.register,location.register));
        end
      { left*2 ?  -> emitted as left+left }
      else if (nodetype=muln) and is_constrealnode(right) and
              is_number_float(trealconstnode(right).value_real) and
              (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,
            left.location.register,
            location.register,
            mms_movescalar);
        end
      { right*2 ?  -> emitted as right+right }
      else if (nodetype=muln) and is_constrealnode(left) and
              is_number_float(trealconstnode(left).value_real) and
              (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (mmop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);

          { an operand in an fpu register is written to memory instead of being
            forced into an mm register: there is no direct fpu->mm register copy
            instruction, so going through memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,mmop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);

          { fpu register operands go through memory (no direct fpu->mm
            register copy instruction exists) }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,mmop,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { Returns whether fused multiply-add code may be used for this node.
    On i8086 the decision is left to the inherited implementation. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifdef i8086}
      Result:=inherited use_fma;
{$else i8086}
      { test if the result stays in an xmm register, fiddling with fpu
        registers and fma makes no sense; in addition the selected cpu type
        must offer FMA or FMA4 }
      Result:=use_vectorfpu(resultdef) and
        ((CPUX86_HAS_FMA in cpu_capabilities[current_settings.cputype]) or
         (CPUX86_HAS_FMA4 in cpu_capabilities[current_settings.cputype]));
{$endif i8086}
    end;
  { Compares two single or double operands with (V)COMISS/(V)COMISD and
    leaves the result in the flags (LOC_FLAGS). }
  procedure tx86addnode.second_cmpfloatvector;
    const
      { first index: operand is a double, second index: UseAVX }
      cmp_ops: array[boolean,boolean] of tasmop = (
        (A_COMISS,A_VCOMISS),
        (A_COMISD,A_VCOMISD)
      );
    var
      cmpinstr : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpinstr:=cmp_ops[false,UseAVX]
      else if is_double(left.resultdef) then
        cmpinstr:=cmp_ops[true,UseAVX]
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu operands to
        memory (not to mm registers because one of the memory locations can be used
        directly in compare instruction, yielding shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is already in an mm register: use it as register operand
            and record that the operands were exchanged }
          if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
            end
          else if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register))
          else
            internalerror(200402221);
          toggleflag(nf_swapped);
        end
      else
        begin
          { otherwise left is loaded into an mm register and right is used
            as it is (memory or mm register) }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
            end
          else if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register))
          else
            internalerror(200402223);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for add/sub/mul/div on vector operands that fit into an
    mm register; other vector types raise internalerror(200610072).

    Fix: the commutative branch used to typecast left.resultdef directly to
    tfloatdef, although the operand is an array (vector) definition as the
    other branch shows.  Both branches now derive the operation size from
    the float type of the array element. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      { map the node type to the generic code generator operation }
      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

          { size of a single vector element, taken from the element definition
            of the array type (left.resultdef itself is not a float def) }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              { right's register is reused as result register }
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              { left's register becomes the result register }
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for a floating point add/sub/mul/div node.  Results that
    are handled by the vector unit are delegated to the AVX or SSE variant,
    everything else is evaluated on the x87 stack with the result in ST. }
  procedure tx86addnode.second_addfloat;
    const
      { index: one operand is used directly from memory (hasref) }
      add_ops: array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      mul_ops: array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      { first index: nf_swapped set (reversed form), second index: hasref }
      sub_ops: array[boolean,boolean] of TAsmOp = (
        (A_FSUBP,A_FSUB),
        (A_FSUBRP,A_FSUBR)
      );
      div_ops: array[boolean,boolean] of TAsmOp = (
        (A_FDIVP,A_FDIV),
        (A_FDIVRP,A_FDIVR)
      );
    var
      x87op : TAsmOp;
      memoperand : tnode;
      hasref : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      prepare_x87_locations(memoperand);
      hasref:=assigned(memoperand);

      { select the instruction; sub and div are not commutative and need the
        reversed form when the operands have been swapped }
      case nodetype of
        addn :
          x87op:=add_ops[hasref];
        muln :
          x87op:=mul_ops[hasref];
        subn :
          x87op:=sub_ops[nf_swapped in flags,hasref];
        slashn :
          x87op:=div_ops[nf_swapped in flags,hasref];
        else
          internalerror(2003042214);
      end;

      if not hasref then
        begin
          { both operands are on the fpu stack: the popping form is used, so
            the stack gets one entry smaller }
          emit_reg_reg(x87op,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(x87op,tcgsize2opsize[memoperand.location.size],memoperand.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Compares two floating point operands and leaves the result in the flags.
    Operands handled by the vector unit are passed on to
    second_cmpfloatvector, all others are compared on the x87 stack. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      statusword: treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands; the result ends up in the fpu
            status word and has to be transferred to the cpu flags }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word into a 2 byte temp and load its second
                byte into AH before executing SAHF }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
              emit_ref(A_FSTSW,S_NO,statusword);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              statusword.offset:=statusword.offset+1;
              emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
              statusword.offset:=statusword.offset-1;
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusword);
            end
          else
{$endif i8086}
            begin
              { store the status word directly into AX }
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so the other one is removed
            with an explicit fstp }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1282. {*****************************************************************************
  1283. Add64bit
  1284. *****************************************************************************}
  { 64 bit arithmetic: with a 64 bit alu this is an ordinary ordinal
    operation, otherwise this method has to be implemented separately. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: with a 64 bit alu this is an ordinary ordinal
    comparison, otherwise this method has to be implemented separately. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1303. {*****************************************************************************
  1304. AddOrdinal
  1305. *****************************************************************************}
  { Generates code for add, sub, mul, and, or and xor on ordinal operands.
    Depending on the operand locations either three operand operations with a
    newly allocated result register are used, or the register of one operand
    is reused with two operand operations.  An overflow check is emitted for
    add, sub and mul when needoverflowcheck requests it. }
  procedure tx86addnode.second_addordinal;
    var
      opsize : tcgsize;
      is_unsigned,
      checkoverflow,
      reversed_sub : boolean;
      cgop : topcg;
      ovloc : tlocation;
      scratch : TRegister;
    begin
      { the operation is unsigned as soon as one operand is unsigned }
      is_unsigned:=not(is_signed(left.resultdef)) or
        not(is_signed(right.resultdef));

      ovloc.loc:=LOC_VOID;

      { map the node type to the generic code generator operation }
      case nodetype of
        addn:
          cgop:=OP_ADD;
        subn:
          cgop:=OP_SUB;
        muln:
          if is_unsigned then
            cgop:=OP_MUL
          else
            cgop:=OP_IMUL;
        xorn:
          cgop:=OP_XOR;
        orn:
          cgop:=OP_OR;
        andn:
          cgop:=OP_AND;
        else
          internalerror(2015022501);
      end;

      { overflow checking is considered for add, sub and mul only }
      checkoverflow:=(nodetype in [addn,subn,muln]) and needoverflowcheck;

      opsize:=def_cgsize(left.resultdef);

      pass_left_right;

      { do we have to allocate a register? If yes, then three operand
        instructions are better }
      if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER)) or
         ((nodetype=addn) and
          (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and
          (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
        begin
          { allocate registers }
          force_reg_left_right(false,true);
          set_result_location_reg;

          if nodetype=subn then
            begin
              { subtract is a special case since it is not commutative }
              if nf_swapped in flags then
                swapleftright;

              if left.location.loc=LOC_CONSTANT then
                begin
                  { constant minuend: load it into a scratch register first }
                  scratch:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                  hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                    left.location.value,scratch);
                  hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.register,scratch,location.register,checkoverflow,ovloc);
                end
              else if right.location.loc=LOC_CONSTANT then
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.value,left.location.register,
                  location.register,checkoverflow,ovloc)
              else
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.register,left.location.register,
                  location.register,checkoverflow,ovloc);
            end
          else
            begin
              if right.location.loc=LOC_CONSTANT then
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                  right.location.value,left.location.register,
                  location.register,checkoverflow,ovloc)
              else
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                  left.location.register,right.location.register,
                  location.register,checkoverflow,ovloc);
            end;
        end
      else
        begin
          { at least one location is a register, re-use it, so we can try two
            operand opcodes }
          if left.location.loc<>LOC_REGISTER then
            begin
              if right.location.loc=LOC_REGISTER then
                begin
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                { neither operand is in a register: not expected in this
                  branch (loading left into a new register is disabled) }
                Internalerror(2018031102);
            end;

          { at this point, left.location.loc should be LOC_REGISTER }
          reversed_sub:=(nodetype=subn) and (nf_swapped in flags);

          if right.location.loc=LOC_REGISTER then
            begin
              if reversed_sub then
                begin
                  { when swapped another result register }
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                    left.location.register,right.location.register);
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                  right.location.register,left.location.register);
            end
          else
            begin
              { right.location<>LOC_REGISTER }
              if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);

              if reversed_sub then
                begin
                  { load right into a new register and subtract the former
                    left register from it }
                  scratch:=left.location.register;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,scratch,left.location.register);
                end
              else
                cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);

              location_freetemp(current_asmdata.CurrAsmList,right.location);
            end;

          location_copy(location,left.location);
        end;

      { emit overflow check if required }
      if checkoverflow then
        cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
    end;
  { Compares two ordinal operands with CMP and leaves the result in the
    flags (LOC_FLAGS). }
  procedure tx86addnode.second_cmpordinal;
    var
      opdef : tdef;
      opsize : tcgsize;
      is_unsigned,
      cmp_const_with_mem : boolean;
    begin
      { the comparison is unsigned as soon as one operand is unsigned }
      is_unsigned:=not(is_signed(left.resultdef)) or
        not(is_signed(right.resultdef));
      opdef:=left.resultdef;
      opsize:=def_cgsize(opdef);

      pass_left_right;

      { a constant can be compared directly against a memory operand ... }
      cmp_const_with_mem:=(right.location.loc=LOC_CONSTANT) and
        (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE]);
{$ifdef x86_64}
      { ... but for 64 bit operands only if the constant lies within the
        longint range }
      if cmp_const_with_mem and (opsize in [OS_64,OS_S64]) then
        cmp_const_with_mem:=(right.location.value>=low(longint)) and
          (right.location.value<=high(longint));
{$endif x86_64}

      if cmp_const_with_mem then
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          left_must_be_reg(opdef,opsize,false);
          emit_generic_code(A_CMP,opsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  begin
    { unit initialization: make caddnode refer to the x86 add node class }
    caddnode:=tx86addnode;
  end.