nx86add.pas 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add node for the x86 targets.  Extends tcgaddnode with x86 specific
    helpers (protected section) and overrides the second_* code generation
    entry points (public section). }
  tx86addnode = class(tcgaddnode)
  protected
    { --- selection of the condition flags describing a comparison --- }
    function getresflags(unsigned : boolean) : tresflags;
    function getfpuresflags : tresflags;
    { --- operand preparation --- }
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    procedure force_left_and_right_fpureg;
    procedure prepare_x87_locations(out refnode: tnode);
    { --- instruction emission --- }
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    { --- floating point helpers --- }
    procedure second_cmpfloatvector;
    procedure second_addfloatsse;
    procedure second_addfloatavx;
  public
    function pass_1 : tnode;override;
    function use_fma : boolean;override;
    procedure second_addfloat;override;
{$ifndef i8086}
    { not compiled for the i8086 target }
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
    procedure second_addordinal;override;
{$ifdef SUPPORT_MMX}
    { only available when the compiler is built with MMX support }
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  56. implementation
  57. uses
  58. globtype,globals,
  59. verbose,cutils,compinnr,
  60. cpuinfo,
  61. aasmbase,aasmdata,aasmcpu,
  62. symconst,symdef,
  63. cgobj,hlcgobj,cgx86,cga,cgutils,
  64. tgobj,ncgutil,
  65. ncon,nset,ninl,ncnv,
  66. defutil,
  67. htypechk;
  68. { Range check must be disabled explicitly as the code serves
  69. on three different architecture sizes }
  70. {$R-}
  71. {*****************************************************************************
  72. Helpers
  73. *****************************************************************************}
  { Emits the two operand instruction 'op' for this node, operating on
    left.location.register.

      op         - instruction to emit (A_ADD, A_SUB, A_IMUL, A_AND, A_CMP, ...)
      opsize     - operand size of the operation
      unsigned   - selects the flag tested by the overflow check: F_AE when
                   true, F_NO otherwise
      extra_not  - one operand is complemented (NOT) before 'op' is applied
      mboverflow - the operation may overflow; the FPC_OVERFLOW check is only
                   emitted when this is set and needoverflowcheck is true

    On entry left.location is expected to be a LOC_REGISTER.  The cheaper
    replacement instructions (INC, DEC, SHL, LEA) are only used when no
    overflow check is requested, because they do not set the flags that the
    overflow test below relies on. }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
      overflowcheck: boolean;
    begin
      overflowcheck:=needoverflowcheck;
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these operations the locations are exchanged before emitting }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { compute into a scratch register holding right, then copy the
                result back into left's register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef i386}
                  emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
                end
              else
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not overflowcheck and
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                { DEC leaves the carry flag untouched, so it may only replace
                  SUB when no overflow check follows }
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not overflowcheck and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                { a shift does not report multiplication overflow the way
                  IMUL does, so it is only used without overflow checking }
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not overflowcheck then
                  begin
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not overflowcheck then
                begin
                  { multiplication by 3, 5 or 9: lea (reg,reg,2/4/8),newreg }
                  reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { load right into a scratch register, complement it and
                        AND it into left }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;
      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if overflowcheck then
            begin
              current_asmdata.getjumplabel(hl4);
              { skip the FPC_OVERFLOW call when the relevant flag is clear }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Makes sure left.location is a LOC_REGISTER.  Unless 'noswap' is set, a
    right operand that already lives in a (constant) register is exchanged
    with left instead of loading left.  Afterwards any non-constant operand
    whose size does not match 'opsize' (ignoring signedness) is forced into a
    register of type 'opdef'. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_comparison,
      right_was_cregister : boolean;
    begin
      { a comparison does not change the register value, so a constant
        register may be reused for it }
      is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];

      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or
             not(right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
            { no swapping possible: load left itself }
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison)
          else
            begin
              { right is in a register, so exchange the two locations }
              right_was_cregister:=(right.location.loc=LOC_CREGISTER);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
              if right_was_cregister then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
                  location:=left.location;
                end;
            end;
        end;

      { bring operands of a deviating size to the operation size }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers: right first (if it is not there
    yet), then left.  When left already was in an FPU register the
    nf_swapped flag is toggled instead of loading it. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc=LOC_FPUREGISTER then
        { left was on the stack already, so the operands are in the
          wrong order => swap }
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Prepares both operands for an x87 instruction.  An operand that is an
    OS_F32/OS_F64 sized memory reference may stay in memory and is then
    returned in 'refnode'; every other operand is loaded onto the FPU stack.
    'refnode' is nil when no operand was left in memory. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_fpu_stack : longint;
    begin
      refnode:=nil;

      { mm registers are not allowed further down: spill them to memory
        first, they are loaded into an fpu register below if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { number of operands which already are in an FPU register }
      on_fpu_stack:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);
      case on_fpu_stack of
        0:
          begin
            { right always goes to the stack, left may remain in memory }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        1:
          begin
            { the operand which is not on the stack is the candidate for
              staying in memory }
            if left.location.loc<>LOC_FPUREGISTER then
              refnode:=left
            else
              refnode:=right;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { stays in memory }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { wrong size for a memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        else
          InternalError(2013090802);
      end;
    end;
  { Emits "op right,left" where left.location must be a register and right
    may be a register, a memory reference or a constant.  Subset locations
    on the right side are loaded into a register first. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      insn_size : topsize;
{$ifdef x86_64}
      const_reg : tregister;
{$endif x86_64}
    begin
      insn_size:=TCGSize2Opsize[opsize];

      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { only signed 32 bit immediates can be encoded directly on
              x86_64, anything else goes through a register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                const_reg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,const_reg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,insn_size,const_reg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,insn_size,right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,insn_size,right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,insn_size,right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Returns the condition flags describing the integer comparison of this
    node.  'unsigned' selects the above/below conditions instead of the
    greater/less ones; when nf_swapped is set the direction of the
    relational operators is mirrored. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      if nodetype=equaln then
        getresflags:=F_E
      else if nodetype=unequaln then
        getresflags:=F_NE
      else if unsigned and swapped then
        case nodetype of
          ltn  : getresflags:=F_A;
          lten : getresflags:=F_AE;
          gtn  : getresflags:=F_B;
          gten : getresflags:=F_BE;
          else
            internalerror(2013120107);
        end
      else if unsigned then
        case nodetype of
          ltn  : getresflags:=F_B;
          lten : getresflags:=F_BE;
          gtn  : getresflags:=F_A;
          gten : getresflags:=F_AE;
          else
            internalerror(2013120108);
        end
      else if swapped then
        case nodetype of
          ltn  : getresflags:=F_G;
          lten : getresflags:=F_GE;
          gtn  : getresflags:=F_L;
          gten : getresflags:=F_LE;
          else
            internalerror(2013120105);
        end
      else
        case nodetype of
          ltn  : getresflags:=F_L;
          lten : getresflags:=F_LE;
          gtn  : getresflags:=F_G;
          gten : getresflags:=F_GE;
          else
            internalerror(2013120106);
        end;
    end;
  { Returns the F_F* condition flags describing the floating point
    comparison of this node; when nf_swapped is set the direction of the
    relational operators is mirrored. }
  function tx86addnode.getfpuresflags : tresflags;
    var
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        ltn :
          begin
            if swapped then
              result:=F_FA
            else
              result:=F_FB;
          end;
        lten :
          begin
            if swapped then
              result:=F_FAE
            else
              result:=F_FBE;
          end;
        gtn :
          begin
            if swapped then
              result:=F_FB
            else
              result:=F_FA;
          end;
        gten :
          begin
            if swapped then
              result:=F_FBE
            else
              result:=F_FAE;
          end;
        else
          begin
            { not a comparison node }
            if swapped then
              internalerror(2014031402)
            else
              internalerror(2014031403);
          end;
      end;
    end;
  435. {*****************************************************************************
  436. AddSmallSet
  437. *****************************************************************************}
  438. {$ifndef i8086}
  { Generates code for operations on small sets: union, including single
    elements through BTS, difference, intersection and symmetric
    difference.  The result is left in a register which becomes the
    location of this node. }
  procedure tx86addnode.second_addsmallset;
    var
      base : aint;
      workdef : tdef;
      worksize : TCGSize;
      insn : TAsmOp;
      complement_operand,
      keep_order,
      full_set_minus : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      complement_operand:=false;
      full_set_minus:=false;
      workdef:=resultdef;
      worksize:=int_cgsize(workdef.size);

      { the set base is taken from whichever operand is the set }
      if left.resultdef.typ<>setdef then
        base:=tsetdef(right.resultdef).setbase
      else
        base:=tsetdef(left.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (left.nodetype=setelementn) and (nf_swapped in flags) then
              swapleftright;
            if right.nodetype<>setelementn then
              { plain union of two sets }
              insn:=A_OR
            else
              begin
                { a single element is added }
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if worksize=OS_8 then
                  begin
                    worksize:=OS_32;
                    workdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,workdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,workdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,workdef,right.location,base);
                insn:=A_BTS;
                keep_order:=true;
              end;
          end;
        subn :
          begin
            { a-b is computed as a and not(b); a constant subtrahend is
              complemented right here, otherwise a NOT is emitted later }
            insn:=A_AND;
            if nf_swapped in flags then
              begin
                full_set_minus:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  complement_operand:=true;
              end
            else
              begin
                full_set_minus:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  complement_operand:=true;
              end;
          end;
        symdifn,
        xorn :
          insn:=A_XOR;
        muln,
        andn :
          insn:=A_AND;
        orn :
          insn:=A_OR;
        else
          internalerror(2003042215);
      end;

      if full_set_minus then
        begin
          { A set expression [0..31]-x can be implemented with a simple NOT. }
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,workdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[worksize],right.location.register);
          location:=right.location;
        end
      else if (insn=A_AND) and complement_operand and
         (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
         (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
        begin
          { the BMI1 instruction andn does the complement and the and in one go }
          location_reset(location,LOC_REGISTER,left.location.size);
          location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);
          if nf_swapped in flags then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,workdef,true);
          if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
            hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,workdef,true);
          case left.location.loc of
            LOC_CREFERENCE,LOC_REFERENCE:
              emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[worksize],left.location.reference,right.location.register,location.register);
            LOC_CREGISTER,LOC_REGISTER:
              emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[worksize],left.location.register,right.location.register,location.register);
            else
              Internalerror(2018040201);
          end;
        end
      else
        begin
          { generic path: left must be a register }
          left_must_be_reg(workdef,worksize,keep_order);
          emit_generic_code(insn,worksize,true,complement_operand,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end;

      { undo the size change made above because of the missing btsb }
      if worksize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,workdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  567. {$endif not i8086}
  { Generates code for comparing two small sets (=, <>, <=, >=).  The
    result is delivered in the CPU flags. }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmpdef : tdef;
      cmpsize : TCGSize;
    begin
      pass_left_right;
      cmpdef:=left.resultdef;
      cmpsize:=int_cgsize(cmpdef.size);

      if nodetype in [lten,gten] then
        begin
          { subset tests: and the two sets and compare the result for
            equality afterwards; the operands are exchanged first for an
            unswapped <= or a swapped >= }
          if (nf_swapped in flags)=(nodetype=gten) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmpdef,false);
          emit_op_right_left(A_AND,cmpsize);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { left must be a register }
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      { the comparison result lives in the flags }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  603. {*****************************************************************************
  604. AddMMX
  605. *****************************************************************************}
  606. {$ifdef SUPPORT_MMX}
  { Generates code for MMX operations (add, sub, mul, xor, or, and).

    The instruction is selected from the node type, the MMX element type of
    the left operand and, for add/sub, whether cs_mmx_saturation is enabled.
    An element type without a matching instruction raises an internal
    error.  The result is placed in an MMX register which becomes the
    location of this node. }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;
      { never set to true in this routine: no compare operation is handled here }
      cmpop:=false;
      { A_NOP marks "no instruction found yet", checked after the case }
      op:=A_NOP;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                  else
                    ;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                  else
                    ;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
              else
                ;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word sized elements need the word form of the
                      saturating subtract, like A_PADDSW for addn }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                  else
                    ;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                  else
                    ;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;
      { no instruction for this combination of operation and element type }
      if op = A_NOP then
        internalerror(201408201);
      { left and right no register? }
      { then one must be demanded }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;
      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: load right into a fresh register and
                subtract left from it }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { left is an MMX register here; it is the right operand,
                    loaded below, that has to be a memory reference }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  788. {$endif SUPPORT_MMX}
  789. {*****************************************************************************
  790. AddFloat
  791. *****************************************************************************}
  { Emits code for a scalar floating point add, sub, mul or div whose result
    is placed in an MM register (location is reset to LOC_MMREGISTER).
    Any other node type raises internalerror(200312231).

    Special case: when the selected fpu type is at least fpu_sse3, the result
    uses the vector fpu and both operands of an add/sub are in_sqr_real inline
    nodes, the two sqr nodes are removed and the whole expression is emitted
    as one packed multiply followed by a horizontal add/sub. }
  792. procedure tx86addnode.second_addfloatsse;
  793. var
  794. op : topcg;
  795. sqr_sum : boolean;
  796. tmp : tnode;
  797. begin
  798. sqr_sum:=false;
  { detect sqr(a)+sqr(b) / sqr(a)-sqr(b) }
  799. if (current_settings.fputype>=fpu_sse3) and
  800. use_vectorfpu(resultdef) and
  801. (nodetype in [addn,subn]) and
  802. (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
  803. (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
  804. begin
  805. sqr_sum:=true;
  { replace each sqr node by its argument; the child pointer is cleared
    before freeing so that the argument node itself is kept }
  806. tmp:=tinlinenode(left).left;
  807. tinlinenode(left).left:=nil;
  808. left.free;
  809. left:=tmp;
  810. tmp:=tinlinenode(right).left;
  811. tinlinenode(right).left:=nil;
  812. right.free;
  813. right:=tmp;
  814. end;
  815. pass_left_right;
  816. { fpu operands are always in reversed order on the stack }
  817. if (left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER) then
  818. toggleflag(nf_swapped);
  819. if (nf_swapped in flags) then
  820. { can't use swapleftright if both are on the fpu stack, since then }
  821. { both are "R_ST" -> nothing would change -> manually switch }
  822. if (left.location.loc = LOC_FPUREGISTER) and
  823. (right.location.loc = LOC_FPUREGISTER) then
  824. emit_none(A_FXCH,S_NO)
  825. else
  826. swapleftright;
  { map the node type onto the generic code generator operation }
  827. case nodetype of
  828. addn :
  829. op:=OP_ADD;
  830. muln :
  831. op:=OP_MUL;
  832. subn :
  833. op:=OP_SUB;
  834. slashn :
  835. op:=OP_DIV;
  836. else
  837. internalerror(200312231);
  838. end;
  839. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  840. if sqr_sum then
  841. begin
  842. if nf_swapped in flags then
  843. swapleftright;
  { both operands are needed in mm registers; the register of left is
    reused as the result register }
  844. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  845. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  846. location:=left.location;
  847. if is_double(resultdef) then
  848. begin
  { NOTE(review): presumably SHUFPD places right next to left in the result
    register, MULPD squares both and HADDPD/HSUBPD combines them - confirm
    against the instruction reference }
  849. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
  850. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
  851. case nodetype of
  852. addn:
  853. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
  854. subn:
  855. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
  856. else
  857. internalerror(201108162);
  858. end;
  859. end
  860. else
  861. begin
  862. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
  863. { ensure that bits 64..127 contain valid values }
  864. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
  865. { NOTE(review): the operand pair should now be in bits 0..63 and duplicated in bits 64..127 (this comment used to read "bits 0..32 and 64..95") - confirm }
  866. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
  867. case nodetype of
  868. addn:
  869. begin
  870. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
  871. end;
  872. subn:
  873. begin
  874. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
  875. end;
  876. else
  877. internalerror(201108163);
  878. end;
  879. end
  880. end
  881. { we can use only right as left operand if the operation is commutative }
  882. else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  883. begin
  { copy right into a new result register, then apply left to it }
  884. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  885. cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
  886. { force floating point reg. location to be written to memory,
  887. we don't force it to mm register because writing to memory
  888. allows probably shorter code because there is no direct fpu->mm register
  889. copy instruction
  890. }
  891. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  892. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  893. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
  894. end
  895. else
  896. begin
  { general case: load left into a new result register, then apply right }
  897. if nf_swapped in flags then
  898. swapleftright;
  899. { force floating point reg. location to be written to memory,
  900. we don't force it to mm register because writing to memory
  901. allows probably shorter code because there is no direct fpu->mm register
  902. copy instruction
  903. }
  904. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  905. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  906. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  907. cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
  908. { force floating point reg. location to be written to memory,
  909. we don't force it to mm register because writing to memory
  910. allows probably shorter code because there is no direct fpu->mm register
  911. copy instruction
  912. }
  913. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  914. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  915. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
  916. end;
  917. end;
  { AVX variant of the scalar floating point add/sub/mul/div code generation.
    The result is placed in a newly allocated MM register and the operation
    is emitted through the three operand cg.a_opmm_*_reg_reg helpers.
    Any node type other than addn/muln/subn/slashn raises
    internalerror(200312231).

    The sqr(a)+-sqr(b) detection is only compiled when the symbol "dummy" is
    defined; without it sqr_sum stays false and the "if sqr_sum" branch below
    is never taken.

    A multiplication by the real constant 2 is emitted as an addition of the
    other operand to itself. }
  918. procedure tx86addnode.second_addfloatavx;
  919. var
  920. op : topcg;
  921. sqr_sum : boolean;
  922. {$ifdef dummy}
  923. tmp : tnode;
  924. {$endif dummy}
  925. begin
  926. sqr_sum:=false;
  927. {$ifdef dummy}
  928. if (current_settings.fputype>=fpu_sse3) and
  929. use_vectorfpu(resultdef) and
  930. (nodetype in [addn,subn]) and
  931. (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
  932. (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
  933. begin
  934. sqr_sum:=true;
  935. tmp:=tinlinenode(left).left;
  936. tinlinenode(left).left:=nil;
  937. left.free;
  938. left:=tmp;
  939. tmp:=tinlinenode(right).left;
  940. tinlinenode(right).left:=nil;
  941. right.free;
  942. right:=tmp;
  943. end;
  944. {$endif dummy}
  945. pass_left_right;
  946. { fpu operands are always in reversed order on the stack }
  947. if (left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER) then
  948. toggleflag(nf_swapped);
  949. if (nf_swapped in flags) then
  950. { can't use swapleftright if both are on the fpu stack, since then }
  951. { both are "R_ST" -> nothing would change -> manually switch }
  952. if (left.location.loc = LOC_FPUREGISTER) and
  953. (right.location.loc = LOC_FPUREGISTER) then
  954. emit_none(A_FXCH,S_NO)
  955. else
  956. swapleftright;
  { map the node type onto the generic code generator operation }
  957. case nodetype of
  958. addn :
  959. op:=OP_ADD;
  960. muln :
  961. op:=OP_MUL;
  962. subn :
  963. op:=OP_SUB;
  964. slashn :
  965. op:=OP_DIV;
  966. else
  967. internalerror(200312231);
  968. end;
  969. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  { only reachable when compiled with "dummy" defined, see the header comment;
    NOTE(review): this branch emits the same non-V-prefixed opcodes as the SSE variant }
  970. if sqr_sum then
  971. begin
  972. if nf_swapped in flags then
  973. swapleftright;
  974. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  975. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  976. location:=left.location;
  977. if is_double(resultdef) then
  978. begin
  979. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
  980. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
  981. case nodetype of
  982. addn:
  983. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
  984. subn:
  985. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
  986. else
  987. internalerror(201108162);
  988. end;
  989. end
  990. else
  991. begin
  992. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
  993. { ensure that bits 64..127 contain valid values }
  994. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
  995. { NOTE(review): the operand pair should now be in bits 0..63 and duplicated in bits 64..127 (this comment used to read "bits 0..32 and 64..95") - confirm }
  996. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
  997. case nodetype of
  998. addn:
  999. begin
  1000. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
  1001. end;
  1002. subn:
  1003. begin
  1004. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
  1005. end;
  1006. else
  1007. internalerror(201108163);
  1008. end;
  1009. end
  1010. end
  1011. { left*2 ? }
  1012. else if (nodetype=muln) and is_constrealnode(right) and is_number_float(trealconstnode(right).value_real) and (trealconstnode(right).value_real=2) then
  1013. begin
  { emitted as left+left into a new result register }
  1014. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
  1015. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1016. cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
  1017. left.location.register,
  1018. left.location.register,
  1019. location.register,
  1020. mms_movescalar);
  1021. end
  1022. { right*2 ? }
  1023. else if (nodetype=muln) and is_constrealnode(left) and is_number_float(trealconstnode(left).value_real) and (trealconstnode(left).value_real=2) then
  1024. begin
  { emitted as right+right into a new result register }
  1025. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
  1026. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  1027. cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
  1028. right.location.register,
  1029. right.location.register,
  1030. location.register,
  1031. mms_movescalar);
  1032. end
  1033. { we can use only right as left operand if the operation is commutative }
  1034. else if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) and (op in [OP_ADD,OP_MUL]) then
  1035. begin
  1036. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
  1037. { force floating point reg. location to be written to memory,
  1038. we don't force it to mm register because writing to memory
  1039. allows probably shorter code because there is no direct fpu->mm register
  1040. copy instruction
  1041. }
  1042. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1043. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  1044. cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  1045. left.location,
  1046. right.location.register,
  1047. location.register,
  1048. mms_movescalar);
  1049. end
  1050. else
  1051. begin
  { general case: left is forced into an mm register, right may stay in memory }
  1052. if (nf_swapped in flags) then
  1053. swapleftright;
  1054. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1055. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
  1056. { force floating point reg. location to be written to memory,
  1057. we don't force it to mm register because writing to memory
  1058. allows probably shorter code because there is no direct fpu->mm register
  1059. copy instruction
  1060. }
  1061. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1062. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  1063. cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  1064. right.location,
  1065. left.location.register,
  1066. location.register,
  1067. mms_movescalar);
  1068. end;
  1069. end;
  { First pass hook of the x86 add node.

    If at least one operand is a float and neither the operands nor the
    result use the vector fpu (i.e. the x87 is used), both operands are
    marked as not regable before the inherited pass_1 runs: there are no
    usable fpu registers on x86, so a register for such an operand would
    be an mm register. }
  function tx86addnode.pass_1: tnode;
    var
      float_operand,
      vectorfpu_used : boolean;
    begin
      float_operand:=(left.resultdef.typ=floatdef) or
                     (right.resultdef.typ=floatdef);
      if float_operand then
        begin
          vectorfpu_used:=use_vectorfpu(left.resultdef) or
                          use_vectorfpu(right.resultdef) or
                          use_vectorfpu(resultdef);
          if not vectorfpu_used then
            begin
              make_not_regable(left,[ra_addr_regable]);
              make_not_regable(right,[ra_addr_regable]);
            end;
        end;
      Result:=inherited pass_1;
    end;
  { Returns whether fused multiply-add may be used for this node.

    On i8086 the decision is left to the inherited implementation. On the
    other x86 targets the result must use the vector fpu (mixing fpu
    registers and fma makes no sense) and the selected cpu type must provide
    CPUX86_HAS_FMA or CPUX86_HAS_FMA4. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifdef i8086}
      Result:=inherited use_fma;
{$else i8086}
      Result:=false;
      if use_vectorfpu(resultdef) then
        Result:=(cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[];
{$endif i8086}
    end;
  { Compares two floating point operands with (V)COMISS/(V)COMISD and leaves
    the result in the flags (location becomes LOC_FLAGS, resflags is taken
    from getfpuresflags).

    The instruction is chosen from the type of left (single or double,
    anything else raises internalerror(200402222)) and from UseAVX. If right
    already is in an mm register it is used as register operand and
    nf_swapped is toggled; otherwise left is forced into an mm register. }
  procedure tx86addnode.second_cmpfloatvector;
    var
      instr : tasmop;
    begin
      if is_single(left.resultdef) then
        begin
          if UseAVX then
            instr:=A_VCOMISS
          else
            instr:=A_COMISS;
        end
      else if is_double(left.resultdef) then
        begin
          if UseAVX then
            instr:=A_VCOMISD
          else
            instr:=A_COMISD;
        end
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu operands to
        memory (not to mm registers because one of the memory locations can be used
        directly in compare instruction, yielding shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if not(right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
        begin
          { right is not in an mm register: left becomes the register operand }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          case right.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(instr,S_NO,right.location.register,left.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(instr,S_NO,right.location.reference,left.location.register));
              end;
            else
              internalerror(200402223);
          end;
        end
      else
        begin
          { right is the register operand, so the comparison is emitted with
            exchanged operands and the swapped flag is toggled }
          case left.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(
                taicpu.op_reg_reg(instr,S_NO,left.location.register,right.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_ref_reg(instr,S_NO,left.location.reference,right.location.register));
              end;
            else
              internalerror(200402221);
          end;
          toggleflag(nf_swapped);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Emits code for an add, sub, mul or div on vector operands that fit into
    an mm register; the result is a LOC_MMREGISTER.

    Raises internalerror(200610071) for any other node type and
    internalerror(200610072) if the operand type does not fit into an mm
    register (not yet supported).

    The element size passed to the code generator is taken from the element
    type of the array def of left in both branches. Previously the
    commutative branch cast left.resultdef itself to tfloatdef, although the
    other branch treats the very same def as a tarraydef.
    NOTE(review): this assumes that fits_in_mm_register only accepts array
    defs with a float element type - confirm against its implementation. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { size of one vector element, shared by both branches below }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Emits code for a floating point add, sub, mul or div.

    Results that use the vector fpu are delegated to second_addfloatavx or
    second_addfloatsse. Otherwise x87 code is generated: after
    prepare_x87_locations either one operand is used directly from memory
    (refnode assigned) or both operands are on the fpu stack and the popping
    register form of the instruction is used. The result is in NR_ST.
    Any other node type raises internalerror(2003042214). }
  procedure tx86addnode.second_addfloat;
    const
      { index false: popping register form, index true: form used with a
        memory operand }
      ops_add: array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      ops_mul: array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      ops_sub: array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
      ops_rsub: array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
      ops_div: array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
      ops_rdiv: array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
    var
      instr : TAsmOp;
      memnode, convnode : tnode;
      use_mem, reversed : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      { can the operation do the conversion? If an operand is a type
        conversion from single or double, drop the conversion node and use
        its source operand directly }
      if left.nodetype=typeconvn then
        if is_double(ttypeconvnode(left).left.resultdef) or is_single(ttypeconvnode(left).left.resultdef) then
          begin
            convnode:=left;
            left:=ttypeconvnode(convnode).left;
            ttypeconvnode(convnode).left:=nil;
            convnode.Free;
          end;
      if right.nodetype=typeconvn then
        if is_double(ttypeconvnode(right).left.resultdef) or is_single(ttypeconvnode(right).left.resultdef) then
          begin
            convnode:=right;
            right:=ttypeconvnode(convnode).left;
            ttypeconvnode(convnode).left:=nil;
            convnode.Free;
          end;

      pass_left_right;
      prepare_x87_locations(memnode);
      use_mem:=assigned(memnode);
      { must be read after the locations have been prepared }
      reversed:=nf_swapped in flags;

      case nodetype of
        addn :
          instr:=ops_add[use_mem];
        muln :
          instr:=ops_mul[use_mem];
        subn :
          begin
            { non-commutative: pick the reversed instruction when swapped }
            if reversed then
              instr:=ops_rsub[use_mem]
            else
              instr:=ops_sub[use_mem];
          end;
        slashn :
          begin
            if reversed then
              instr:=ops_rdiv[use_mem]
            else
              instr:=ops_div[use_mem];
          end;
        else
          internalerror(2003042214);
      end;

      if not use_mem then
        begin
          emit_reg_reg(instr,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(instr,tcgsize2opsize[memnode.location.size],memnode.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Compares two floating point operands and leaves the result in the cpu
    flags (location becomes LOC_FLAGS, resflags is taken from getfpuresflags).

    If either operand uses the vector fpu the work is delegated to
    second_cmpfloatvector. Otherwise both operands are forced into fpu
    registers and compared there:
    - on non-x86_64 targets with a cpu type below cpu_Pentium2, FCOMPP is
      emitted and the fpu status word is transferred to the flags with SAHF;
    - in all other cases FCOMIP is emitted, followed by an FSTP of ST0. }
  1258. procedure tx86addnode.second_cmpfloat;
  1259. {$ifdef i8086}
  1260. var
  1261. tmpref: treference;
  1262. {$endif i8086}
  1263. begin
  1264. if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
  1265. begin
  1266. second_cmpfloatvector;
  1267. exit;
  1268. end;
  1269. pass_left_right;
  1270. force_left_and_right_fpureg;
  1271. {$ifndef x86_64}
  1272. if current_settings.cputype<cpu_Pentium2 then
  1273. begin
  1274. emit_none(A_FCOMPP,S_NO);
  { NOTE(review): two dec_fpu_stack calls, presumably because FCOMPP removes both operands from the fpu stack - confirm }
  1275. tcgx86(cg).dec_fpu_stack;
  1276. tcgx86(cg).dec_fpu_stack;
  1277. { load fpu flags }
  1278. {$ifdef i8086}
  1279. if current_settings.cputype < cpu_286 then
  1280. begin
  { below cpu_286: store the status word into a 2 byte temp and load the
    byte at offset 1 into AH before SAHF }
  1281. tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
  1282. emit_ref(A_FSTSW,S_NO,tmpref);
  1283. cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1284. inc(tmpref.offset);
  1285. emit_ref_reg(A_MOV,S_B,tmpref,NR_AH);
  { restore the offset so that the temp is released with its original reference }
  1286. dec(tmpref.offset);
  1287. emit_none(A_SAHF,S_NO);
  1288. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1289. tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
  1290. end
  1291. else
  1292. {$endif i8086}
  1293. begin
  { store the status word directly into AX, then copy AH to the flags }
  1294. cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1295. emit_reg(A_FNSTSW,S_NO,NR_AX);
  1296. emit_none(A_SAHF,S_NO);
  1297. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1298. end;
  1299. end
  1300. else
  1301. {$endif x86_64}
  { on x86_64 the conditional block above is not compiled, so this block
    is executed unconditionally there }
  1302. begin
  1303. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
  1304. { fcomip pops only one fpu register }
  1305. current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
  1306. tcgx86(cg).dec_fpu_stack;
  1307. tcgx86(cg).dec_fpu_stack;
  1308. end;
  1309. location_reset(location,LOC_FLAGS,OS_NO);
  1310. location.resflags:=getfpuresflags;
  1311. end;
  1312. {*****************************************************************************
  1313. Add64bit
  1314. *****************************************************************************}
  { 64 bit arithmetic: with a 64 bit alu (cpu64bitalu defined) this is the
    same as the ordinal case; without it this generic implementation raises
    internalerror(200402042) because it must be implemented separately. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: with a 64 bit alu (cpu64bitalu defined) this is the
    same as the ordinal case; without it this generic implementation raises
    internalerror(200402043) because it must be implemented separately. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1333. {*****************************************************************************
  1334. AddOrdinal
  1335. *****************************************************************************}
  { Emits code for the ordinal operations add, sub, mul, and, or and xor;
    any other node type raises internalerror(2015022501).

    Two strategies are used:
    - if neither operand already is in a LOC_REGISTER (except for a
      subtraction whose right operand is a reference), or for an addition of
      register/constant operands, both operands are forced into registers
      and a three operand operation into a new result register is emitted;
    - otherwise the register of one operand is reused as destination of a
      two operand operation.

    Overflow checking is requested for add, sub and mul only, and only if
    needoverflowcheck returns true. }
  1336. procedure tx86addnode.second_addordinal;
  1337. var
  1338. opsize : tcgsize;
  1339. unsigned : boolean;
  1340. cgop : topcg;
  1341. checkoverflow : Boolean;
  1342. ovloc : tlocation;
  1343. tmpreg : TRegister;
  1344. begin
  1345. { determine whether the operation is unsigned (only used below to choose between OP_MUL and OP_IMUL) }
  1346. unsigned:=not(is_signed(left.resultdef)) or
  1347. not(is_signed(right.resultdef));
  1348. { assume no overflow checking is required }
  1349. checkoverflow := false;
  1350. ovloc.loc:=LOC_VOID;
  1351. case nodetype of
  1352. addn:
  1353. begin
  1354. cgop:=OP_ADD;
  1355. checkoverflow:=true;
  1356. end;
  1357. xorn :
  1358. begin
  1359. cgop:=OP_XOR;
  1360. end;
  1361. orn :
  1362. begin
  1363. cgop:=OP_OR;
  1364. end;
  1365. andn:
  1366. begin
  1367. cgop:=OP_AND;
  1368. end;
  1369. muln:
  1370. begin
  1371. checkoverflow:=true;
  1372. if unsigned then
  1373. cgop:=OP_MUL
  1374. else
  1375. cgop:=OP_IMUL;
  1376. end;
  1377. subn :
  1378. begin
  1379. checkoverflow:=true;
  1380. cgop:=OP_SUB;
  1381. end;
  1382. else
  1383. internalerror(2015022501);
  1384. end;
  { only check for overflow if the operation can overflow and a check is needed }
  1385. checkoverflow:=
  1386. checkoverflow and
  1387. needoverflowcheck;
  1388. opsize:=def_cgsize(left.resultdef);
  1389. pass_left_right;
  1390. { do have to allocate a register? If yes, then three opcode instructions are better, however for sub three op code instructions
  1391. make no sense if right is a reference }
  1392. if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER) and ((nodetype<>subn) or not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]))) or
  1393. ((nodetype=addn) and (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
  1394. begin
  1395. { allocate registers }
  1396. force_reg_left_right(false,true);
  1397. set_result_location_reg;
  1398. if nodetype<>subn then
  1399. begin
  1400. if (right.location.loc<>LOC_CONSTANT) then
  1401. hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
  1402. left.location.register,right.location.register,
  1403. location.register,checkoverflow,ovloc)
  1404. else
  1405. hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
  1406. right.location.value,left.location.register,
  1407. location.register,checkoverflow,ovloc);
  1408. end
  1409. else { subtract is a special case since its not commutative }
  1410. begin
  1411. if (nf_swapped in flags) then
  1412. swapleftright;
  1413. if left.location.loc<>LOC_CONSTANT then
  1414. begin
  1415. if right.location.loc<>LOC_CONSTANT then
  1416. hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
  1417. right.location.register,left.location.register,
  1418. location.register,checkoverflow,ovloc)
  1419. else
  1420. hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
  1421. right.location.value,left.location.register,
  1422. location.register,checkoverflow,ovloc);
  1423. end
  1424. else
  1425. begin
  { left is a constant: load it into a temporary register first }
  1426. tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
  1427. hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
  1428. left.location.value,tmpreg);
  1429. hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
  1430. right.location.register,tmpreg,location.register,checkoverflow,ovloc);
  1431. end;
  1432. end
  1433. end
  1434. else
  1435. begin
  1436. { at least one location should be a register, if yes, try to re-use it, so we can try two operand opcodes }
  1437. if left.location.loc<>LOC_REGISTER then
  1438. begin
  1439. if right.location.loc<>LOC_REGISTER then
  1440. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false)
  1441. else
  1442. begin
  { right already is in a register: exchange the locations and remember the swap }
  1443. location_swap(left.location,right.location);
  1444. toggleflag(nf_swapped);
  1445. end;
  1446. end;
  1447. { at this point, left.location.loc should be LOC_REGISTER }
  1448. if right.location.loc=LOC_REGISTER then
  1449. begin
  1450. { when swapped another result register }
  1451. if (nodetype=subn) and (nf_swapped in flags) then
  1452. begin
  1453. cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
  1454. left.location.register,right.location.register);
  { the result was computed into the register of right; swap the locations
    back so that left.location holds it for the location_copy below }
  1455. location_swap(left.location,right.location);
  1456. toggleflag(nf_swapped);
  1457. end
  1458. else
  1459. cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
  1460. right.location.register,left.location.register);
  1461. end
  1462. else
  1463. begin
  1464. { right.location<>LOC_REGISTER }
  1465. if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
  1466. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);
  1467. if (nodetype=subn) and (nf_swapped in flags) then
  1468. begin
  { swapped subtraction: load right into a new register and apply the old
    register of left to it; the new register becomes left.location.register }
  1469. tmpreg:=left.location.register;
  1470. left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  1471. cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
  1472. cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
  1473. end
  1474. else
  1475. cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
  1476. location_freetemp(current_asmdata.CurrAsmList,right.location);
  1477. end;
  1478. location_copy(location,left.location);
  1479. end;
  1480. { emit overflow check if required }
  1481. if checkoverflow then
  1482. cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
  1483. end;
  { Emits a CMP for two ordinal operands and leaves the result in the flags
    (location becomes LOC_FLAGS, resflags is taken from getresflags).

    A constant right operand is compared directly against a left operand in
    memory; on x86_64 this is only done for 64 bit operands if the constant
    lies within the longint range. In all other cases left is forced into a
    register and the generic code is emitted. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned,
      const_vs_mem : boolean;
    begin
      { unsigned as soon as one of the operands is not signed }
      is_unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      const_vs_mem:=(right.location.loc=LOC_CONSTANT) and
                    (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE]);
{$ifdef x86_64}
      { for 64 bit operands the constant must lie within the longint range }
      if const_vs_mem and (cmpsize in [OS_64,OS_S64]) then
        const_vs_mem:=(right.location.value>=low(longint)) and
                      (right.location.value<=high(longint));
{$endif x86_64}

      if const_vs_mem then
        begin
          emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { unit initialization: sets caddnode to the x86 add node class;
    NOTE(review): presumably caddnode is the class reference used to create add nodes - confirm }
  1517. begin
  1518. caddnode:=tx86addnode;
  1519. end.