nx86add.pas 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add node used for code generation on the i386 and x86 targets. It derives
    from tcgaddnode and overrides the second_* methods listed in the public
    section. }
  tx86addnode = class(tcgaddnode)
  protected
    { selection of the result flags for integer and floating point compares }
    function getfpuresflags : tresflags;
    function getresflags(unsigned : boolean) : tresflags;

    { operand preparation }
    procedure force_left_and_right_fpureg;
    procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    procedure prepare_x87_locations(out refnode: tnode);

    { instruction emission }
    procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
    procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize);

    { floating point workers }
    procedure second_addfloatavx;
    procedure second_addfloatsse;
    procedure second_cmpfloatvector;
  public
    function use_fma : boolean;override;
    procedure second_addfloat;override;
{$ifndef i8086}
    procedure second_addsmallset;override;
{$endif not i8086}
    procedure second_add64bit;override;
    procedure second_cmpfloat;override;
    procedure second_cmpsmallset;override;
    procedure second_cmp64bit;override;
    procedure second_cmpordinal;override;
    procedure second_addordinal;override;
{$ifdef SUPPORT_MMX}
    procedure second_opmmx;override;
{$endif SUPPORT_MMX}
    procedure second_opvector;override;
  end;
  55. implementation
  56. uses
  57. globtype,globals,
  58. verbose,cutils,compinnr,
  59. cpuinfo,
  60. aasmbase,aasmdata,aasmcpu,
  61. symconst,symdef,
  62. cgobj,hlcgobj,cgx86,cga,cgutils,
  63. tgobj,ncgutil,
  64. ncon,nset,ninl,
  65. defutil;
  66. { Range check must be disabled explicitly as the code serves
  67. on three different architecture sizes }
  68. {$R-}
  69. {*****************************************************************************
  70. Helpers
  71. *****************************************************************************}
  { Emits the integer instruction for a binary node. On entry
    left.location.loc is expected to be LOC_REGISTER.

      op          instruction to emit
      opsize      operand size used for every emitted instruction
      unsigned    picks the flag tested by the overflow check
                  (F_AE when set, F_NO otherwise)
      extra_not   one operand is complemented with NOT before the operation;
                  when right is not in a register and the subtraction is not
                  swapped, the combining instruction is always AND
      mboverflow  if set and overflow checking is switched on, a conditional
                  jump around a call to FPC_OVERFLOW is appended }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      shift : longint;
      ovf_done : tasmlabel;
      scratch : Tregister;
      lea_ref : treference;
      swapped_sub,
      ovf_checks : boolean;
    begin
      { both conditions are invariant until the first instruction is emitted }
      swapped_sub:=(nodetype=subn) and (nf_swapped in flags);
      ovf_checks:=cs_check_overflow in current_settings.localswitches;

      if right.location.loc=LOC_REGISTER then
        begin
          if swapped_sub then
            begin
              { swapped subtraction: the result is produced in right's
                register, so exchange the locations and flip the swap flag }
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these instructions the locations are exchanged first }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else if swapped_sub then
        begin
          { right is not in a register: load it into a scratch register,
            apply op there and copy the result back into left's register }
          if extra_not then
            cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,scratch);
          emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,scratch);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,scratch,left.location.register);
        end
      else if (op=A_CMP) and
              (nodetype in [equaln,unequaln]) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=0) then
        begin
          { (in)equality against zero becomes a TEST.
            'test $-1,%reg' can be turned into 'test $-1,spilltemp' when %reg
            has to be spilled, whereas 'test %reg,%reg' always needs the value
            in a register. Without spilling the peephole optimizer changes it
            back into 'test %reg,%reg' (currently on i386 only). }
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef i386}
          emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register);
{$else i386}
          emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
        end
      else if (op=A_ADD) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              not(ovf_checks) and
              UseIncDec then
        { x+1 without overflow checking }
        emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register)
      else if (op=A_SUB) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value=1) and
              not(ovf_checks) and
              UseIncDec then
        { x-1 without overflow checking }
        emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register)
      else if (op=A_IMUL) and
              (right.location.loc=LOC_CONSTANT) and
              (ispowerof2(int64(right.location.value),shift)) and
              not(ovf_checks) then
        { multiplication by a power of two is done with a shift }
        emit_const_reg(A_SHL,TCGSize2Opsize[opsize],shift,left.location.register)
      else if (op=A_IMUL) and
              (right.location.loc=LOC_CONSTANT) and
              (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,shift)) and
              (shift in [1..3]) and
              not(ovf_checks) then
        begin
          { the constant is a power of two plus one: a single LEA with
            base=index=left and scale=constant-1 computes the product into a
            freshly allocated register }
          reference_reset_base(lea_ref,left.location.register,0,ctempposinvalid,0,[]);
          lea_ref.index:=left.location.register;
          lea_ref.scalefactor:=int64(right.location.value)-1;
          left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],lea_ref,left.location.register));
        end
      else if extra_not then
        begin
          { load right into a scratch register, complement it and AND it
            into left }
          scratch:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,scratch);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],scratch);
          emit_reg_reg(A_AND,TCGSize2Opsize[opsize],scratch,left.location.register);
        end
      else
        emit_op_right_left(op,opsize);

      { The overflow test has to be generated right here because the
        signedness of the operation is only known through 'unsigned'. }
      if mboverflow and ovf_checks then
        begin
          current_asmdata.getjumplabel(ovf_done);
          if unsigned then
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,ovf_done)
          else
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,ovf_done);
          cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
          cg.a_label(current_asmdata.CurrAsmList,ovf_done);
        end;
    end;
  { Makes sure the left operand is in a register.

      opdef/opsize  definition and size the operands are forced to
      noswap        when set, left and right are never exchanged

    If left is not a LOC_REGISTER and swapping is allowed, a right operand
    that already is a LOC_REGISTER or LOC_CREGISTER is exchanged with left
    (toggling nf_swapped); otherwise left is loaded into a register.
    Afterwards any non-constant operand whose size differs from opsize
    (ignoring signedness) is forced into a register of type opdef. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_compare,
      from_cregister : boolean;
    begin
      { A comparison does not modify its operands, so a constant register
        may possibly be reused instead of being copied. }
      is_compare:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];

      if left.location.loc<>LOC_REGISTER then
        begin
          if (not noswap) and
             (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
            begin
              from_cregister:=(right.location.loc=LOC_CREGISTER);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
              if from_cregister then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare);
                  location:=left.location;
                end;
            end
          else
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_compare);
        end;

      { adjust operands whose size does not match the operation size }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers, right first. nf_swapped is
    toggled whenever left already was a LOC_FPUREGISTER, because the operands
    then sit on the FPU stack in the opposite order. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc<>LOC_FPUREGISTER then
        { the nominator ends up in st0 }
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
      else
        { left was on the stack already => operands are in the wrong order }
        toggleflag(nf_swapped);
    end;
  265. { Makes sides suitable for executing an x87 instruction:
  266. if either side is OS_F32/OS_F64-sized LOC_REFERENCE, it is returned in 'refnode'
  267. everything else is loaded to FPU stack. }
  { Prepares both operands for an x87 instruction.

      refnode  receives the operand that may stay in memory, i.e. a
               LOC_REFERENCE/LOC_CREFERENCE of size OS_F32 or OS_F64;
               nil when both operands were loaded onto the FPU stack

    nf_swapped is toggled where the resulting operand order requires it. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_fpu_stack : longint;
    begin
      refnode:=nil;

      { mm registers are not allowed further down, so move such operands to
        memory first; they are loaded into an fpu register below if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { number of operands that already are in an FPU register }
      on_fpu_stack:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);
      case on_fpu_stack of
        0:
          begin
            { right always goes onto the stack; left may stay in memory }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if left.location.size in [OS_F32,OS_F64] then
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end
            else
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          end;
        1:
          begin
            { the operand that is not on the stack is the memory candidate }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable as memory operand }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { unsuitable size for a memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        else
          InternalError(2013090802);
      end;
    end;
  { Emits 'op right,left' with left.location.register as destination.

      op      instruction to emit
      opsize  operand size of the instruction

    Right may be a register, a memory reference or a constant; subset
    locations are loaded into a register first. Any other location raises
    internalerror 200203232. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}
    begin
      { subset locations cannot be used as an operand directly }
      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly; larger
              64 bit values go through a temporary register }
            if (opsize in [OS_S64,OS_64]) and
               ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_REGISTER,
        LOC_CREGISTER :
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
        else
          internalerror(200203232);
      end;
    end;
  { Returns the result flag that corresponds to the comparison in nodetype.

      unsigned  selects the unsigned flags (F_A/F_AE/F_B/F_BE) instead of
                the signed ones (F_G/F_GE/F_L/F_LE)

    When nf_swapped is set the direction of the ordering comparisons is
    mirrored. equaln/unequaln map to F_E/F_NE in all cases. Any other node
    type raises an internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;

    { picks one of four candidates depending on signedness and swap state }
    function pick(s_plain,s_swapped,u_plain,u_swapped : tresflags) : tresflags;
      begin
        if unsigned then
          begin
            if swapped then
              pick:=u_swapped
            else
              pick:=u_plain;
          end
        else
          begin
            if swapped then
              pick:=s_swapped
            else
              pick:=s_plain;
          end;
      end;

    begin
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_E;
        unequaln :
          result:=F_NE;
        ltn :
          result:=pick(F_L,F_G,F_B,F_A);
        lten :
          result:=pick(F_LE,F_GE,F_BE,F_AE);
        gtn :
          result:=pick(F_G,F_L,F_A,F_B);
        gten :
          result:=pick(F_GE,F_LE,F_AE,F_BE);
        else
          begin
            { each signedness/swap combination has its own error number }
            if unsigned then
              begin
                if swapped then
                  internalerror(2013120107)
                else
                  internalerror(2013120108);
              end
            else
              begin
                if swapped then
                  internalerror(2013120105)
                else
                  internalerror(2013120106);
              end;
          end;
      end;
    end;
  { Returns the floating point result flag for the comparison in nodetype.
    equaln/unequaln give F_FE/F_FNE; for the ordering comparisons the
    direction is mirrored when nf_swapped is set. Any other node type raises
    an internal error. }
  function tx86addnode.getfpuresflags : tresflags;
    var
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        ltn :
          begin
            if swapped then
              result:=F_FA
            else
              result:=F_FB;
          end;
        lten :
          begin
            if swapped then
              result:=F_FAE
            else
              result:=F_FBE;
          end;
        gtn :
          begin
            if swapped then
              result:=F_FB
            else
              result:=F_FA;
          end;
        gten :
          begin
            if swapped then
              result:=F_FBE
            else
              result:=F_FAE;
          end;
        else
          begin
            if swapped then
              internalerror(2014031402)
            else
              internalerror(2014031403);
          end;
      end;
    end;
  431. {*****************************************************************************
  432. AddSmallSet
  433. *****************************************************************************}
  434. {$ifndef i8086}
  { Generates code for the set operators on small sets: addn (union or
    inclusion of a single element), subn, muln/andn, symdifn/xorn and orn.
    The result location is stored in 'location'. }
  procedure tx86addnode.second_addsmallset;
    var
      base : aint;
      workdef : tdef;
      worksize : TCGSize;
      insn : TAsmOp;
      invert_operand,
      keep_order,
      complement_only : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      invert_operand:=false;
      complement_only:=false;
      workdef:=resultdef;
      worksize:=int_cgsize(workdef.size);

      { the set base comes from whichever operand is the set }
      if left.resultdef.typ=setdef then
        base:=tsetdef(left.resultdef).setbase
      else
        base:=tsetdef(right.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              insn:=A_OR
            else
              begin
                { a single element is added with BTS }
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported, widen to 32 bit }
                if worksize=OS_8 then
                  begin
                    worksize:=OS_32;
                    workdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,workdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,workdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,workdef,right.location,base);
                insn:=A_BTS;
                keep_order:=true;
              end;
          end;
        symdifn,
        xorn :
          insn:=A_XOR;
        muln,
        andn :
          insn:=A_AND;
        orn :
          insn:=A_OR;
        subn :
          begin
            { a-b is computed as a and not(b) }
            insn:=A_AND;
            if nf_swapped in flags then
              begin
                { operands are exchanged: right is the minuend, left the
                  subtrahend }
                complement_only:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  invert_operand:=true;
              end
            else
              begin
                complement_only:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  invert_operand:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if complement_only then
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,workdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[worksize],right.location.register);
          location:=right.location;
        end
      else if (insn=A_AND) and invert_operand and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
              (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
        begin
          { the BMI1 instruction andn combines the NOT and the AND and
            writes to a separate result register }
          location_reset(location,LOC_REGISTER,left.location.size);
          location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);
          if nf_swapped in flags then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,workdef,true);
          { left may stay in memory, anything else goes into a register }
          if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
            hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,workdef,true);
          case left.location.loc of
            LOC_CREFERENCE,LOC_REFERENCE:
              emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[worksize],left.location.reference,right.location.register,location.register);
            LOC_CREGISTER,LOC_REGISTER:
              emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[worksize],left.location.register,right.location.register,location.register);
            else
              Internalerror(2018040201);
          end;
        end
      else
        begin
          { generic path: left must be a register }
          left_must_be_reg(workdef,worksize,keep_order);
          emit_generic_code(insn,worksize,true,invert_operand,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end;

      { fix the changed operation size we did above because of the missing btsb }
      if worksize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,workdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  563. {$endif not i8086}
  { Generates code for comparisons of small sets (equaln, unequaln, lten,
    gten). The result is returned in the flags (LOC_FLAGS). lten/gten are
    implemented by and-ing the operands and comparing the result for
    equality with one of them. }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmpdef : tdef;
      cmpsize : TCGSize;
      insn : TAsmOp;
    begin
      pass_left_right;
      cmpdef:=left.resultdef;
      cmpsize:=int_cgsize(cmpdef.size);

      if nodetype in [lten,gten] then
        begin
          { exchange the operands for an unswapped lten or a swapped gten }
          if (nf_swapped in flags)=(nodetype=gten) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmpdef,false);
          emit_op_right_left(A_AND,cmpsize);
          insn:=A_CMP;
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if nodetype in [equaln,unequaln] then
        insn:=A_CMP
      else
        internalerror(2003042215);

      { left must be a register }
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(insn,cmpsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      { the result of the comparison is left in the flags }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  599. {*****************************************************************************
  600. AddMMX
  601. *****************************************************************************}
  602. {$ifdef SUPPORT_MMX}
  { Generates code for MMX operations: addn, subn (both optionally
    saturating, depending on cs_mmx_saturation), muln, xorn, orn and andn.
    The result is returned in an MMX register (LOC_MMXREGISTER).

    Raises internalerror 2003042214 for an unsupported node type and
    201408201 when no instruction exists for the element type of the
    operands. }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;
      { cmpop is never set in this routine, so the left temp is not released
        at the end }
      cmpop:=false;
      { A_NOP marks 'no instruction selected' and is checked below }
      op:=A_NOP;
      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));

      { select the instruction from the node type and the element type }
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                  else
                    ;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                  else
                    ;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
              else
                ;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  { 16 bit elements need the word variant, matching PADDSW
                    in the addn branch }
                  mmxs16bit,mmxfixed16:
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                  else
                    ;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                  else
                    ;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { no instruction available for this operator/element type pair }
      if op = A_NOP then
        internalerror(201408201);

      { left and right no register?  }
      { then one must be demanded    }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;
              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc is LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { swapped subtraction: copy right into a new register and
                subtract left from it; the new register holds the result }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { left is already an MMX register here, so it is right
                    that has to be a memory reference }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203247);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { the result is produced in right's register, so exchange the
                locations and flip the swap flag }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  784. {$endif SUPPORT_MMX}
  785. {*****************************************************************************
  786. AddFloat
  787. *****************************************************************************}
  procedure tx86addnode.second_addfloatsse;
    { Emits SSE code for a scalar floating point addn/subn/muln/slashn node.
      The result is always placed in an mm register (LOC_MMREGISTER).

      Three strategies are used, tried in this order:
        1. sqr(a)+sqr(b) resp. sqr(a)-sqr(b) with SSE3 available: both
           operands are packed into one register, squared with a single
           packed multiply and combined with a horizontal add/sub.
        2. right operand already in an mm register and the operation is
           commutative: right is copied into the result register and left
           is applied to it.
        3. otherwise: left is loaded into the result register and right
           is applied to it. }
    var
      cgop         : topcg;
      hop          : tasmop;
      resreg       : tregister;
      inner        : tnode;
      fuse_squares,
      both_on_x87  : boolean;
    begin
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and
        (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and
        (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_squares then
        begin
          { drop both sqr nodes and keep only their arguments; the squaring
            is done below by the packed multiply }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=
        (left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless when both operands live on the fpu
            stack, because both are "R_ST" and nothing would change, so the
            stack entries are exchanged manually in that case }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn   : cgop:=OP_ADD;
        muln   : cgop:=OP_MUL;
        subn   : cgop:=OP_SUB;
        slashn : cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      { strategy 1: sqr(a) +/- sqr(b) }
      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;
          resreg:=location.register;

          if is_double(resultdef) then
            begin
              { combine left and right in one register, then square both lanes }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,resreg));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,resreg,resreg));
              case nodetype of
                addn : hop:=A_HADDPD;
                subn : hop:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(hop,S_NO,resreg,resreg));
            end
          else
            begin
              { interleave the low singles of left and right }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,resreg));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,resreg,resreg));
              { square all lanes at once }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,resreg,resreg));
              case nodetype of
                addn : hop:=A_HADDPS;
                subn : hop:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(hop,S_NO,resreg,resreg));
            end;
          exit;
        end;

      { strategy 2: right can only serve as the left operand if the
        operation is commutative }
      if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          { an operand in an fpu register is written to memory instead of
            being forced into an mm register: there is no direct fpu->mm
            register copy instruction, so memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,left.location,location.register,mms_movescalar);
          exit;
        end;

      { strategy 3: general case }
      if nf_swapped in flags then
        swapleftright;

      { fpu register operands go through memory, see the remark above }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);

      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
      cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,location.register,mms_movescalar);
    end;
  procedure tx86addnode.second_addfloatavx;
    { Emits AVX code for a scalar floating point addn/subn/muln/slashn node.
      The result is always placed in a freshly allocated mm register, using
      the three operand forms of the code generator.

      The sqr(a) +/- sqr(b) detection is only compiled when the symbol
      "dummy" is defined; without it fuse_squares stays false and the
      corresponding code path below is never taken. }
    var
      cgop         : topcg;
      hop          : tasmop;
      resreg       : tregister;
      fuse_squares,
      both_on_x87  : boolean;
{$ifdef dummy}
      inner        : tnode;
{$endif dummy}
    begin
      fuse_squares:=false;
{$ifdef dummy}
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and
        (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and
        (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_squares then
        begin
          { drop both sqr nodes and keep only their arguments }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_x87:=
        (left.location.loc=LOC_FPUREGISTER) and
        (right.location.loc=LOC_FPUREGISTER);
      if both_on_x87 then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless when both operands live on the fpu
            stack, because both are "R_ST" and nothing would change, so the
            stack entries are exchanged manually in that case }
          if both_on_x87 then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn   : cgop:=OP_ADD;
        muln   : cgop:=OP_MUL;
        subn   : cgop:=OP_SUB;
        slashn : cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      { sqr(a) +/- sqr(b), only reachable when compiled with "dummy" }
      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;
          resreg:=location.register;

          if is_double(resultdef) then
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,resreg));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,resreg,resreg));
              case nodetype of
                addn : hop:=A_HADDPD;
                subn : hop:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(hop,S_NO,resreg,resreg));
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,resreg));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,resreg,resreg));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,resreg,resreg));
              case nodetype of
                addn : hop:=A_HADDPS;
                subn : hop:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(hop,S_NO,resreg,resreg));
            end;
          exit;
        end;

      { left*2 is emitted as left+left }
      if (nodetype=muln) and
         is_constrealnode(right) and
         is_number_float(trealconstnode(right).value_real) and
         (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,
            left.location.register,
            location.register,
            mms_movescalar);
          exit;
        end;

      { 2*right is emitted as right+right }
      if (nodetype=muln) and
         is_constrealnode(left) and
         is_number_float(trealconstnode(left).value_real) and
         (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,
            right.location.register,
            location.register,
            mms_movescalar);
          exit;
        end;

      { right can only serve as the register operand if the operation is
        commutative }
      if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an operand in an fpu register is written to memory instead of
            being forced into an mm register: there is no direct fpu->mm
            register copy instruction, so memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
          exit;
        end;

      { general case: left in an mm register, right from wherever it is }
      if nf_swapped in flags then
        swapleftright;

      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
      { fpu register operands go through memory, see the remark above }
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
      cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
        right.location,
        left.location.register,
        location.register,
        mms_movescalar);
    end;
  function tx86addnode.use_fma : boolean;
    { Tells whether a fused multiply-add may be used for this node.
      On non-i8086 targets this requires that the result is handled by the
      vector fpu (mixing fpu registers and fma makes no sense) and that the
      selected cpu type offers FMA or FMA4. On i8086 the inherited
      implementation decides. }
    begin
{$ifndef i8086}
      Result:=false;
      if use_vectorfpu(resultdef) then
        Result:=
          (CPUX86_HAS_FMA in cpu_capabilities[current_settings.cputype]) or
          (CPUX86_HAS_FMA4 in cpu_capabilities[current_settings.cputype]);
{$else i8086}
      Result:=inherited use_fma;
{$endif i8086}
    end;
  procedure tx86addnode.second_cmpfloatvector;
    { Emits a scalar floating point comparison using (V)COMISS/(V)COMISD.
      The result location is LOC_FLAGS; the flags to test are supplied by
      getfpuresflags. Only single and double operands are supported, any
      other type raises an internal error. }
    var
      cmp_ins : tasmop;
    begin
      { select the compare instruction from the operand type and from
        whether AVX encodings are in use }
      if is_single(left.resultdef) then
        begin
          if UseAVX then
            cmp_ins:=A_VCOMISS
          else
            cmp_ins:=A_COMISS;
        end
      else if is_double(left.resultdef) then
        begin
          if UseAVX then
            cmp_ins:=A_VCOMISD
          else
            cmp_ins:=A_COMISD;
        end
      else
        internalerror(200402222);

      pass_left_right;
      location_reset(location,LOC_FLAGS,OS_NO);

      { Direct move fpu->mm register is not possible, so force any fpu
        operands to memory (not to mm registers because one of the memory
        locations can be used directly in the compare instruction, yielding
        shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is already in a register: compare left against it and
            record that the operands were used in swapped order }
          if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmp_ins,S_NO,left.location.reference,right.location.register));
            end
          else if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmp_ins,S_NO,left.location.register,right.location.register))
          else
            internalerror(200402221);
          toggleflag(nf_swapped);
        end
      else
        begin
          { bring left into an mm register and compare right against it }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmp_ins,S_NO,right.location.reference,left.location.register));
            end
          else if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmp_ins,S_NO,right.location.register,left.location.register))
          else
            internalerror(200402223);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  procedure tx86addnode.second_opvector;
    { Emits code for an element-wise addn/muln/subn/slashn on vector
      operands that fit into a single mm register. The result is returned
      in an mm register; operands that do not fit into an mm register are
      not supported yet and raise an internal error. }
    var
      op       : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { The operation size is the size of one vector element. It is
            taken from the element type of the array def in both branches;
            previously the commutative branch cast the array def itself to
            tfloatdef, which disagreed with the other branch and read the
            float type from an object of the wrong class. }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];
          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              { reuse the register of right as result register }
              location.register:=right.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,left.location,location.register,nil);
            end
          else
            begin
              { load left into an mm register and apply right to it }
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              location.register:=left.location.register;
              cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,right.location,location.register,nil);
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  procedure tx86addnode.second_addfloat;
    { Emits code for a floating point addn/muln/subn/slashn node.
      If the result type is handled by the vector fpu, the work is
      delegated to the AVX or SSE variant. Otherwise x87 code is generated
      and the result is left in ST. }
    const
      { Instruction tables indexed by "one operand is a memory reference":
        false selects the popping register form, true the memory form. The
        "reversed" tables are used when the operands are in swapped order. }
      add_ins     : array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      mul_ins     : array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      sub_ins     : array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
      sub_rev_ins : array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
      div_ins     : array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
      div_rev_ins : array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
    var
      ins         : TAsmOp;
      memnode     : tnode;
      mem_operand,
      reversed    : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      pass_left_right;
      { memnode is set by prepare_x87_locations; when it is assigned, its
        location reference is used as the memory operand below }
      prepare_x87_locations(memnode);
      mem_operand:=assigned(memnode);
      reversed:=nf_swapped in flags;

      case nodetype of
        addn :
          ins:=add_ins[mem_operand];
        muln :
          ins:=mul_ins[mem_operand];
        subn :
          begin
            if reversed then
              ins:=sub_rev_ins[mem_operand]
            else
              ins:=sub_ins[mem_operand];
          end;
        slashn :
          begin
            if reversed then
              ins:=div_rev_ins[mem_operand]
            else
              ins:=div_ins[mem_operand];
          end;
        else
          internalerror(2003042214);
      end;

      if not mem_operand then
        begin
          { popping register form: one fpu stack entry disappears }
          emit_reg_reg(ins,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(ins,tcgsize2opsize[memnode.location.size],memnode.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  procedure tx86addnode.second_cmpfloat;
    { Emits a floating point comparison; the result location is LOC_FLAGS.
      If either operand type is handled by the vector fpu, the work is
      delegated to second_cmpfloatvector. Otherwise both operands are
      forced onto the x87 stack and compared there:
        - on non-x86_64 targets with a cpu type below Pentium2, FCOMPP is
          used and the fpu status word is transferred to the cpu flags
          through AH and SAHF,
        - otherwise FCOMIP is used, which sets the cpu flags directly. }
{$ifdef i8086}
    var
      statusword : treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype<cpu_286 then
            begin
              { below cpu_286 the status word is stored to a 2 byte temp
                with FSTSW and its high byte is then loaded into AH }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
              emit_ref(A_FSTSW,S_NO,statusword);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(statusword.offset);
              emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
              dec(statusword.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusword);
            end
          else
{$endif i8086}
            begin
              { store the status word directly into AX }
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, so the second one is
            removed with an explicit fstp }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1280. {*****************************************************************************
  1281. Add64bit
  1282. *****************************************************************************}
  procedure tx86addnode.second_add64bit;
    { 64 bit arithmetic. With a 64 bit alu this is ordinary ordinal
      arithmetic; without one this generic implementation is not usable
      and raises an internal error, the target has to implement it
      separately. }
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  procedure tx86addnode.second_cmp64bit;
    { 64 bit comparison. With a 64 bit alu this is an ordinary ordinal
      comparison; without one this generic implementation is not usable
      and raises an internal error, the target has to implement it
      separately. }
    begin
{$ifndef cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1301. {*****************************************************************************
  1302. AddOrdinal
  1303. *****************************************************************************}
  procedure tx86addnode.second_addordinal;
    { Emits code for an ordinal addn/subn/muln/xorn/orn/andn node.

      Two code shapes are used:
        - if neither operand is in a LOC_REGISTER (so a register has to be
          allocated anyway), or for an addition whose operands are both
          registers or constants, the three operand code generator
          methods are used with a separate result register;
        - otherwise the operand that already is a LOC_REGISTER is reused
          as destination of a two operand instruction.

      Overflow checking is requested for addn, subn and muln when it is
      enabled in the local switches and neither operand is a pointer. }
    var
      opsize        : tcgsize;
      unsigned      : boolean;
      cgop          : topcg;
      checkoverflow : Boolean;
      swapped_sub   : boolean;
      ovloc         : tlocation;
      tmpreg        : TRegister;
    begin
      { the operation is unsigned as soon as one operand is unsigned }
      unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      ovloc.loc:=LOC_VOID;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        xorn :
          cgop:=OP_XOR;
        orn :
          cgop:=OP_OR;
        andn :
          cgop:=OP_AND;
        muln :
          begin
            if unsigned then
              cgop:=OP_MUL
            else
              cgop:=OP_IMUL;
          end;
        else
          internalerror(2015022501);
      end;

      { only add, sub and mul can overflow; pointer arithmetic is never
        checked }
      checkoverflow:=
        (nodetype in [addn,subn,muln]) and
        (left.resultdef.typ<>pointerdef) and
        (right.resultdef.typ<>pointerdef) and
        (cs_check_overflow in current_settings.localswitches);

      opsize:=def_cgsize(left.resultdef);

      pass_left_right;

      { do we have to allocate a register? If yes, then three operand
        instructions are better }
      if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER)) or
         ((nodetype=addn) and
          (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and
          (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
        begin
          { allocate registers }
          force_reg_left_right(false,true);
          set_result_location_reg;

          if nodetype=subn then
            begin
              { subtract is a special case since it is not commutative }
              if nf_swapped in flags then
                swapleftright;

              if left.location.loc=LOC_CONSTANT then
                begin
                  { a constant minuend has to be loaded into a register first }
                  tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                  hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                    left.location.value,tmpreg);
                  hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.register,tmpreg,location.register,checkoverflow,ovloc);
                end
              else if right.location.loc=LOC_CONSTANT then
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.value,left.location.register,
                  location.register,checkoverflow,ovloc)
              else
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                  right.location.register,left.location.register,
                  location.register,checkoverflow,ovloc);
            end
          else if right.location.loc=LOC_CONSTANT then
            hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
              right.location.value,left.location.register,
              location.register,checkoverflow,ovloc)
          else
            hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
              left.location.register,right.location.register,
              location.register,checkoverflow,ovloc);
        end
      else
        begin
          { at least one location is a register, re-use it, so we can try
            two operand opcodes }
          if left.location.loc<>LOC_REGISTER then
            begin
              if right.location.loc=LOC_REGISTER then
                begin
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                { neither operand in a register is handled by the three
                  operand branch above, so this must not happen }
                Internalerror(2018031102);
            end;

          { at this point, left.location.loc should be LOC_REGISTER }
          swapped_sub:=(nodetype=subn) and (nf_swapped in flags);

          if right.location.loc=LOC_REGISTER then
            begin
              if swapped_sub then
                begin
                  { when swapped, the other register receives the result }
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                    left.location.register,right.location.register);
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                  right.location.register,left.location.register);
            end
          else
            begin
              { right.location<>LOC_REGISTER }
              if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);

              if swapped_sub then
                begin
                  { load right into a new register and apply the old left
                    register to it; the new register becomes the left
                    location }
                  tmpreg:=left.location.register;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
                end
              else
                cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);

              location_freetemp(current_asmdata.CurrAsmList,right.location);
            end;

          location_copy(location,left.location);
        end;

      { emit overflow check if required }
      if checkoverflow then
        cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
    end;
  procedure tx86addnode.second_cmpordinal;
    { Emits an ordinal comparison; the result location is LOC_FLAGS with
      the flags chosen by getresflags according to signedness.
      A constant compared against a memory operand is emitted as a single
      CMP with an immediate; on x86_64 this is only done for 64 bit
      operands when the constant fits into a longint. All other cases go
      through left_must_be_reg and emit_generic_code. }
    var
      opdef         : tdef;
      opsize        : tcgsize;
      unsigned,
      cmp_const_mem : boolean;
    begin
      { the comparison is unsigned as soon as one operand is unsigned }
      unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));
      opdef:=left.resultdef;
      opsize:=def_cgsize(opdef);

      pass_left_right;

      cmp_const_mem:=
        (right.location.loc=LOC_CONSTANT) and
        (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]);
{$ifdef x86_64}
      { for 64 bit operands the constant must be within the longint range }
      if cmp_const_mem and (opsize in [OS_64,OS_S64]) then
        cmp_const_mem:=
          (right.location.value>=low(longint)) and
          (right.location.value<=high(longint));
{$endif x86_64}

      if cmp_const_mem then
        emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference)
      else
        begin
          left_must_be_reg(opdef,opsize,false);
          emit_generic_code(A_CMP,opsize,unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end;
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(unsigned);
    end;
  1493. begin
  1494. caddnode:=tx86addnode;
  1495. end.