nx86add.pas 64 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  { Add/compare node shared by the x86 family targets (see the unit header:
    common code generation for add nodes on the i386 and x86). It extends
    tcgaddnode with x86 specific helpers and overrides. }
  26. tx86addnode = class(tcgaddnode)
  27. protected
  { returns the flags condition matching the node's comparison type, taking
    the signedness and the nf_swapped flag into account }
  28. function getresflags(unsigned : boolean) : tresflags;
  { same as getresflags, but yields the F_F* conditions used for floating
    point comparisons }
  29. function getfpuresflags : tresflags;
  { makes sure left.location is a register, swapping the operand locations
    when that is allowed (noswap=false) and right already is one }
  30. procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
  { loads both operands into fpu registers and updates nf_swapped }
  31. procedure force_left_and_right_fpureg;
  { prepares the operands for an x87 instruction; an operand that can stay
    in memory is returned in refnode, otherwise refnode is nil }
  32. procedure prepare_x87_locations(out refnode: tnode);
  { emits "op right,left"; left must be a register. The flags register is
    marked as allocated before the instruction when AllocFlags is set }
  33. procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize;AllocFlags:boolean);
  { emits the integer operation including the constant optimizations and the
    optional overflow check }
  34. procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);
  { NOTE(review): the implementations of the three helpers below are outside
    the part of the unit reviewed here }
  35. procedure second_cmpfloatvector;
  36. procedure second_addfloatsse;
  37. procedure second_addfloatavx;
  38. public
  { everything below overrides a method inherited from the ancestor classes }
  39. function use_fma : boolean;override;
  40. procedure second_addfloat;override;
  { the small set addition is not compiled for the i8086 target }
  41. {$ifndef i8086}
  42. procedure second_addsmallset;override;
  43. {$endif not i8086}
  44. procedure second_add64bit;override;
  45. procedure second_cmpfloat;override;
  46. procedure second_cmpsmallset;override;
  47. procedure second_cmp64bit;override;
  48. procedure second_cmpordinal;override;
  49. procedure second_addordinal;override;
  { only available when the compiler is built with MMX support }
  50. {$ifdef SUPPORT_MMX}
  51. procedure second_opmmx;override;
  52. {$endif SUPPORT_MMX}
  53. procedure second_opvector;override;
  54. end;
  55. implementation
  56. uses
  57. globtype,globals,
  58. verbose,cutils,compinnr,
  59. cpuinfo,
  60. aasmbase,aasmdata,aasmcpu,
  61. symconst,symdef,
  62. cgobj,hlcgobj,cgx86,cga,cgutils,
  63. tgobj,ncgutil,
  64. ncon,nset,ninl,
  65. defutil;
  66. { Range check must be disabled explicitly as the code serves
  67. on three different architecture sizes }
  68. {$R-}
  69. {*****************************************************************************
  70. Helpers
  71. *****************************************************************************}
  { Emits the integer instruction "op" for the two operands of this node.

    On entry left.location is expected to be a LOC_REGISTER; the result ends
    up in left.location.register (the locations may get swapped on the way).

    op         : the x86 instruction to emit
    opsize     : operand size of the operation
    unsigned   : selects the condition used by the overflow check
                 (F_AE when set, F_NO otherwise)
    extra_not  : one operand has to be inverted with NOT before the operation
                 (used by the callers for set difference)
    mboverflow : the operation may overflow; when overflow checking is
                 enabled a call to FPC_OVERFLOW is generated }
  72. procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
  73. var
  74. power : longint;
  75. hl4 : tasmlabel;
  76. r : Tregister;
  77. href : treference;
  78. overflowcheck: boolean;
  79. comparison: boolean;
  80. begin
  81. overflowcheck:=needoverflowcheck;
  { comparisons only produce flags, so the flags register has to be marked
    as allocated before the instruction is emitted }
  82. comparison:=
  83. (op=A_CMP) or (op=A_TEST) or (op=A_BT) or is_boolean(resultdef);
  84. { at this point, left.location.loc should be LOC_REGISTER }
  85. if right.location.loc=LOC_REGISTER then
  86. begin
  87. { right.location is a LOC_REGISTER }
  88. { when swapped another result register }
  89. if (nodetype=subn) and (nf_swapped in flags) then
  90. begin
  91. if extra_not then
  92. emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
  { the result is computed in the register of right, afterwards the
    locations are exchanged so that left holds the result again }
  93. emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
  94. { newly swapped also set swapped flag }
  95. location_swap(left.location,right.location);
  96. toggleflag(nf_swapped);
  97. end
  98. else
  99. begin
  100. if extra_not then
  101. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  { for these operations the operand order does not matter, so the
    locations are exchanged before emitting the instruction }
  102. if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
  103. location_swap(left.location,right.location);
  104. if comparison then
  105. cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
  106. emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
  107. end;
  108. end
  109. else
  110. begin
  111. { right.location is not a LOC_REGISTER }
  112. if (nodetype=subn) and (nf_swapped in flags) then
  113. begin
  114. if extra_not then
  115. cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
  { load right into a scratch register, compute the result there and
    copy it back into the register of left }
  116. r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  117. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
  118. if comparison then
  119. cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
  120. emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
  121. cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
  122. end
  123. else
  124. begin
  125. { Optimizations when right.location is a constant value }
  { (in)equality test against the constant 0: use TEST instead of CMP }
  126. if (op=A_CMP) and
  127. (nodetype in [equaln,unequaln]) and
  128. (right.location.loc=LOC_CONSTANT) and
  129. (right.location.value=0) then
  130. begin
  131. { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
  132. spilling, while 'test %reg,%reg' still requires loading into register.
  133. If spilling is not necessary, it is changed back into 'test %reg,%reg' by
  134. peephole optimizer (this optimization is currently available only for i386). }
  135. cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
  136. {$ifdef i386}
  137. emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
  138. {$else i386}
  139. emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
  140. {$endif i386}
  141. end
  142. else
  { adding the constant 1 without overflow checking: INC, provided
    UseIncDec allows it }
  143. if (op=A_ADD) and
  144. (right.location.loc=LOC_CONSTANT) and
  145. (right.location.value=1) and
  146. not(cs_check_overflow in current_settings.localswitches) and
  147. UseIncDec then
  148. begin
  149. emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
  150. end
  151. else
  { subtracting the constant 1 without overflow checking: DEC }
  152. if (op=A_SUB) and
  153. (right.location.loc=LOC_CONSTANT) and
  154. (right.location.value=1) and
  155. not(cs_check_overflow in current_settings.localswitches) and
  156. UseIncDec then
  157. begin
  158. emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
  159. end
  160. else
  { multiplication by a power of two without overflow checking: shift
    left by "power" (presumably the exponent returned by ispowerof2) }
  161. if (op=A_IMUL) and
  162. (right.location.loc=LOC_CONSTANT) and
  163. (ispowerof2(int64(right.location.value),power)) and
  164. not(cs_check_overflow in current_settings.localswitches) then
  165. begin
  166. emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
  167. end
  { multiplication by a constant whose predecessor is a power of two with
    power in 1..3: LEA with the register of left as base and as index and
    (constant-1) as scale factor; the result goes into a new register }
  168. else if (op=A_IMUL) and
  169. (right.location.loc=LOC_CONSTANT) and
  170. (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
  171. (power in [1..3]) and
  172. not(cs_check_overflow in current_settings.localswitches) then
  173. begin
  174. reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
  175. href.index:=left.location.register;
  176. href.scalefactor:=int64(right.location.value)-1;
  177. left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  178. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
  179. end
  180. else
  181. begin
  { no special case applies }
  182. if extra_not then
  183. begin
  { right is loaded into a scratch register and inverted there; note
    that this path always combines the operands with AND, "op" is not
    used here }
  184. r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  185. hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
  186. emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
  187. if comparison or (mboverflow and overflowcheck) then
  188. cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
  189. emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
  190. end
  191. else
  192. emit_op_right_left(op,opsize,comparison or (mboverflow and overflowcheck));
  193. end;
  194. end;
  195. end;
  196. { only in case of overflow operations }
  197. { produce overflow code }
  198. { we must put it here directly, because sign of operation }
  199. { is in unsigned VAR!! }
  200. if mboverflow then
  201. begin
  202. if cs_check_overflow in current_settings.localswitches then
  203. begin
  { jump over the call of the overflow handler when the flags show that
    no overflow happened }
  204. current_asmdata.getjumplabel(hl4);
  205. if unsigned then
  206. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
  207. else
  208. cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
  { the flags are only released here if the result of the node is not
    itself the flags of a comparison }
  209. if not comparison then
  210. cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
  211. cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
  212. cg.a_label(current_asmdata.CurrAsmList,hl4);
  213. end;
  214. end;
  215. end;
  { Guarantees that left.location is a LOC_REGISTER afterwards.

    opdef/opsize : type and size the operation is carried out in
    noswap       : when set, the operand locations must not be exchanged

    If left is no register yet and swapping is allowed, a right operand that
    already lives in a (constant) register is taken over by exchanging the
    locations and toggling nf_swapped. Otherwise left is loaded into a
    register. Finally both operands are adapted to the operation size. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      took_const_reg : boolean;
    begin
      if left.location.loc<>LOC_REGISTER then
        begin
          if (not noswap) and
             (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
            begin
              { has to be checked before the swap, afterwards right.location
                describes the former left operand }
              took_const_reg:=(right.location.loc=LOC_CREGISTER);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
              if took_const_reg then
                begin
                  { a register variable may be used directly if the node is
                    a comparison, because a comparison leaves the register
                    value untouched }
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,
                    left.location,left.resultdef,opdef,
                    nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);
                  location:=left.location;
                end;
            end
          else
            begin
              { no swap possible: load left, for comparisons a register
                variable may be reused as is }
              hlcg.location_force_reg(current_asmdata.CurrAsmList,
                left.location,left.resultdef,opdef,
                nodetype in [ltn,lten,gtn,gten,equaln,unequaln]);
            end;
        end;

      { operands whose size differs from the operation size (ignoring the
        signedness) have to be converted, constants need no conversion }
      if not((right.location.loc=LOC_CONSTANT) or
             (tcgsize2unsigned[right.location.size]=tcgsize2unsigned[opsize])) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if not((left.location.loc=LOC_CONSTANT) or
             (tcgsize2unsigned[left.location.size]=tcgsize2unsigned[opsize])) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into fpu registers: right first (if it is not there
    yet), then left. If left already was in an fpu register nothing is
    loaded for it; instead nf_swapped is toggled, because the operands then
    are in the opposite order on the fpu stack. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      { step 1: bring right onto the fpu stack if necessary }
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      { step 2: either load left as well, or record the reversed operand
        order if it was on the stack from the beginning }
      if left.location.loc=LOC_FPUREGISTER then
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  276. { Makes sides suitable for executing an x87 instruction:
  277. if either side is OS_F32/OS_F64-sized LOC_REFERENCE, it is returned in 'refnode'
  278. everything else is loaded to FPU stack. }
  { refnode is nil on return if both operands ended up on the FPU stack.
    nf_swapped is toggled along the way to record the resulting operand order. }
  279. procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
  280. begin
  281. refnode:=nil;
  282. { later on, no mm registers are allowed, so transfer everything to memory here
  283. below it is loaded into an fpu register if needed }
  284. if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
  285. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  286. if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
  287. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  { the case selector is the number of operands that already are in an
    fpu register }
  288. case ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER) of
  289. 0:
  290. begin
  { none is loaded yet: right always goes to the FPU stack, left has to
    be a memory reference at this point }
  291. hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
  292. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  293. InternalError(2013090803);
  { a single or double sized left operand stays in memory and is
    returned to the caller, any other size is loaded as well }
  294. if (left.location.size in [OS_F32,OS_F64]) then
  295. begin
  296. refnode:=left;
  297. toggleflag(nf_swapped);
  298. end
  299. else
  300. hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  301. end;
  302. 1:
  303. begin { if left is on the stack then swap. }
  { refnode becomes the operand that is not in an fpu register, it has
    to be a memory reference }
  304. if (left.location.loc=LOC_FPUREGISTER) then
  305. refnode:=right
  306. else
  307. refnode:=left;
  308. if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  309. InternalError(2013090801);
  310. if not (refnode.location.size in [OS_F32,OS_F64]) then
  311. begin
  { not single or double sized: load it too and report no memory
    operand to the caller }
  312. hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
  313. if (refnode=right) then
  314. toggleflag(nf_swapped);
  315. refnode:=nil;
  316. end
  317. else
  318. begin
  { the operand stays in memory }
  319. if (refnode=left) then
  320. toggleflag(nf_swapped);
  321. end;
  322. end;
  323. 2: { fpu operands are always in the wrong order on the stack }
  324. toggleflag(nf_swapped);
  325. else
  326. InternalError(2013090802);
  327. end;
  328. end;
  { Appends the instruction "op right,left" to the current assembler list.

    left.location must be a register. right may be a register, a memory
    reference or a constant; subset locations are loaded into a register
    first. Any other location of right is an internal error.

    op         : instruction to emit
    opsize     : operand size
    AllocFlags : when set, the flags register is marked as allocated
                 directly in front of the instruction }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize;AllocFlags:boolean);
{$ifdef x86_64}
    var
      constreg : tregister;
{$endif x86_64}

    { marks the flags register as allocated if the caller requested it }
    procedure claim_flags;
      begin
        if AllocFlags then
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
      end;

    begin
      if right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF] then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          begin
            claim_flags;
            current_asmdata.CurrAsmList.concat(
              taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            { the reference is simplified first, so that the flags get
              allocated immediately before the operation itself }
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            claim_flags;
            current_asmdata.CurrAsmList.concat(
              taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { a 64 bit operation can only encode constants which fit into a
              signed 32 bit immediate, anything else needs a register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and
                   (right.location.value<=high(longint))) then
              begin
                constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
                claim_flags;
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],constreg,left.location.register));
              end
            else
{$endif x86_64}
              begin
                claim_flags;
                current_asmdata.CurrAsmList.concat(
                  taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
              end;
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the flags condition which is true if the comparison described by
    nodetype holds after the compare instruction.

    unsigned : selects the unsigned conditions (below/above) instead of the
               signed ones (less/greater)

    If nf_swapped is set the operands were compared in exchanged order, so
    the mirrored condition is returned. equaln and unequaln do not depend on
    either of them. Any other node type than the six comparisons raises an
    internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;

    { picks one of the four candidates according to signedness and
      operand order }
    function choose(signed_plain,signed_swapped,unsigned_plain,unsigned_swapped : tresflags) : tresflags;
      begin
        if unsigned then
          begin
            if swapped then
              choose:=unsigned_swapped
            else
              choose:=unsigned_plain;
          end
        else
          begin
            if swapped then
              choose:=signed_swapped
            else
              choose:=signed_plain;
          end;
      end;

    begin
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_E;
        unequaln :
          result:=F_NE;
        ltn :
          result:=choose(F_L,F_G,F_B,F_A);
        lten :
          result:=choose(F_LE,F_GE,F_BE,F_AE);
        gtn :
          result:=choose(F_G,F_L,F_A,F_B);
        gten :
          result:=choose(F_GE,F_LE,F_AE,F_BE);
        else
          begin
            { not a comparison node, every combination has its own
              internal error number }
            if unsigned then
              begin
                if swapped then
                  internalerror(2013120107)
                else
                  internalerror(2013120108);
              end
            else
              begin
                if swapped then
                  internalerror(2013120105)
                else
                  internalerror(2013120106);
              end;
          end;
      end;
    end;
  { Returns the floating point flags condition (F_F*) belonging to the
    comparison in nodetype. If nf_swapped is set, the mirrored condition is
    returned for the ordering comparisons; equaln and unequaln are not
    affected. Node types other than the six comparisons raise an internal
    error. }
  function tx86addnode.getfpuresflags : tresflags;
    var
      mirrored : boolean;
    begin
      mirrored:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        ltn :
          begin
            if mirrored then
              result:=F_FA
            else
              result:=F_FB;
          end;
        lten :
          begin
            if mirrored then
              result:=F_FAE
            else
              result:=F_FBE;
          end;
        gtn :
          begin
            if mirrored then
              result:=F_FB
            else
              result:=F_FA;
          end;
        gten :
          begin
            if mirrored then
              result:=F_FBE
            else
              result:=F_FAE;
          end;
        else
          begin
            if mirrored then
              internalerror(2014031402)
            else
              internalerror(2014031403);
          end;
      end;
    end;
  454. {*****************************************************************************
  455. AddSmallSet
  456. *****************************************************************************}
  457. {$ifndef i8086}
  { Generates the code for the operations on small sets (sets handled in an
    integer register): union, symmetric difference, intersection, difference
    and the addition of a single element.

    The node type is mapped to an x86 instruction (OR/BTS, XOR, AND); for
    the set difference the subtrahend additionally has to be inverted. The
    result is left in "location". }
  458. procedure tx86addnode.second_addsmallset;
  459. var
  460. setbase : aint;
  461. opdef : tdef;
  462. opsize : TCGSize;
  463. op : TAsmOp;
  464. extra_not,
  465. noswap : boolean;
  466. all_member_optimization:boolean;
  467. begin
  468. pass_left_right;
  469. noswap:=false;
  470. extra_not:=false;
  471. all_member_optimization:=false;
  { the operation is carried out in the type and size of the result }
  472. opdef:=resultdef;
  473. opsize:=int_cgsize(opdef.size);
  { the set base is taken from whichever operand is the set, the other one
    may be a set element }
  474. if (left.resultdef.typ=setdef) then
  475. setbase:=tsetdef(left.resultdef).setbase
  476. else
  477. setbase:=tsetdef(right.resultdef).setbase;
  478. case nodetype of
  479. addn :
  480. begin
  481. { adding elements is not commutative }
  482. if (nf_swapped in flags) and (left.nodetype=setelementn) then
  483. swapleftright;
  484. { are we adding set elements ? }
  485. if right.nodetype=setelementn then
  486. begin
  487. { no range support for smallsets! }
  488. if assigned(tsetelementnode(right).right) then
  489. internalerror(43244);
  490. { btsb isn't supported }
  491. if opsize=OS_8 then
  492. begin
  493. opsize:=OS_32;
  494. opdef:=u32inttype;
  495. end;
  496. { bts requires both elements to be registers }
  497. hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
  498. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
  499. register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);
  500. op:=A_BTS;
  { the operands of bts have different meanings and therefore must not
    be exchanged later on }
  501. noswap:=true;
  502. end
  503. else
  504. op:=A_OR;
  505. end;
  506. symdifn :
  507. op:=A_XOR;
  508. muln :
  509. op:=A_AND;
  510. subn :
  511. begin
  { the difference is done with AND and the inverted subtrahend }
  512. op:=A_AND;
  { the minuend is the constant -1: the result is simply the inverted
    subtrahend, see the handling of all_member_optimization below }
  513. if (not(nf_swapped in flags) and (left.location.loc=LOC_CONSTANT) and (left.location.value=-1)) or
  514. ((nf_swapped in flags) and (right.location.loc=LOC_CONSTANT) and (right.location.value=-1)) then
  515. all_member_optimization:=true;
  { a constant subtrahend is inverted at compile time, otherwise a NOT
    instruction is required }
  516. if (not(nf_swapped in flags)) and
  517. (right.location.loc=LOC_CONSTANT) then
  518. right.location.value := not(right.location.value)
  519. else if (nf_swapped in flags) and
  520. (left.location.loc=LOC_CONSTANT) then
  521. left.location.value := not(left.location.value)
  522. else
  523. extra_not:=true;
  524. end;
  525. xorn :
  526. op:=A_XOR;
  527. orn :
  528. op:=A_OR;
  529. andn :
  530. op:=A_AND;
  531. else
  532. internalerror(2003042215);
  533. end;
  534. if all_member_optimization then
  535. begin
  536. {A set expression [0..31]-x can be implemented with a simple NOT.}
  537. if nf_swapped in flags then
  538. begin
  539. { newly swapped also set swapped flag }
  540. location_swap(left.location,right.location);
  541. toggleflag(nf_swapped);
  542. end;
  { now right is the subtrahend: load it, invert it and use it as result }
  543. hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
  544. emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
  545. location:=right.location;
  546. end
  547. else
  548. begin
  549. { can we use the BMI1 instruction andn? }
  550. if (op=A_AND) and extra_not and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
  551. (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
  552. begin
  { andn writes to a third register, so a new result register is
    allocated instead of reusing the one of left }
  553. location_reset(location,LOC_REGISTER,left.location.size);
  554. location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);
  555. if nf_swapped in flags then
  556. begin
  557. location_swap(left.location,right.location);
  558. toggleflag(nf_swapped);
  559. end;
  { right has to be a register, left may be a register or a memory
    reference }
  560. hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,true);
  561. if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
  562. hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,opdef,true);
  563. case left.location.loc of
  564. LOC_CREGISTER,LOC_REGISTER:
  565. emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.register,right.location.register,location.register);
  566. LOC_CREFERENCE,LOC_REFERENCE:
  567. emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.reference,right.location.register,location.register);
  568. else
  569. Internalerror(2018040201);
  570. end;
  571. end
  572. else
  573. begin
  574. { left must be a register }
  575. left_must_be_reg(opdef,opsize,noswap);
  { set operations are unsigned and cannot overflow }
  576. emit_generic_code(op,opsize,true,extra_not,false);
  577. location_freetemp(current_asmdata.CurrAsmList,right.location);
  578. { left is always a register and contains the result }
  579. location:=left.location;
  580. end;
  581. end;
  582. { fix the changed opsize we did above because of the missing btsb }
  583. if opsize<>int_cgsize(resultdef.size) then
  584. hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
  585. end;
  586. {$endif not i8086}
  { Generates the code for comparing two small sets. The result is returned
    in the flags (LOC_FLAGS).

    equaln/unequaln are a plain CMP of both operands. The subset tests
    lten/gten are reduced to an equality test: the operands are arranged as
    needed, left is replaced by "left and right" and the result is compared
    with right. All other node types raise an internal error. }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmpdef : tdef;
      cmpsize : TCGSize;
    begin
      pass_left_right;
      { type and size are taken from left before the operands are
        possibly exchanged below }
      cmpdef:=left.resultdef;
      cmpsize:=int_cgsize(cmpdef.size);

      if nodetype in [lten,gten] then
        begin
          { exchange the operands for an unswapped lten or a swapped gten }
          if (nodetype=lten) xor (nf_swapped in flags) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmpdef,false);
          emit_op_right_left(A_AND,cmpsize,False);
          { warning: ugly hack, the comparison below has to yield the equal
            condition, so the node is turned into an equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042215);

      { the compare itself needs left in a register }
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,true,false,false);

      { both operands are no longer needed, only the flags are }
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  622. {*****************************************************************************
  623. AddMMX
  624. *****************************************************************************}
  625. {$ifdef SUPPORT_MMX}
  626. procedure tx86addnode.second_opmmx;
  627. var
  628. op : TAsmOp;
  629. cmpop : boolean;
  630. mmxbase : tmmxtype;
  631. hreg,
  632. hregister : tregister;
  633. begin
  634. pass_left_right;
  635. cmpop:=false;
  636. op:=A_NOP;
  637. mmxbase:=mmx_type(left.resultdef);
  638. location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
  639. case nodetype of
  640. addn :
  641. begin
  642. if (cs_mmx_saturation in current_settings.localswitches) then
  643. begin
  644. case mmxbase of
  645. mmxs8bit:
  646. op:=A_PADDSB;
  647. mmxu8bit:
  648. op:=A_PADDUSB;
  649. mmxs16bit,mmxfixed16:
  650. op:=A_PADDSW;
  651. mmxu16bit:
  652. op:=A_PADDUSW;
  653. end;
  654. end
  655. else
  656. begin
  657. case mmxbase of
  658. mmxs8bit,mmxu8bit:
  659. op:=A_PADDB;
  660. mmxs16bit,mmxu16bit,mmxfixed16:
  661. op:=A_PADDW;
  662. mmxs32bit,mmxu32bit:
  663. op:=A_PADDD;
  664. end;
  665. end;
  666. end;
  667. muln :
  668. begin
  669. case mmxbase of
  670. mmxs16bit,mmxu16bit:
  671. op:=A_PMULLW;
  672. mmxfixed16:
  673. op:=A_PMULHW;
  674. end;
  675. end;
  676. subn :
  677. begin
  678. if (cs_mmx_saturation in current_settings.localswitches) then
  679. begin
  680. case mmxbase of
  681. mmxs8bit:
  682. op:=A_PSUBSB;
  683. mmxu8bit:
  684. op:=A_PSUBUSB;
  685. mmxs16bit,mmxfixed16:
  686. op:=A_PSUBSB;
  687. mmxu16bit:
  688. op:=A_PSUBUSW;
  689. end;
  690. end
  691. else
  692. begin
  693. case mmxbase of
  694. mmxs8bit,mmxu8bit:
  695. op:=A_PSUBB;
  696. mmxs16bit,mmxu16bit,mmxfixed16:
  697. op:=A_PSUBW;
  698. mmxs32bit,mmxu32bit:
  699. op:=A_PSUBD;
  700. end;
  701. end;
  702. end;
  703. xorn:
  704. op:=A_PXOR;
  705. orn:
  706. op:=A_POR;
  707. andn:
  708. op:=A_PAND;
  709. else
  710. internalerror(2003042214);
  711. end;
  712. if op = A_NOP then
  713. internalerror(201408201);
  714. { left and right no register? }
  715. { then one must be demanded }
  716. if (left.location.loc<>LOC_MMXREGISTER) then
  717. begin
  718. if (right.location.loc=LOC_MMXREGISTER) then
  719. begin
  720. location_swap(left.location,right.location);
  721. toggleflag(nf_swapped);
  722. end
  723. else
  724. begin
  725. { register variable ? }
  726. if (left.location.loc=LOC_CMMXREGISTER) then
  727. begin
  728. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  729. emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
  730. end
  731. else
  732. begin
  733. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  734. internalerror(200203245);
  735. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  736. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  737. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
  738. end;
  739. location_reset(left.location,LOC_MMXREGISTER,OS_NO);
  740. left.location.register:=hregister;
  741. end;
  742. end;
  743. { at this point, left.location.loc should be LOC_MMXREGISTER }
  744. if right.location.loc<>LOC_MMXREGISTER then
  745. begin
  746. if (nodetype=subn) and (nf_swapped in flags) then
  747. begin
  748. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  749. if right.location.loc=LOC_CMMXREGISTER then
  750. begin
  751. emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
  752. emit_reg_reg(op,S_NO,left.location.register,hreg);
  753. end
  754. else
  755. begin
  756. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  757. internalerror(200203247);
  758. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  759. emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
  760. emit_reg_reg(op,S_NO,left.location.register,hreg);
  761. end;
  762. location.register:=hreg;
  763. end
  764. else
  765. begin
  766. if (right.location.loc=LOC_CMMXREGISTER) then
  767. emit_reg_reg(op,S_NO,right.location.register,left.location.register)
  768. else
  769. begin
  770. if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  771. internalerror(200203246);
  772. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  773. emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
  774. end;
  775. location.register:=left.location.register;
  776. end;
  777. end
  778. else
  779. begin
  780. { right.location=LOC_MMXREGISTER }
  781. if (nodetype=subn) and (nf_swapped in flags) then
  782. begin
  783. emit_reg_reg(op,S_NO,left.location.register,right.location.register);
  784. location_swap(left.location,right.location);
  785. toggleflag(nf_swapped);
  786. end
  787. else
  788. begin
  789. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  790. end;
  791. location.register:=left.location.register;
  792. end;
  793. location_freetemp(current_asmdata.CurrAsmList,right.location);
  794. if cmpop then
  795. location_freetemp(current_asmdata.CurrAsmList,left.location);
  796. end;
  797. {$endif SUPPORT_MMX}
  798. {*****************************************************************************
  799. AddFloat
  800. *****************************************************************************}
  801. procedure tx86addnode.second_addfloatsse;
  802. var
  803. op : topcg;
  804. sqr_sum : boolean;
  805. tmp : tnode;
  806. begin
  807. sqr_sum:=false;
  808. if (current_settings.fputype>=fpu_sse3) and
  809. use_vectorfpu(resultdef) and
  810. (nodetype in [addn,subn]) and
  811. (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
  812. (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
  813. begin
  814. sqr_sum:=true;
  815. tmp:=tinlinenode(left).left;
  816. tinlinenode(left).left:=nil;
  817. left.free;
  818. left:=tmp;
  819. tmp:=tinlinenode(right).left;
  820. tinlinenode(right).left:=nil;
  821. right.free;
  822. right:=tmp;
  823. end;
  824. pass_left_right;
  825. { fpu operands are always in reversed order on the stack }
  826. if (left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER) then
  827. toggleflag(nf_swapped);
  828. if (nf_swapped in flags) then
  829. { can't use swapleftright if both are on the fpu stack, since then }
  830. { both are "R_ST" -> nothing would change -> manually switch }
  831. if (left.location.loc = LOC_FPUREGISTER) and
  832. (right.location.loc = LOC_FPUREGISTER) then
  833. emit_none(A_FXCH,S_NO)
  834. else
  835. swapleftright;
  836. case nodetype of
  837. addn :
  838. op:=OP_ADD;
  839. muln :
  840. op:=OP_MUL;
  841. subn :
  842. op:=OP_SUB;
  843. slashn :
  844. op:=OP_DIV;
  845. else
  846. internalerror(200312231);
  847. end;
  848. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  849. if sqr_sum then
  850. begin
  851. if nf_swapped in flags then
  852. swapleftright;
  853. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  854. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  855. location:=left.location;
  856. if is_double(resultdef) then
  857. begin
  858. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
  859. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
  860. case nodetype of
  861. addn:
  862. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
  863. subn:
  864. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
  865. else
  866. internalerror(201108162);
  867. end;
  868. end
  869. else
  870. begin
  871. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
  872. { ensure that bits 64..127 contain valid values }
  873. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
  874. { the data is now in bits 0..32 and 64..95 }
  875. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
  876. case nodetype of
  877. addn:
  878. begin
  879. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
  880. end;
  881. subn:
  882. begin
  883. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
  884. end;
  885. else
  886. internalerror(201108163);
  887. end;
  888. end
  889. end
  890. { we can use only right as left operand if the operation is commutative }
  891. else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  892. begin
  893. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  894. cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
  895. { force floating point reg. location to be written to memory,
  896. we don't force it to mm register because writing to memory
  897. allows probably shorter code because there is no direct fpu->mm register
  898. copy instruction
  899. }
  900. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  901. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  902. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
  903. end
  904. else
  905. begin
  906. if nf_swapped in flags then
  907. swapleftright;
  908. { force floating point reg. location to be written to memory,
  909. we don't force it to mm register because writing to memory
  910. allows probably shorter code because there is no direct fpu->mm register
  911. copy instruction
  912. }
  913. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  914. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  915. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
  916. cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
  917. { force floating point reg. location to be written to memory,
  918. we don't force it to mm register because writing to memory
  919. allows probably shorter code because there is no direct fpu->mm register
  920. copy instruction
  921. }
  922. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  923. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  924. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
  925. end;
  926. end;
  927. procedure tx86addnode.second_addfloatavx;
  928. var
  929. op : topcg;
  930. sqr_sum : boolean;
  931. {$ifdef dummy}
  932. tmp : tnode;
  933. {$endif dummy}
  934. begin
  935. sqr_sum:=false;
  936. {$ifdef dummy}
  937. if (current_settings.fputype>=fpu_sse3) and
  938. use_vectorfpu(resultdef) and
  939. (nodetype in [addn,subn]) and
  940. (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
  941. (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
  942. begin
  943. sqr_sum:=true;
  944. tmp:=tinlinenode(left).left;
  945. tinlinenode(left).left:=nil;
  946. left.free;
  947. left:=tmp;
  948. tmp:=tinlinenode(right).left;
  949. tinlinenode(right).left:=nil;
  950. right.free;
  951. right:=tmp;
  952. end;
  953. {$endif dummy}
  954. pass_left_right;
  955. { fpu operands are always in reversed order on the stack }
  956. if (left.location.loc=LOC_FPUREGISTER) and (right.location.loc=LOC_FPUREGISTER) then
  957. toggleflag(nf_swapped);
  958. if (nf_swapped in flags) then
  959. { can't use swapleftright if both are on the fpu stack, since then }
  960. { both are "R_ST" -> nothing would change -> manually switch }
  961. if (left.location.loc = LOC_FPUREGISTER) and
  962. (right.location.loc = LOC_FPUREGISTER) then
  963. emit_none(A_FXCH,S_NO)
  964. else
  965. swapleftright;
  966. case nodetype of
  967. addn :
  968. op:=OP_ADD;
  969. muln :
  970. op:=OP_MUL;
  971. subn :
  972. op:=OP_SUB;
  973. slashn :
  974. op:=OP_DIV;
  975. else
  976. internalerror(200312231);
  977. end;
  978. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  979. if sqr_sum then
  980. begin
  981. if nf_swapped in flags then
  982. swapleftright;
  983. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
  984. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  985. location:=left.location;
  986. if is_double(resultdef) then
  987. begin
  988. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
  989. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
  990. case nodetype of
  991. addn:
  992. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
  993. subn:
  994. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
  995. else
  996. internalerror(201108162);
  997. end;
  998. end
  999. else
  1000. begin
  1001. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
  1002. { ensure that bits 64..127 contain valid values }
  1003. current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
  1004. { the data is now in bits 0..32 and 64..95 }
  1005. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
  1006. case nodetype of
  1007. addn:
  1008. begin
  1009. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
  1010. end;
  1011. subn:
  1012. begin
  1013. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
  1014. end;
  1015. else
  1016. internalerror(201108163);
  1017. end;
  1018. end
  1019. end
  1020. { left*2 ? }
  1021. else if (nodetype=muln) and is_constrealnode(right) and is_number_float(trealconstnode(right).value_real) and (trealconstnode(right).value_real=2) then
  1022. begin
  1023. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
  1024. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1025. cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
  1026. left.location.register,
  1027. left.location.register,
  1028. location.register,
  1029. mms_movescalar);
  1030. end
  1031. { right*2 ? }
  1032. else if (nodetype=muln) and is_constrealnode(left) and is_number_float(trealconstnode(left).value_real) and (trealconstnode(left).value_real=2) then
  1033. begin
  1034. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
  1035. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
  1036. cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
  1037. right.location.register,
  1038. right.location.register,
  1039. location.register,
  1040. mms_movescalar);
  1041. end
  1042. { we can use only right as left operand if the operation is commutative }
  1043. else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  1044. begin
  1045. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
  1046. { force floating point reg. location to be written to memory,
  1047. we don't force it to mm register because writing to memory
  1048. allows probably shorter code because there is no direct fpu->mm register
  1049. copy instruction
  1050. }
  1051. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1052. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  1053. cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  1054. left.location,
  1055. right.location.register,
  1056. location.register,
  1057. mms_movescalar);
  1058. end
  1059. else
  1060. begin
  1061. if (nf_swapped in flags) then
  1062. swapleftright;
  1063. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1064. location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
  1065. { force floating point reg. location to be written to memory,
  1066. we don't force it to mm register because writing to memory
  1067. allows probably shorter code because there is no direct fpu->mm register
  1068. copy instruction
  1069. }
  1070. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1071. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  1072. cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
  1073. right.location,
  1074. left.location.register,
  1075. location.register,
  1076. mms_movescalar);
  1077. end;
  1078. end;
  1079. function tx86addnode.use_fma : boolean;
  1080. begin
  1081. {$ifndef i8086}
  1082. { test if the result stays in an xmm register, fiddeling with fpu registers and fma makes no sense }
  1083. Result:=use_vectorfpu(resultdef) and
  1084. ((cpu_capabilities[current_settings.cputype]*[CPUX86_HAS_FMA,CPUX86_HAS_FMA4])<>[]);
  1085. {$else i8086}
  1086. Result:=inherited use_fma;
  1087. {$endif i8086}
  1088. end;
  1089. procedure tx86addnode.second_cmpfloatvector;
  1090. var
  1091. op : tasmop;
  1092. const
  1093. ops_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
  1094. ops_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
  1095. begin
  1096. if is_single(left.resultdef) then
  1097. op:=ops_single[UseAVX]
  1098. else if is_double(left.resultdef) then
  1099. op:=ops_double[UseAVX]
  1100. else
  1101. internalerror(200402222);
  1102. pass_left_right;
  1103. location_reset(location,LOC_FLAGS,OS_NO);
  1104. { Direct move fpu->mm register is not possible, so force any fpu operands to
  1105. memory (not to mm registers because one of the memory locations can be used
  1106. directly in compare instruction, yielding shorter code) }
  1107. if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1108. hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
  1109. if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
  1110. hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
  1111. if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
  1112. begin
  1113. case left.location.loc of
  1114. LOC_REFERENCE,LOC_CREFERENCE:
  1115. begin
  1116. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  1117. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,left.location.reference,right.location.register));
  1118. end;
  1119. LOC_MMREGISTER,LOC_CMMREGISTER:
  1120. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,left.location.register,right.location.register));
  1121. else
  1122. internalerror(200402221);
  1123. end;
  1124. toggleflag(nf_swapped);
  1125. end
  1126. else
  1127. begin
  1128. hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
  1129. case right.location.loc of
  1130. LOC_REFERENCE,LOC_CREFERENCE:
  1131. begin
  1132. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  1133. current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,S_NO,right.location.reference,left.location.register));
  1134. end;
  1135. LOC_MMREGISTER,LOC_CMMREGISTER:
  1136. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,S_NO,right.location.register,left.location.register));
  1137. else
  1138. internalerror(200402223);
  1139. end;
  1140. end;
  1141. location.resflags:=getfpuresflags;
  1142. location_freetemp(current_asmdata.CurrAsmList,left.location);
  1143. location_freetemp(current_asmdata.CurrAsmList,right.location);
  1144. end;
  1145. procedure tx86addnode.second_opvector;
  1146. var
  1147. op : topcg;
  1148. begin
  1149. pass_left_right;
  1150. if (nf_swapped in flags) then
  1151. swapleftright;
  1152. case nodetype of
  1153. addn :
  1154. op:=OP_ADD;
  1155. muln :
  1156. op:=OP_MUL;
  1157. subn :
  1158. op:=OP_SUB;
  1159. slashn :
  1160. op:=OP_DIV;
  1161. else
  1162. internalerror(200610071);
  1163. end;
  1164. if fits_in_mm_register(left.resultdef) then
  1165. begin
  1166. location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
  1167. { we can use only right as left operand if the operation is commutative }
  1168. if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
  1169. begin
  1170. location.register:=right.location.register;
  1171. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,tfloat2tcgsize[tfloatdef(left.resultdef).floattype],left.location,location.register,nil);
  1172. end
  1173. else
  1174. begin
  1175. location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
  1176. location.register:=left.location.register;
  1177. cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,
  1178. tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype],right.location,location.register,nil);
  1179. end;
  1180. end
  1181. else
  1182. begin
  1183. { not yet supported }
  1184. internalerror(200610072);
  1185. end
  1186. end;
  1187. procedure tx86addnode.second_addfloat;
  1188. const
  1189. ops_add: array[boolean] of TAsmOp = (A_FADDP,A_FADD);
  1190. ops_mul: array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
  1191. ops_sub: array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
  1192. ops_rsub: array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
  1193. ops_div: array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
  1194. ops_rdiv: array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
  1195. var
  1196. op : TAsmOp;
  1197. refnode : tnode;
  1198. hasref : boolean;
  1199. begin
  1200. if use_vectorfpu(resultdef) then
  1201. begin
  1202. if UseAVX then
  1203. second_addfloatavx
  1204. else
  1205. second_addfloatsse;
  1206. exit;
  1207. end;
  1208. pass_left_right;
  1209. prepare_x87_locations(refnode);
  1210. hasref:=assigned(refnode);
  1211. case nodetype of
  1212. addn :
  1213. op:=ops_add[hasref];
  1214. muln :
  1215. op:=ops_mul[hasref];
  1216. subn :
  1217. if (nf_swapped in flags) then
  1218. op:=ops_rsub[hasref]
  1219. else
  1220. op:=ops_sub[hasref];
  1221. slashn :
  1222. if (nf_swapped in flags) then
  1223. op:=ops_rdiv[hasref]
  1224. else
  1225. op:=ops_div[hasref];
  1226. else
  1227. internalerror(2003042214);
  1228. end;
  1229. if hasref then
  1230. emit_ref(op,tcgsize2opsize[refnode.location.size],refnode.location.reference)
  1231. else
  1232. begin
  1233. emit_reg_reg(op,S_NO,NR_ST,NR_ST1);
  1234. tcgx86(cg).dec_fpu_stack;
  1235. end;
  1236. location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
  1237. location.register:=NR_ST;
  1238. end;
  1239. procedure tx86addnode.second_cmpfloat;
  1240. {$ifdef i8086}
  1241. var
  1242. tmpref: treference;
  1243. {$endif i8086}
  1244. begin
  1245. if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
  1246. begin
  1247. second_cmpfloatvector;
  1248. exit;
  1249. end;
  1250. pass_left_right;
  1251. force_left_and_right_fpureg;
  1252. {$ifndef x86_64}
  1253. if current_settings.cputype<cpu_Pentium2 then
  1254. begin
  1255. emit_none(A_FCOMPP,S_NO);
  1256. tcgx86(cg).dec_fpu_stack;
  1257. tcgx86(cg).dec_fpu_stack;
  1258. { load fpu flags }
  1259. {$ifdef i8086}
  1260. if current_settings.cputype < cpu_286 then
  1261. begin
  1262. tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,tmpref);
  1263. emit_ref(A_FSTSW,S_NO,tmpref);
  1264. cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1265. inc(tmpref.offset);
  1266. emit_ref_reg(A_MOV,S_B,tmpref,NR_AH);
  1267. dec(tmpref.offset);
  1268. emit_none(A_SAHF,S_NO);
  1269. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1270. tg.ungettemp(current_asmdata.CurrAsmList,tmpref);
  1271. end
  1272. else
  1273. {$endif i8086}
  1274. begin
  1275. cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1276. emit_reg(A_FNSTSW,S_NO,NR_AX);
  1277. emit_none(A_SAHF,S_NO);
  1278. cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
  1279. end;
  1280. end
  1281. else
  1282. {$endif x86_64}
  1283. begin
  1284. current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
  1285. { fcomip pops only one fpu register }
  1286. current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
  1287. tcgx86(cg).dec_fpu_stack;
  1288. tcgx86(cg).dec_fpu_stack;
  1289. end;
  1290. location_reset(location,LOC_FLAGS,OS_NO);
  1291. location.resflags:=getfpuresflags;
  1292. end;
  1293. {*****************************************************************************
  1294. Add64bit
  1295. *****************************************************************************}
  1296. procedure tx86addnode.second_add64bit;
  1297. begin
  1298. {$ifdef cpu64bitalu}
  1299. second_addordinal;
  1300. {$else cpu64bitalu}
  1301. { must be implemented separate }
  1302. internalerror(200402042);
  1303. {$endif cpu64bitalu}
  1304. end;
  1305. procedure tx86addnode.second_cmp64bit;
  1306. begin
  1307. {$ifdef cpu64bitalu}
  1308. second_cmpordinal;
  1309. {$else cpu64bitalu}
  1310. { must be implemented separate }
  1311. internalerror(200402043);
  1312. {$endif cpu64bitalu}
  1313. end;
  1314. {*****************************************************************************
  1315. AddOrdinal
  1316. *****************************************************************************}
  1317. procedure tx86addnode.second_addordinal;
  1318. var
  1319. opsize : tcgsize;
  1320. unsigned : boolean;
  1321. cgop : topcg;
  1322. checkoverflow : Boolean;
  1323. ovloc : tlocation;
  1324. tmpreg : TRegister;
  1325. begin
  1326. { determine if the comparison will be unsigned }
  1327. unsigned:=not(is_signed(left.resultdef)) or
  1328. not(is_signed(right.resultdef));
  1329. { assume no overflow checking is require }
  1330. checkoverflow := false;
  1331. ovloc.loc:=LOC_VOID;
  1332. case nodetype of
  1333. addn:
  1334. begin
  1335. cgop:=OP_ADD;
  1336. checkoverflow:=true;
  1337. end;
  1338. xorn :
  1339. begin
  1340. cgop:=OP_XOR;
  1341. end;
  1342. orn :
  1343. begin
  1344. cgop:=OP_OR;
  1345. end;
  1346. andn:
  1347. begin
  1348. cgop:=OP_AND;
  1349. end;
  1350. muln:
  1351. begin
  1352. checkoverflow:=true;
  1353. if unsigned then
  1354. cgop:=OP_MUL
  1355. else
  1356. cgop:=OP_IMUL;
  1357. end;
  1358. subn :
  1359. begin
  1360. checkoverflow:=true;
  1361. cgop:=OP_SUB;
  1362. end;
  1363. else
  1364. internalerror(2015022501);
  1365. end;
  1366. checkoverflow:=
  1367. checkoverflow and
  1368. (left.resultdef.typ<>pointerdef) and
  1369. (right.resultdef.typ<>pointerdef) and
  1370. (cs_check_overflow in current_settings.localswitches);
  1371. opsize:=def_cgsize(left.resultdef);
  1372. pass_left_right;
  1373. { do have to allocate a register? If yes, then three opcode instructions are better }
  1374. if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER)) or
  1375. ((nodetype=addn) and (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
  1376. begin
  1377. { allocate registers }
  1378. force_reg_left_right(false,true);
  1379. set_result_location_reg;
  1380. if nodetype<>subn then
  1381. begin
  1382. if (right.location.loc<>LOC_CONSTANT) then
  1383. hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
  1384. left.location.register,right.location.register,
  1385. location.register,checkoverflow,ovloc)
  1386. else
  1387. hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
  1388. right.location.value,left.location.register,
  1389. location.register,checkoverflow,ovloc);
  1390. end
  1391. else { subtract is a special case since its not commutative }
  1392. begin
  1393. if (nf_swapped in flags) then
  1394. swapleftright;
  1395. if left.location.loc<>LOC_CONSTANT then
  1396. begin
  1397. if right.location.loc<>LOC_CONSTANT then
  1398. hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
  1399. right.location.register,left.location.register,
  1400. location.register,checkoverflow,ovloc)
  1401. else
  1402. hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
  1403. right.location.value,left.location.register,
  1404. location.register,checkoverflow,ovloc);
  1405. end
  1406. else
  1407. begin
  1408. tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
  1409. hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
  1410. left.location.value,tmpreg);
  1411. hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
  1412. right.location.register,tmpreg,location.register,checkoverflow,ovloc);
  1413. end;
  1414. end
  1415. end
  1416. else
  1417. begin
  1418. { at least one location is a register, re-use it, so we can try two operand opcodes }
  1419. if left.location.loc<>LOC_REGISTER then
  1420. begin
  1421. if right.location.loc<>LOC_REGISTER then
  1422. begin
  1423. { tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  1424. cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,left.location,tmpreg);
  1425. location_reset(left.location,LOC_REGISTER,opsize);
  1426. left.location.register:=tmpreg;
  1427. }
  1428. Internalerror(2018031102);
  1429. end
  1430. else
  1431. begin
  1432. location_swap(left.location,right.location);
  1433. toggleflag(nf_swapped);
  1434. end;
  1435. end;
  1436. { at this point, left.location.loc should be LOC_REGISTER }
  1437. if right.location.loc=LOC_REGISTER then
  1438. begin
  1439. { when swapped another result register }
  1440. if (nodetype=subn) and (nf_swapped in flags) then
  1441. begin
  1442. cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
  1443. left.location.register,right.location.register);
  1444. location_swap(left.location,right.location);
  1445. toggleflag(nf_swapped);
  1446. end
  1447. else
  1448. cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
  1449. right.location.register,left.location.register);
  1450. end
  1451. else
  1452. begin
  1453. { right.location<>LOC_REGISTER }
  1454. if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
  1455. hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);
  1456. if (nodetype=subn) and (nf_swapped in flags) then
  1457. begin
  1458. tmpreg:=left.location.register;
  1459. left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
  1460. cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
  1461. cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
  1462. end
  1463. else
  1464. cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
  1465. location_freetemp(current_asmdata.CurrAsmList,right.location);
  1466. end;
  1467. location_copy(location,left.location);
  1468. end;
  1469. { emit overflow check if required }
  1470. if checkoverflow then
  1471. cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
  1472. end;
  1473. procedure tx86addnode.second_cmpordinal;
  1474. var
  1475. opdef : tdef;
  1476. opsize : tcgsize;
  1477. unsigned : boolean;
  1478. begin
  1479. unsigned:=not(is_signed(left.resultdef)) or
  1480. not(is_signed(right.resultdef));
  1481. opdef:=left.resultdef;
  1482. opsize:=def_cgsize(opdef);
  1483. pass_left_right;
  1484. if (right.location.loc=LOC_CONSTANT) and
  1485. (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE])
  1486. {$ifdef x86_64}
  1487. and ((not (opsize in [OS_64,OS_S64])) or (
  1488. (right.location.value>=low(longint)) and (right.location.value<=high(longint))
  1489. ))
  1490. {$endif x86_64}
  1491. then
  1492. begin
  1493. cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
  1494. emit_const_ref(A_CMP, TCGSize2Opsize[opsize], right.location.value, left.location.reference);
  1495. location_freetemp(current_asmdata.CurrAsmList,left.location);
  1496. end
  1497. else
  1498. begin
  1499. left_must_be_reg(opdef,opsize,false);
  1500. emit_generic_code(A_CMP,opsize,unsigned,false,false);
  1501. location_freetemp(current_asmdata.CurrAsmList,right.location);
  1502. location_freetemp(current_asmdata.CurrAsmList,left.location);
  1503. end;
  1504. location_reset(location,LOC_FLAGS,OS_NO);
  1505. location.resflags:=getresflags(unsigned);
  1506. end;
{ unit initialization: tx86addnode becomes the node class stored in caddnode }
begin
  caddnode:=tx86addnode;
end.