nx86add.pas 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  25. type
  26. tx86addnode = class(tcgaddnode) { add-node code generation shared by the x86 targets (see unit header) }
  27. protected
  28. function getresflags(unsigned : boolean) : tresflags; { condition flags for the node's comparison; honours nf_swapped, 'unsigned' selects the above/below family }
  29. function getfpuresflags : tresflags; { same mapping as getresflags, but yields the F_F* flag values }
  30. procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean); { forces left.location into a register, swapping with right when 'noswap' is false and right already is one }
  31. procedure force_left_and_right_fpureg; { forces both operands into FPU registers and toggles nf_swapped when left was already there }
  32. procedure prepare_x87_locations(out refnode: tnode); { returns an OS_F32/OS_F64 memory operand in 'refnode' (or nil) and loads everything else onto the FPU stack }
  33. procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize); { emits 'op' with right.location as source and left.location.register as destination }
  34. procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean); { emits 'op' on left/right, including the optional FPC_OVERFLOW check }
  35. procedure second_cmpfloatvector;
  36. procedure second_addfloatsse;
  37. procedure second_addfloatavx;
  38. public
  39. function pass_1 : tnode;override;
  40. function use_fma : boolean;override;
  41. procedure second_addfloat;override;
  42. {$ifndef i8086}
  43. procedure second_addsmallset;override; { set operators on sets that fit into an integer register }
  44. {$endif not i8086}
  45. procedure second_add64bit;override;
  46. procedure second_cmpfloat;override;
  47. procedure second_cmpsmallset;override; { set comparisons (=, <>, <=, >=); result is left in the CPU flags }
  48. procedure second_cmp64bit;override;
  49. procedure second_cmpordinal;override;
  50. procedure second_addordinal;override;
  51. procedure second_addboolean;override;
  52. {$ifdef SUPPORT_MMX}
  53. procedure second_opmmx;override; { MMX vector add/sub/mul and bitwise operators }
  54. {$endif SUPPORT_MMX}
  55. procedure second_opvector;override;
  56. end;
  57. implementation
  58. uses
  59. globtype,globals,
  60. verbose,cutils,compinnr,
  61. cpuinfo,
  62. aasmbase,aasmdata,aasmcpu,
  63. symconst,symdef,
  64. cgobj,hlcgobj,cgx86,cga,cgutils,
  65. tgobj,ncgutil,
  66. ncon,nset,ninl,ncnv,
  67. defutil,
  68. htypechk;
  69. { Range check must be disabled explicitly as the code serves
  70. on three different architecture sizes }
  71. {$R-}
  72. {*****************************************************************************
  73. Helpers
  74. *****************************************************************************}
  { Emits the integer/set instruction "op" for this node.

    left.location must already be a LOC_REGISTER when this routine is
    entered; on exit left.location.register holds the result.  For a swapped
    subn the value is computed in the other register and then swapped or
    copied back into left.

      op          opcode to emit
      opsize      operand size (translated through TCGSize2Opsize)
      unsigned    selects the overflow test that guards the FPC_OVERFLOW call:
                  F_AE when true, F_NO otherwise
      extra_not   complement one operand before "op" is applied (right, or
                  left for a swapped subn); on the non-register path the
                  complemented value is always combined with A_AND
      mboverflow  the operation may overflow; if needoverflowcheck is also
                  true a conditional call to FPC_OVERFLOW is appended }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
      overflowcheck: boolean;
    begin
      overflowcheck:=needoverflowcheck;
      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              { the result ends up in right's register ... }
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { ... so exchange the locations; newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              { for these opcodes the locations are exchanged first, so the
                result is produced in the register that used to be right's }
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, apply op there and copy
                the result back into left's register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef i386}
                  emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
                end
              else
                { x+1 -> inc, only when no overflow test has to follow }
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not overflowcheck and
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                { x-1 -> dec, only when no overflow test has to follow }
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not overflowcheck and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                { x*2^n -> shl.  A shift does not report overflow through OF/CF
                  the way IMUL does, so - like the other shortcuts here - this
                  is only allowed when no overflow check follows }
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not overflowcheck then
                  begin
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              { x*(2^n+1) with n in 1..3 -> lea (reg,reg,2^n),newreg }
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not overflowcheck then
                begin
                  reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  { the product is written to a fresh register which becomes left's location }
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { complement a copy of right and AND it into left }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      if mboverflow and overflowcheck then
                        cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    begin
                      if mboverflow and overflowcheck then
                        cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                      emit_op_right_left(op,opsize);
                    end;
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if overflowcheck then
            begin
              { jump over the FPC_OVERFLOW call when the flags signal "no overflow" }
              current_asmdata.getjumplabel(hl4);
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Guarantees that left.location is a register suitable for an operation of
    type opdef / size opsize.

    If left is not a LOC_REGISTER and noswap is false, a right operand that
    already sits in a (constant) register is exchanged with left and
    nf_swapped is toggled; otherwise left is loaded into a register.
    Afterwards both operands are widened/narrowed to opsize where their
    sizes (ignoring signedness) differ; constants are left alone. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_comparison,
      from_const_reg : boolean;
      wanted_size : TCGSize;
    begin
      { maybe we can reuse a constant register when the operation is a
        comparison that doesn't change the value of the register }
      is_comparison:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];

      if left.location.loc<>LOC_REGISTER then
        begin
          if (not noswap) and
             (right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
            begin
              { right already lives in a register: make it the left operand }
              from_const_reg:=(right.location.loc=LOC_CREGISTER);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
              if from_const_reg then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
                  location:=left.location;
                end;
            end
          else
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_comparison);
        end;

      { bring non-constant operands to the operation size }
      wanted_size:=tcgsize2unsigned[opsize];
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>wanted_size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>wanted_size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers, right first.

    nf_swapped is toggled whenever left already was a LOC_FPUREGISTER on
    entry: either right is loaded after it, or both were already on the
    stack (fpu operands are always in the wrong order on the stack). }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc=LOC_FPUREGISTER then
        { left was on the stack before right => operands are reversed }
        toggleflag(nf_swapped)
      else
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Prepares both operands for an x87 instruction.

    On return 'refnode' is the operand (left or right) that may be used
    directly as an OS_F32/OS_F64 memory operand, or nil when both operands
    had to be loaded onto the FPU stack.  nf_swapped is toggled as required
    by the order in which the operands ended up on the stack. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      on_fpu_stack : integer;
    begin
      refnode:=nil;

      { mm registers are not allowed further down, so spill them to memory
        here; below they are loaded into an fpu register if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { how many operands already are on the FPU stack? }
      on_fpu_stack:=0;
      if left.location.loc=LOC_FPUREGISTER then
        inc(on_fpu_stack);
      if right.location.loc=LOC_FPUREGISTER then
        inc(on_fpu_stack);

      case on_fpu_stack of
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        1:
          begin
            { the operand that is not on the stack is the memory candidate }
            if left.location.loc=LOC_FPUREGISTER then
              refnode:=right
            else
              refnode:=left;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable as memory operand }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { not usable as memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        0:
          begin
            { right always goes onto the stack, left stays in memory if its size allows }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        else
          InternalError(2013090802);
      end;
    end;
  { Emits "op right,left": right.location is the source operand and
    left.location.register the destination, so left must be a register.

    Subset locations on the right are loaded into a register first.  On
    x86_64 a 64 bit constant that does not fit into a signed 32 bit immediate
    is moved into a temporary register before the instruction is emitted. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize);
    var
      asmsize : topsize;
{$ifdef x86_64}
      constreg : tregister;
{$endif x86_64}
    begin
      asmsize:=TCGSize2Opsize[opsize];

      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      if right.location.loc in [LOC_REGISTER,LOC_CREGISTER] then
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,asmsize,right.location.register,left.location.register))
      else if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
        begin
          tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
          current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,asmsize,right.location.reference,left.location.register));
        end
      else if right.location.loc=LOC_CONSTANT then
        begin
{$ifdef x86_64}
          { x86_64 only supports signed 32 bits constants directly }
          if (opsize in [OS_S64,OS_64]) and
             ((right.location.value<low(longint)) or (right.location.value>high(longint))) then
            begin
              constreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,constreg);
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,asmsize,constreg,left.location.register));
            end
          else
{$endif x86_64}
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,asmsize,right.location.value,left.location.register));
        end
      else
        internalerror(200203232);
    end;
  { Returns the condition flags that represent the outcome of this node's
    comparison after an integer compare.

    equaln/unequaln always map to F_E/F_NE.  For the ordering comparisons
    nf_swapped mirrors the relation (< becomes >, <= becomes >=, ...), and
    'unsigned' selects the below/above flags instead of less/greater. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      swapped : boolean;
      relation : tnodetype;
    begin
      swapped:=nf_swapped in flags;

      { with exchanged operands the mirrored relation has to be tested }
      relation:=nodetype;
      if swapped then
        case nodetype of
          ltn : relation:=gtn;
          lten : relation:=gten;
          gtn : relation:=ltn;
          gten : relation:=lten;
          else
            ;
        end;

      case relation of
        equaln :
          result:=F_E;
        unequaln :
          result:=F_NE;
        ltn :
          begin
            if unsigned then
              result:=F_B
            else
              result:=F_L;
          end;
        lten :
          begin
            if unsigned then
              result:=F_BE
            else
              result:=F_LE;
          end;
        gtn :
          begin
            if unsigned then
              result:=F_A
            else
              result:=F_G;
          end;
        gten :
          begin
            if unsigned then
              result:=F_AE
            else
              result:=F_GE;
          end;
        else
          begin
            { not a comparison node: every signedness/swap combination keeps
              its own error number }
            if unsigned then
              begin
                if swapped then
                  internalerror(2013120107)
                else
                  internalerror(2013120108);
              end
            else
              begin
                if swapped then
                  internalerror(2013120105)
                else
                  internalerror(2013120106);
              end;
          end;
      end;
    end;
  { Floating point counterpart of getresflags: maps the node's comparison to
    the F_F* condition flags.  equaln/unequaln give F_FE/F_FNE; for the
    ordering comparisons the below/above flags are exchanged when nf_swapped
    is set. }
  function tx86addnode.getfpuresflags : tresflags;
    var
      swapped : boolean;
    begin
      swapped:=nf_swapped in flags;
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        ltn :
          begin
            if swapped then
              result:=F_FA
            else
              result:=F_FB;
          end;
        lten :
          begin
            if swapped then
              result:=F_FAE
            else
              result:=F_FBE;
          end;
        gtn :
          begin
            if swapped then
              result:=F_FB
            else
              result:=F_FA;
          end;
        gten :
          begin
            if swapped then
              result:=F_FBE
            else
              result:=F_FAE;
          end;
        else
          begin
            { not a comparison node }
            if swapped then
              internalerror(2014031402)
            else
              internalerror(2014031403);
          end;
      end;
    end;
  441. {*****************************************************************************
  442. AddSmallSet
  443. *****************************************************************************}
  444. {$ifndef i8086}
  { Code generation for the set operators on sets that fit into an integer
    register:

      addn with a set element on the right   -> bts
      addn / orn                             -> or
      symdifn / xorn                         -> xor
      muln / andn                            -> and
      subn                                   -> and with the complemented
                                                subtrahend (a plain not when
                                                the minuend is the constant -1,
                                                andn when BMI1 is available)

    The result is stored in 'location'. }
  procedure tx86addnode.second_addsmallset;
    var
      base : aint;
      workdef : tdef;
      worksize : TCGSize;
      asmop : TAsmOp;
      swapped,
      invert_operand,
      keep_order,
      minuend_is_full_set : boolean;
    begin
      pass_left_right;

      keep_order:=false;
      invert_operand:=false;
      minuend_is_full_set:=false;
      workdef:=resultdef;
      worksize:=int_cgsize(workdef.size);

      { the set base comes from whichever operand is the set }
      if left.resultdef.typ<>setdef then
        base:=tsetdef(right.resultdef).setbase
      else
        base:=tsetdef(left.resultdef).setbase;

      case nodetype of
        symdifn,
        xorn :
          asmop:=A_XOR;
        muln,
        andn :
          asmop:=A_AND;
        orn :
          asmop:=A_OR;
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            if right.nodetype<>setelementn then
              asmop:=A_OR
            else
              begin
                { a single element is added; no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if worksize=OS_8 then
                  begin
                    worksize:=OS_32;
                    workdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,workdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,workdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,workdef,right.location,base);
                asmop:=A_BTS;
                keep_order:=true;
              end;
          end;
        subn :
          begin
            asmop:=A_AND;
            swapped:=nf_swapped in flags;
            if swapped then
              begin
                { right holds the minuend, left the subtrahend }
                minuend_is_full_set:=(right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  invert_operand:=true;
              end
            else
              begin
                { left holds the minuend, right the subtrahend }
                minuend_is_full_set:=(left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  invert_operand:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if minuend_is_full_set then
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,workdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[worksize],right.location.register);
          location:=right.location;
        end
      { can we use the BMI1 instruction andn? }
      else if (asmop=A_AND) and invert_operand and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
        (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
        begin
          location_reset(location,LOC_REGISTER,left.location.size);
          location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);

          if nf_swapped in flags then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;

          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,workdef,true);
          if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
            hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,workdef,true);

          if left.location.loc in [LOC_CREGISTER,LOC_REGISTER] then
            emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[worksize],left.location.register,right.location.register,location.register)
          else if left.location.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
            emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[worksize],left.location.reference,right.location.register,location.register)
          else
            Internalerror(2018040201);
        end
      else
        begin
          { left must be a register }
          left_must_be_reg(workdef,worksize,keep_order);
          emit_generic_code(asmop,worksize,true,invert_operand,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);

          { left is always a register and contains the result }
          location:=left.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if worksize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,workdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  573. {$endif not i8086}
  { Code generation for comparisons of sets that fit into an integer register.

    equaln/unequaln are a plain cmp.  For lten/gten the operands are arranged
    (swapping the child nodes if necessary) so that the test becomes
    "(left and right) = right"; the node is then turned into an equaln so
    that the flags are evaluated with F_E.  The result is returned as
    LOC_FLAGS. }
  procedure tx86addnode.second_cmpsmallset;
    var
      cmpdef : tdef;
      cmpsize : TCGSize;
    begin
      pass_left_right;
      cmpdef:=left.resultdef;
      cmpsize:=int_cgsize(cmpdef.size);

      if nodetype in [lten,gten] then
        begin
          { swap for an unswapped lten or a swapped gten }
          if (nf_swapped in flags)=(nodetype=gten) then
            swapleftright;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cmpdef,false);
          emit_op_right_left(A_AND,cmpsize);
          { warning: ugly hack, we need a JE so change the node to equaln }
          nodetype:=equaln;
        end
      else if not(nodetype in [equaln,unequaln]) then
        internalerror(2003042204);

      { left must be a register }
      left_must_be_reg(cmpdef,cmpsize,false);
      emit_generic_code(A_CMP,cmpsize,true,false,false);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      { the outcome lives in the flags }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  609. {*****************************************************************************
  610. AddMMX
  611. *****************************************************************************}
  612. {$ifdef SUPPORT_MMX}
  613. procedure tx86addnode.second_opmmx;
  614. var
  615. op : TAsmOp;
  616. cmpop : boolean;
  617. mmxbase : tmmxtype;
  618. hreg,
  619. hregister : tregister;
  620. begin
  621. pass_left_right;
  622. cmpop:=false;
  623. op:=A_NOP;
  624. mmxbase:=mmx_type(left.resultdef);
  625. location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
  626. case nodetype of
  627. addn :
  628. begin
  629. if (cs_mmx_saturation in current_settings.localswitches) then
  630. begin
  631. case mmxbase of
  632. mmxs8bit:
  633. op:=A_PADDSB;
  634. mmxu8bit:
  635. op:=A_PADDUSB;
  636. mmxs16bit,mmxfixed16:
  637. op:=A_PADDSW;
  638. mmxu16bit:
  639. op:=A_PADDUSW;
  640. else
  641. ;
  642. end;
  643. end
  644. else
  645. begin
  646. case mmxbase of
  647. mmxs8bit,mmxu8bit:
  648. op:=A_PADDB;
  649. mmxs16bit,mmxu16bit,mmxfixed16:
  650. op:=A_PADDW;
  651. mmxs32bit,mmxu32bit:
  652. op:=A_PADDD;
  653. else
  654. ;
  655. end;
  656. end;
  657. end;
  658. muln :
  659. begin
  660. case mmxbase of
  661. mmxs16bit,mmxu16bit:
  662. op:=A_PMULLW;
  663. mmxfixed16:
  664. op:=A_PMULHW;
  665. else
  666. ;
  667. end;
  668. end;
  669. subn :
  670. begin
  671. if (cs_mmx_saturation in current_settings.localswitches) then
  672. begin
  673. case mmxbase of
  674. mmxs8bit:
  675. op:=A_PSUBSB;
  676. mmxu8bit:
  677. op:=A_PSUBUSB;
  678. mmxs16bit,mmxfixed16:
  679. op:=A_PSUBSB;
  680. mmxu16bit:
  681. op:=A_PSUBUSW;
  682. else
  683. ;
  684. end;
  685. end
  686. else
  687. begin
  688. case mmxbase of
  689. mmxs8bit,mmxu8bit:
  690. op:=A_PSUBB;
  691. mmxs16bit,mmxu16bit,mmxfixed16:
  692. op:=A_PSUBW;
  693. mmxs32bit,mmxu32bit:
  694. op:=A_PSUBD;
  695. else
  696. ;
  697. end;
  698. end;
  699. end;
  700. xorn:
  701. op:=A_PXOR;
  702. orn:
  703. op:=A_POR;
  704. andn:
  705. op:=A_PAND;
  706. else
  707. internalerror(2003042214);
  708. end;
  709. if op = A_NOP then
  710. internalerror(201408201);
  711. { left and right no register? }
  712. { then one must be demanded }
  713. if (left.location.loc<>LOC_MMXREGISTER) then
  714. begin
  715. if (right.location.loc=LOC_MMXREGISTER) then
  716. begin
  717. location_swap(left.location,right.location);
  718. toggleflag(nf_swapped);
  719. end
  720. else
  721. begin
  722. { register variable ? }
  723. if (left.location.loc=LOC_CMMXREGISTER) then
  724. begin
  725. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  726. emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
  727. end
  728. else
  729. begin
  730. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  731. internalerror(200203245);
  732. hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  733. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
  734. emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
  735. end;
  736. location_reset(left.location,LOC_MMXREGISTER,OS_NO);
  737. left.location.register:=hregister;
  738. end;
  739. end;
  740. { at this point, left.location.loc should be LOC_MMXREGISTER }
  741. if right.location.loc<>LOC_MMXREGISTER then
  742. begin
  743. if (nodetype=subn) and (nf_swapped in flags) then
  744. begin
  745. hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
  746. if right.location.loc=LOC_CMMXREGISTER then
  747. begin
  748. emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
  749. emit_reg_reg(op,S_NO,left.location.register,hreg);
  750. end
  751. else
  752. begin
  753. if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  754. internalerror(2002032412);
  755. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  756. emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
  757. emit_reg_reg(op,S_NO,left.location.register,hreg);
  758. end;
  759. location.register:=hreg;
  760. end
  761. else
  762. begin
  763. if (right.location.loc=LOC_CMMXREGISTER) then
  764. emit_reg_reg(op,S_NO,right.location.register,left.location.register)
  765. else
  766. begin
  767. if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
  768. internalerror(200203246);
  769. tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
  770. emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
  771. end;
  772. location.register:=left.location.register;
  773. end;
  774. end
  775. else
  776. begin
  777. { right.location=LOC_MMXREGISTER }
  778. if (nodetype=subn) and (nf_swapped in flags) then
  779. begin
  780. emit_reg_reg(op,S_NO,left.location.register,right.location.register);
  781. location_swap(left.location,right.location);
  782. toggleflag(nf_swapped);
  783. end
  784. else
  785. begin
  786. emit_reg_reg(op,S_NO,right.location.register,left.location.register);
  787. end;
  788. location.register:=left.location.register;
  789. end;
  790. location_freetemp(current_asmdata.CurrAsmList,right.location);
  791. if cmpop then
  792. location_freetemp(current_asmdata.CurrAsmList,left.location);
  793. end;
  794. {$endif SUPPORT_MMX}
  795. {*****************************************************************************
  796. AddFloat
  797. *****************************************************************************}
  { Emits code for a scalar floating point addn/subn/muln/slashn node whose
    result is placed in an mm register (location is reset to LOC_MMREGISTER).

    Special case: if both operands are in_sqr_real inline nodes, the node is
    addn or subn, the result uses the vector fpu and the selected fpu type is
    at least fpu_sse3, the two sqr() nodes are removed and the squares are
    computed by one packed multiply followed by one horizontal add/subtract.

    Any other node type ends in internalerror(200312231). }
  procedure tx86addnode.second_addfloatsse;
    var
      cgop : topcg;
      horizop : tasmop;
      fuse_squares,
      both_on_fpu : boolean;
      inner : tnode;
    begin
      fuse_squares:=
        (current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if fuse_squares then
        begin
          { replace every sqr(x) operand by x itself; the argument is detached
            from the inline node before that node is freed }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_fpu:=
        (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
        (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]);
      if both_on_fpu then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless when both operands are on the fpu stack:
            both are "R_ST", so nothing would change -> exchange manually }
          if both_on_fpu then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { combine both operands in the result register, then square
                both lanes with a single packed multiply }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horizop:=A_HADDPD;
                subn:
                  horizop:=A_HSUBPD;
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { both operands are now present in the lower and in the upper
                half of the register }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horizop:=A_HADDPS;
                subn:
                  horizop:=A_HSUBPS;
                else
                  internalerror(201108163);
              end;
            end;

          { horizontal add/subtract of the squared lanes }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horizop,S_NO,location.register,location.register));
        end
      { right may only take the role of the left operand if the operation is
        commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);

          { an operand living in an fpu register is written to memory instead
            of being forced into an mm register: there is no direct fpu->mm
            register copy instruction, so going through memory probably
            yields shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,left.location,location.register,mms_movescalar);
          if left.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { fpu register operands go through memory, see above: there is no
            direct fpu->mm register copy instruction }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          { load left into a fresh result register ... }
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          if left.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);

          { ... and apply right to it }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,cgop,location.size,right.location,location.register,mms_movescalar);
          if right.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,right.location.reference);
        end;
    end;
  { AVX counterpart of second_addfloatsse: emits code for a scalar floating
    point addn/subn/muln/slashn node with the result in a freshly allocated
    mm register, using the three operand cg.a_opmm_*_reg_reg helpers.

    A multiplication by the real constant 2 is turned into an addition of the
    other operand with itself.

    The sqr(a)+sqr(b) / sqr(a)-sqr(b) detection is only compiled if the symbol
    "dummy" is defined; without it the fused path below is never taken.

    Any other node type ends in internalerror(2003122303). }
  procedure tx86addnode.second_addfloatavx;
    var
      cgop : topcg;
      horizop : tasmop;
      fuse_squares,
      both_on_fpu : boolean;
{$ifdef dummy}
      inner : tnode;
{$endif dummy}
    begin
      fuse_squares:=false;
{$ifdef dummy}
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          fuse_squares:=true;
          { replace every sqr(x) operand by x itself; the argument is detached
            from the inline node before that node is freed }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;
{$endif dummy}

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      both_on_fpu:=
        (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
        (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]);
      if both_on_fpu then
        toggleflag(nf_swapped);

      if nf_swapped in flags then
        begin
          { swapleftright is useless when both operands are on the fpu stack:
            both are "R_ST", so nothing would change -> exchange manually }
          if both_on_fpu then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          cgop:=OP_MUL;
        slashn :
          cgop:=OP_DIV;
        else
          internalerror(2003122303);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if fuse_squares then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the register holding left becomes the result register }
          location:=left.location;

          if is_double(resultdef) then
            begin
              { combine both operands in the result register, then square
                both lanes with a single packed multiply }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horizop:=A_HADDPD;
                subn:
                  horizop:=A_HSUBPD;
                else
                  internalerror(2011081601);
              end;
            end
          else
            begin
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { both operands are now present in the lower and in the upper
                half of the register }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  horizop:=A_HADDPS;
                subn:
                  horizop:=A_HSUBPS;
                else
                  internalerror(2011081604);
              end;
            end;

          { horizontal add/subtract of the squared lanes }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(horizop,S_NO,location.register,location.register));
        end
      { left*2 ? -> left+left }
      else if (nodetype=muln) and is_constrealnode(right) and is_number_float(trealconstnode(right).value_real) and (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,left.location.register,location.register,
            mms_movescalar);
        end
      { 2*right ? -> right+right }
      else if (nodetype=muln) and is_constrealnode(left) and is_number_float(trealconstnode(left).value_real) and (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,right.location.register,location.register,
            mms_movescalar);
        end
      { right may only take the role of the left operand if the operation is
        commutative }
      else if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) and (cgop in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);

          { an operand living in an fpu register is written to memory instead
            of being forced into an mm register: there is no direct fpu->mm
            register copy instruction, so going through memory probably
            yields shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            left.location,right.location.register,location.register,
            mms_movescalar);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);

          { fpu register operands go through memory, see above: there is no
            direct fpu->mm register copy instruction }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,cgop,location.size,
            right.location,left.location.register,location.register,
            mms_movescalar);
        end;
    end;
  { First pass for x86 add nodes.

    If a float operand is involved and neither the operands nor the result use
    the vector fpu (i.e. the operation is carried out on the x87), both
    operands are marked as not regable: x86 has no support for fpu register
    variables, so such operands would otherwise end up in mm registers.

    Afterwards the inherited pass_1 is run and, for results handled by the
    vector fpu, expectloc is corrected to LOC_MMREGISTER. Returns whatever the
    inherited pass_1 returned. }
  function tx86addnode.pass_1: tnode;
    var
      has_float_operand : boolean;
    begin
      has_float_operand:=(left.resultdef.typ=floatdef) or (right.resultdef.typ=floatdef);
      if has_float_operand and
         not(use_vectorfpu(left.resultdef) or
             use_vectorfpu(right.resultdef) or
             use_vectorfpu(resultdef)) then
        begin
          make_not_regable(left,[ra_addr_regable]);
          make_not_regable(right,[ra_addr_regable]);
        end;

      Result:=inherited pass_1;

      { correct expectloc; it does not matter if Result is set, as another
        pass_1 is run on it which will fix that one }
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER;
    end;
  { Tells whether fused multiply-add may be used for this node.

    On all targets except i8086 this is the case if the result is handled by
    the vector fpu (i.e. stays in an xmm register; mixing fpu registers and
    fma makes no sense) and the selected fpu type offers FMA or FMA4.
    On i8086 the decision is left to the inherited implementation. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifndef i8086}
      Result:=
        use_vectorfpu(resultdef) and
        (([FPUX86_HAS_FMA,FPUX86_HAS_FMA4]*fpu_capabilities[current_settings.fputype])<>[]);
{$else i8086}
      Result:=inherited use_fma;
{$endif i8086}
    end;
  { Emits a scalar floating point comparison using (V)COMISS/(V)COMISD and
    leaves the result in the flags (LOC_FLAGS, resflags from getfpuresflags).

    The instruction is chosen from the type of the left operand (single or
    double, anything else ends in internalerror(200402222)) and from UseAVX.
    If right already is in an mm register it is used as the register operand
    and nf_swapped is toggled; otherwise left is forced into an mm register. }
  procedure tx86addnode.second_cmpfloatvector;
    const
      { index: UseAVX }
      cmp_single: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
      cmp_double: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
    var
      cmpinstr : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpinstr:=cmp_single[UseAVX]
      else if is_double(left.resultdef) then
        cmpinstr:=cmp_double[UseAVX]
      else
        internalerror(200402222);

      pass_left_right;

      { fpu operands are always in reversed order on the stack }
      if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
         (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
        toggleflag(nf_swapped);

      location_reset(location,LOC_FLAGS,OS_NO);

      { A direct fpu->mm register move is not possible, so fpu operands are
        forced to memory (and not into mm registers, because one memory
        location can be used directly by the compare instruction, which
        yields shorter code) }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is the register operand, left is compared against it }
          if left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,left.location.reference,right.location.register));
            end
          else if left.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,left.location.register,right.location.register))
          else
            internalerror(200402221);
          { the operands were compared in exchanged roles }
          toggleflag(nf_swapped);
        end
      else
        begin
          { left becomes the register operand, right is compared against it }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          if right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE] then
            begin
              tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
              current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpinstr,S_NO,right.location.reference,left.location.register));
            end
          else if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpinstr,S_NO,right.location.register,left.location.register))
          else
            internalerror(200402223);
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Emits code for an element-wise addn/subn/muln/slashn on float vectors
    which fit into an mm register; the result is placed in an mm register.

    With UseAVX a fresh OS_VECTOR register receives the result (three operand
    form), otherwise one of the operand registers is overwritten.

    The size passed to the code generator is the size of the vector's element
    type. It is derived once from the element def of left's array type and
    used by all emit paths. Previously the commutative path cast the array def
    itself to tfloatdef and therefore read the float type from an object of
    the wrong class.

    Errors: internalerror(200610071) for an unsupported node type,
    internalerror(200610072) if the operand does not fit into an mm register
    (not yet supported). }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

          { left.resultdef is the vector (array) type, the float type has to
            be taken from its element def }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              if UseAVX then
                begin
                  location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
                  cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,elemsize,
                    left.location,right.location.register,location.register,nil);
                end
              else
                begin
                  { two operand form: right's register is reused as result }
                  location.register:=right.location.register;
                  cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
                    left.location,location.register,nil);
                end;
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              if UseAVX then
                begin
                  location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
                  cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,elemsize,
                    right.location,left.location.register,location.register,nil);
                end
              else
                begin
                  { two operand form: left's register is reused as result }
                  location.register:=left.location.register;
                  cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
                    right.location,location.register,nil);
                end;
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Emits code for a floating point addn/subn/muln/slashn node.

    Results handled by the vector fpu are delegated to second_addfloatavx or
    second_addfloatsse (depending on UseAVX). Everything else is done on the
    x87: after prepare_x87_locations either one operand is addressed through
    refnode (instruction with a memory operand) or both operands are on the
    fpu stack (popping instruction on ST/ST1, followed by dec_fpu_stack).
    For subn and slashn the reversed instruction is used if nf_swapped is set.

    The result is reported in NR_ST (LOC_FPUREGISTER). Any other node type
    ends in internalerror(2003042203). }
  procedure tx86addnode.second_addfloat;
    const
      { index: false = both operands on the fpu stack (popping form),
               true  = one operand is a memory reference }
      add_insn  : array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      mul_insn  : array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      sub_insn  : array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
      rsub_insn : array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
      div_insn  : array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
      rdiv_insn : array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
    var
      insn : TAsmOp;
      refnode,
      conv : tnode;
      hasref,
      reversed : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if UseAVX then
            second_addfloatavx
          else
            second_addfloatsse;
          exit;
        end;

      { can the operation do the conversion? If an operand is a type
        conversion of a single or double value, drop the conversion node and
        use its argument directly }
      if (left.nodetype=typeconvn) and
         (is_double(ttypeconvnode(left).left.resultdef) or is_single(ttypeconvnode(left).left.resultdef)) then
        begin
          conv:=left;
          left:=ttypeconvnode(conv).left;
          ttypeconvnode(conv).left:=nil;
          conv.Free;
        end;
      if (right.nodetype=typeconvn) and
         (is_double(ttypeconvnode(right).left.resultdef) or is_single(ttypeconvnode(right).left.resultdef)) then
        begin
          conv:=right;
          right:=ttypeconvnode(conv).left;
          ttypeconvnode(conv).left:=nil;
          conv.Free;
        end;

      pass_left_right;
      prepare_x87_locations(refnode);
      hasref:=assigned(refnode);
      reversed:=nf_swapped in flags;

      case nodetype of
        addn :
          insn:=add_insn[hasref];
        muln :
          insn:=mul_insn[hasref];
        subn :
          begin
            if reversed then
              insn:=rsub_insn[hasref]
            else
              insn:=sub_insn[hasref];
          end;
        slashn :
          begin
            if reversed then
              insn:=rdiv_insn[hasref]
            else
              insn:=div_insn[hasref];
          end;
        else
          internalerror(2003042203);
      end;

      if hasref then
        emit_ref(insn,tcgsize2opsize[refnode.location.size],refnode.location.reference)
      else
        begin
          emit_reg_reg(insn,S_NO,NR_ST,NR_ST1);
          { the popping instruction removed one value from the fpu stack }
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Emits a floating point comparison and leaves the result in the flags
    (LOC_FLAGS, resflags from getfpuresflags).

    If either operand uses the vector fpu the work is delegated to
    second_cmpfloatvector. Otherwise both operands are forced onto the fpu
    stack and compared there:
      - on non-x86_64 targets with a cpu type below cpu_Pentium2: FCOMPP,
        then the fpu status word is moved into the cpu flags (via a memory
        temp on i8086 cpus below cpu_286, via FNSTSW AX otherwise) with SAHF;
      - otherwise: FCOMIP plus an FSTP to drop the second operand.
    In both cases the fpu stack bookkeeping is decreased twice. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      statusword : treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { load fpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word into a 2 byte temp and load the byte at
                offset 1 of it into AH for SAHF }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
              emit_ref(A_FSTSW,S_NO,statusword);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(statusword.offset);
              emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
              dec(statusword.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusword);
            end
          else
{$endif i8086}
            begin
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;

          if cs_fpu_fwait in current_settings.localswitches then
            current_asmdata.CurrAsmList.concat(Taicpu.Op_none(A_FWAIT,S_NO));
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip pops only one fpu register, remove the other one as well }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1350. {*****************************************************************************
  1351. Add64bit
  1352. *****************************************************************************}
  { 64 bit arithmetic: if cpu64bitalu is defined the operation is handed to
    second_addordinal. Without it this method must not be reached (the
    operation has to be implemented separately) and ends in
    internalerror(200402042). }
  procedure tx86addnode.second_add64bit;
    begin
{$ifdef cpu64bitalu}
      second_addordinal;
{$else cpu64bitalu}
      { must be implemented separate }
      internalerror(200402042);
{$endif cpu64bitalu}
    end;
  { 64 bit comparison: if cpu64bitalu is defined the comparison is handed to
    second_cmpordinal. Without it this method must not be reached (the
    comparison has to be implemented separately) and ends in
    internalerror(200402043). }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifdef cpu64bitalu}
      second_cmpordinal;
{$else cpu64bitalu}
      { must be implemented separate }
      internalerror(200402043);
{$endif cpu64bitalu}
    end;
  1371. {*****************************************************************************
  1372. AddOrdinal
  1373. *****************************************************************************}
  { Emits code for an ordinal addn/subn/muln/xorn/orn/andn node.

    The operation counts as unsigned as soon as one operand is not signed;
    this only selects OP_MUL versus OP_IMUL. Overflow checking is considered
    for addn, subn and muln and only if needoverflowcheck says so.

    Two code shapes are produced:
      - three operand form (operands forced into registers/constants, result
        in a separate register from set_result_location_reg), chosen by the
        condition computed into three_operand below;
      - two operand form, which tries to reuse a register one of the operands
        already lives in; the result location is a copy of left's location.

    Any other node type ends in internalerror(2015022501). }
  procedure tx86addnode.second_addordinal;
    var
      opsize : tcgsize;
      unsigned,
      checkoverflow,
      three_operand,
      reverse_sub : boolean;
      cgop : topcg;
      ovloc : tlocation;
      tmpreg : TRegister;
    begin
      { determine if the operation will be unsigned }
      unsigned:=not(is_signed(left.resultdef) and is_signed(right.resultdef));

      ovloc.loc:=LOC_VOID;

      case nodetype of
        addn :
          cgop:=OP_ADD;
        subn :
          cgop:=OP_SUB;
        muln :
          begin
            if unsigned then
              cgop:=OP_MUL
            else
              cgop:=OP_IMUL;
          end;
        xorn :
          cgop:=OP_XOR;
        orn :
          cgop:=OP_OR;
        andn :
          cgop:=OP_AND;
        else
          internalerror(2015022501);
      end;

      { only add, sub and mul can overflow; the logical operations never
        ask needoverflowcheck }
      checkoverflow:=(nodetype in [addn,subn,muln]) and needoverflowcheck;

      opsize:=def_cgsize(left.resultdef);

      pass_left_right;

      { do we have to allocate a register? If yes, then three opcode
        instructions are better, however for sub three op code instructions
        make no sense if right is a reference }
      three_operand:=
        ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER) and
         ((nodetype<>subn) or not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE])) and
         { 3 op mul makes only sense if a constant is involved }
         ((nodetype<>muln) or (left.location.loc=LOC_CONSTANT) or (right.location.loc=LOC_CONSTANT)
{$ifndef i8086}
          or ((CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and (not(needoverflowcheck)))
{$endif i8086}
         ) and
         (not(nodetype in [orn,andn,xorn]))) or
        ((nodetype=addn) and (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]));

      if three_operand then
        begin
          { allocate registers }
          force_reg_left_right(false,true);
          set_result_location_reg;

          if nodetype=subn then
            begin
              { subtract is a special case since it is not commutative:
                restore the original operand order first }
              if (nf_swapped in flags) then
                swapleftright;

              if left.location.loc=LOC_CONSTANT then
                begin
                  { constant minuend: materialise it in a register first }
                  tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                  hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                    left.location.value,tmpreg);
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.register,tmpreg,location.register,checkoverflow,ovloc);
                end
              else
                begin
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  if right.location.loc=LOC_CONSTANT then
                    hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                      right.location.value,left.location.register,
                      location.register,checkoverflow,ovloc)
                  else
                    hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                      right.location.register,left.location.register,
                      location.register,checkoverflow,ovloc);
                end;
            end
          else
            begin
              if checkoverflow then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              if right.location.loc=LOC_CONSTANT then
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                  right.location.value,left.location.register,
                  location.register,checkoverflow,ovloc)
              else
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                  left.location.register,right.location.register,
                  location.register,checkoverflow,ovloc);
            end;
        end
      else
        begin
          { at least one location should be a register; if so, try to re-use
            it so that two operand opcodes can be used }
          if left.location.loc<>LOC_REGISTER then
            begin
              if right.location.loc=LOC_REGISTER then
                begin
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
            end;

          { at this point, left.location.loc should be LOC_REGISTER }

          { a subtraction whose operands are currently exchanged needs
            special treatment below }
          reverse_sub:=(nodetype=subn) and (nf_swapped in flags);

          if right.location.loc=LOC_REGISTER then
            begin
              if checkoverflow then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              if reverse_sub then
                begin
                  { when swapped another result register: operate into
                    right's register and exchange the locations afterwards }
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                    left.location.register,right.location.register);
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                  right.location.register,left.location.register);
            end
          else
            begin
              { right.location<>LOC_REGISTER }
              if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);

              if reverse_sub then
                begin
                  { load right into a new register which takes over the role
                    of left's register, then subtract the old register }
                  tmpreg:=left.location.register;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
                end
              else
                begin
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
                end;

              location_freetemp(current_asmdata.CurrAsmList,right.location);
            end;

          location_copy(location,left.location);
        end;

      { emit overflow check if required }
      if checkoverflow then
        cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
    end;
  procedure tx86addnode.second_addboolean;
    var
      lazy_and_or : boolean;
    begin
      { "and"/"or" nodes are handed to the inherited code when full boolean
        evaluation is not enabled in the local switches, or when the node
        itself carries nf_short_bool }
      lazy_and_or:=(nodetype in [orn,andn]) and
                   ((nf_short_bool in flags) or
                    not(cs_full_boolean_eval in current_settings.localswitches));
      { operands for which is_64bit() holds are left to the inherited
        implementation as well; every remaining case is generated by
        second_addordinal }
      if lazy_and_or or is_64bit(left.resultdef) then
        inherited second_addboolean
      else
        second_addordinal;
    end;
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned,
      cmp_const_with_mem : boolean;
    begin
      { the comparison is treated as signed only if both operands are signed }
      is_unsigned:=not(is_signed(left.resultdef) and
                       is_signed(right.resultdef));
      { definition and operand size are taken from the left operand }
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      { a constant on the right side can be compared directly against a
        left operand whose location is a (constant) reference }
      cmp_const_with_mem:=(right.location.loc=LOC_CONSTANT) and
                          (left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]);
  {$ifdef x86_64}
      { with a 64 bit operand size the direct form is used only if the
        constant lies within the longint range }
      if cmp_const_with_mem and (cmpsize in [OS_64,OS_S64]) then
        cmp_const_with_mem:=(right.location.value>=low(longint)) and
                            (right.location.value<=high(longint));
  {$endif x86_64}

      if cmp_const_with_mem then
        emit_const_ref(A_CMP,TCGSize2Opsize[cmpsize],right.location.value,left.location.reference)
      else
        begin
          { generic path: left_must_be_reg prepares the left operand, then
            the compare is emitted for whatever the right location is }
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
        end;
      { the left operand's temp is released on both paths }
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      { the result location is the flags, selected by signedness }
      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { Unit initialization: assigns tx86addnode to caddnode.
    NOTE(review): presumably caddnode is the class reference the compiler
    uses to instantiate add nodes, which would make tx86addnode the
    implementation used on x86 targets - confirm where caddnode is declared. }
  1589. begin
  1590. caddnode:=tx86addnode;
  1591. end.