nx86add.pas 94 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301
  1. {
  2. Copyright (c) 2000-2002 by Florian Klaempfl
  3. Common code generation for add nodes on the i386 and x86
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86add;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. symtype,
  22. cgbase,
  23. cpubase,
  24. node,nadd,ncgadd;
  type
    { Add node shared by the i386 and x86 code generators; extends the
      generic code generator add node tcgaddnode. }
    tx86addnode = class(tcgaddnode)
    protected
      { condition flags for an integer comparison, see implementation }
      function getresflags(unsigned : boolean) : tresflags;
      { condition flags for a floating point comparison, see implementation }
      function getfpuresflags : tresflags;

      { operand preparation helpers }
      procedure left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
      procedure force_left_and_right_fpureg;
      procedure prepare_x87_locations(out refnode: tnode);

      { instruction emission helpers }
      procedure emit_op_right_left(op:TAsmOp;opsize:TCgSize;AllocFlags:boolean);
      procedure emit_generic_code(op:TAsmOp;opsize:TCgSize;unsigned,extra_not,mboverflow:boolean);

      procedure second_cmpfloatvector;
      procedure second_addfloatsse;
      procedure second_addfloatavx;
    public
      function pass_1 : tnode;override;
      function simplify(forinline : boolean) : tnode; override;
      function use_fma : boolean;override;

      procedure second_addfloat;override;
{$ifndef i8086}
      { small set addition is not compiled for the i8086 target }
      procedure second_addsmallset;override;
      procedure second_addsmallsetelement;override;
{$endif not i8086}
      procedure second_add64bit;override;
      procedure second_cmpfloat;override;
      procedure second_cmpsmallset;override;
      procedure second_cmp64bit;override;
      procedure second_cmpordinal;override;
      procedure second_addordinal;override;
      procedure second_addboolean;override;
{$ifdef SUPPORT_MMX}
      procedure second_opmmx;override;
{$endif SUPPORT_MMX}
      procedure second_opvector;override;
    end;
  59. implementation
  60. uses
  61. globtype,globals,
  62. verbose,cutils,compinnr,
  63. cpuinfo,
  64. aasmbase,aasmdata,aasmcpu,
  65. symconst,symdef,
  66. cgobj,hlcgobj,cgx86,cga,cgutils,
  67. tgobj,ncgutil,nutils,
  68. ncon,nset,ninl,ncnv,ncal,nmat,
  69. defutil,defcmp,constexp,
  70. pass_1,pass_2,htypechk;
  71. { Range check must be disabled explicitly as the code serves
  72. on three different architecture sizes }
  73. {$R-}
  74. {*****************************************************************************
  75. Helpers
  76. *****************************************************************************}
  { Emits the instruction(s) for the binary operation "op" on
    left.location and right.location and, when requested, the overflow
    check that follows it.

    On entry left.location is expected to be a LOC_REGISTER (callers in
    this unit establish that through left_must_be_reg).  After the call
    left.location describes the register that was the destination of the
    emitted operation; on some paths left.location and right.location are
    exchanged (and nf_swapped toggled) to achieve this.

      op          opcode to emit
      opsize      operand size of the emitted instructions
      unsigned    selects the flag tested by the overflow check: F_AE when
                  true, F_NO otherwise
      extra_not   an additional NOT is applied to one operand before the
                  operation; on the generic (right not in a register, not a
                  swapped subtraction) path the operation emitted is then
                  always AND
      mboverflow  the operation may overflow: if this is set and
                  needoverflowcheck is true, a conditional jump around a
                  call to FPC_OVERFLOW is appended }
  procedure tx86addnode.emit_generic_code(op:TAsmOp;opsize:TCGSize;unsigned,extra_not,mboverflow:boolean);
    var
      power : longint;
      hl4 : tasmlabel;
      r : Tregister;
      href : treference;
      overflowcheck: boolean;
      comparison: boolean;
    begin
      overflowcheck:=needoverflowcheck;
      { for these operations the flags register is allocated before the
        instruction that sets it is emitted }
      comparison:=
        (op=A_CMP) or (op=A_TEST) or (op=A_BT) or is_boolean(resultdef);

      { at this point, left.location.loc should be LOC_REGISTER }
      if right.location.loc=LOC_REGISTER then
        begin
          { right.location is a LOC_REGISTER }
          { when swapped another result register }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],left.location.register);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,right.location.register);
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              if extra_not then
                emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
              if (op=A_ADD) or (op=A_OR) or (op=A_AND) or (op=A_XOR) or (op=A_IMUL) then
                location_swap(left.location,right.location);
              if comparison then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              emit_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register);
            end;
        end
      else
        begin
          { right.location is not a LOC_REGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              if extra_not then
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_NOT,opsize,left.location.register,left.location.register);
              { load right into a scratch register, operate on it and copy
                the result back into left's register }
              r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
              hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
              if comparison then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              emit_reg_reg(op,TCGSize2Opsize[opsize],left.location.register,r);
              cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,r,left.location.register);
            end
          else
            begin
              { Optimizations when right.location is a constant value }
              if (op=A_CMP) and
                 (nodetype in [equaln,unequaln]) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value=0) then
                begin
                  { 'test $-1,%reg' is transformable into 'test $-1,spilltemp' if %reg needs
                    spilling, while 'test %reg,%reg' still requires loading into register.
                    If spilling is not necessary, it is changed back into 'test %reg,%reg' by
                    peephole optimizer (this optimization is currently available only for i386). }
                  cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
{$ifdef i386}
                  emit_const_reg(A_TEST,TCGSize2Opsize[opsize],aint(-1),left.location.register)
{$else i386}
                  emit_reg_reg(A_TEST,TCGSize2Opsize[opsize],left.location.register,left.location.register);
{$endif i386}
                end
              else
                if (op=A_ADD) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not overflowcheck and
                   UseIncDec then
                  begin
                    emit_reg(A_INC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                if (op=A_SUB) and
                   (right.location.loc=LOC_CONSTANT) and
                   (right.location.value=1) and
                   not overflowcheck and
                   UseIncDec then
                  begin
                    emit_reg(A_DEC,TCGSize2Opsize[opsize],left.location.register);
                  end
              else
                { multiplication by a power of two becomes a shift.  Like the
                  other shortcuts in this chain this is only valid when no
                  overflow check follows: the check below tests the flags
                  of the multiplication, which a shift does not reproduce,
                  and this path does not allocate the flags register. }
                if (op=A_IMUL) and
                   (right.location.loc=LOC_CONSTANT) and
                   (ispowerof2(int64(right.location.value),power)) and
                   not overflowcheck then
                  begin
                    emit_const_reg(A_SHL,TCGSize2Opsize[opsize],power,left.location.register);
                  end
              else if (op=A_IMUL) and
                 (right.location.loc=LOC_CONSTANT) and
                 (right.location.value>1) and (ispowerof2(int64(right.location.value)-1,power)) and
                 (power in [1..3]) and
                 not overflowcheck then
                begin
                  { constant is 2^power+1 with power in 1..3: emit one
                    LEA with base = index = left's register and
                    scalefactor = constant-1, into a new result register }
                  reference_reset_base(href,left.location.register,0,ctempposinvalid,0,[]);
                  href.index:=left.location.register;
                  href.scalefactor:=int64(right.location.value)-1;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_LEA,TCgSize2OpSize[opsize],href,left.location.register));
                end
              else
                begin
                  if extra_not then
                    begin
                      { right is loaded into a scratch register so it can
                        be inverted before it is and-ed into left }
                      r:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                      hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,right.resultdef,cgsize_orddef(opsize),right.location,r);
                      emit_reg(A_NOT,TCGSize2Opsize[opsize],r);
                      if comparison or (mboverflow and overflowcheck) then
                        cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                      emit_reg_reg(A_AND,TCGSize2Opsize[opsize],r,left.location.register);
                    end
                  else
                    emit_op_right_left(op,opsize,comparison or (mboverflow and overflowcheck));
                end;
            end;
        end;

      { only in case of overflow operations }
      { produce overflow code }
      { we must put it here directly, because sign of operation }
      { is in unsigned VAR!! }
      if mboverflow then
        begin
          if overflowcheck then
            begin
              current_asmdata.getjumplabel(hl4);
              { skip the FPC_OVERFLOW call when the flag test succeeds }
              if unsigned then
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_AE,hl4)
              else
                cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl4);
              if not comparison then
                cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl4);
            end;
        end;
    end;
  { Makes sure left.location is a LOC_REGISTER.

    If left is not yet in a register and swapping is permitted (noswap is
    false), a right operand that already lives in a (constant) register is
    exchanged with left and nf_swapped is toggled; otherwise left is
    loaded into a register.  Afterwards every non-constant operand whose
    size differs from opsize (ignoring signedness) is converted to opdef. }
  procedure tx86addnode.left_must_be_reg(opdef: tdef; opsize:TCGSize;noswap:boolean);
    var
      is_cmp_node,
      right_is_creg : boolean;
    begin
      { maybe we can reuse a constant register when the operation is a
        comparison that doesn't change the value of the register }
      is_cmp_node:=nodetype in [ltn,lten,gtn,gten,equaln,unequaln];

      if left.location.loc<>LOC_REGISTER then
        begin
          if noswap or
             not(right.location.loc in [LOC_REGISTER,LOC_CREGISTER]) then
            { no swap possible: load left itself }
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_cmp_node)
          else
            begin
              { right is in a register, so the locations can be swapped }
              right_is_creg:=(right.location.loc=LOC_CREGISTER);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
              if right_is_creg then
                begin
                  hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,is_cmp_node);
                  location:=left.location;
                end;
            end;
        end;

      { bring both operands to the operation size; right first, then left }
      if (right.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[right.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
      if (left.location.loc<>LOC_CONSTANT) and
         (tcgsize2unsigned[left.location.size]<>tcgsize2unsigned[opsize]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
    end;
  { Loads both operands into FPU registers: right first (if it is not
    there yet), then left.  nf_swapped is toggled whenever left was
    already in an FPU register, i.e. when it was not loaded last. }
  procedure tx86addnode.force_left_and_right_fpureg;
    begin
      if right.location.loc<>LOC_FPUREGISTER then
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);

      if left.location.loc=LOC_FPUREGISTER then
        { left was on the stack already, so the fpu operands are in the
          wrong order => swap }
        toggleflag(nf_swapped)
      else
        { left is loaded last }
        hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
    end;
  { Prepares both operands for an x87 instruction.

    At most one operand is left in memory: if a side is an
    OS_F32/OS_F64-sized LOC_REFERENCE/LOC_CREFERENCE it is handed back in
    'refnode'; everything else is loaded onto the FPU stack and 'refnode'
    is nil.  nf_swapped is toggled as needed to record the resulting
    operand order. }
  procedure tx86addnode.prepare_x87_locations(out refnode: tnode);
    var
      fpu_operands : longint;
    begin
      refnode:=nil;

      { later on, no mm registers are allowed, so transfer everything to memory here
        below it is loaded into an fpu register if needed }
      if left.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_CMMREGISTER,LOC_MMREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      { number of operands that already are in an fpu register }
      fpu_operands:=ord(left.location.loc=LOC_FPUREGISTER)+ord(right.location.loc=LOC_FPUREGISTER);
      case fpu_operands of
        0:
          begin
            { right always goes into a register, left may stay in memory }
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,right.location,right.resultdef,false);
            if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090803);
            if not(left.location.size in [OS_F32,OS_F64]) then
              hlcg.location_force_fpureg(current_asmdata.CurrAsmList,left.location,left.resultdef,false)
            else
              begin
                refnode:=left;
                toggleflag(nf_swapped);
              end;
          end;
        1:
          begin
            { the operand that is not on the stack is the memory candidate }
            if left.location.loc<>LOC_FPUREGISTER then
              refnode:=left
            else
              refnode:=right;
            if not(refnode.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
              InternalError(2013090801);
            if refnode.location.size in [OS_F32,OS_F64] then
              begin
                { usable as memory operand }
                if refnode=left then
                  toggleflag(nf_swapped);
              end
            else
              begin
                { not usable as memory operand: load it as well }
                hlcg.location_force_fpureg(current_asmdata.CurrAsmList,refnode.location,refnode.resultdef,false);
                if refnode=right then
                  toggleflag(nf_swapped);
                refnode:=nil;
              end;
          end;
        2:
          { fpu operands are always in the wrong order on the stack }
          toggleflag(nf_swapped);
        else
          InternalError(2013090802);
      end;
    end;
  { Emits "op right,left" with left.location.register as destination.

    right may be a register, a memory reference or a constant; subset
    locations are loaded into a register first.  If AllocFlags is set,
    the flags register is allocated directly before the instruction is
    added to the current assembler list. }
  procedure tx86addnode.emit_op_right_left(op:TAsmOp;opsize:TCgsize;AllocFlags:boolean);
{$ifdef x86_64}
    var
      tmpreg : tregister;
{$endif x86_64}

    { allocates the flags register if the caller asked for it }
    procedure alloc_flags_if_requested;
      begin
        if AllocFlags then
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
      end;

    begin
      if (right.location.loc in [LOC_CSUBSETREG,LOC_SUBSETREG,LOC_SUBSETREF,LOC_CSUBSETREF]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,right.resultdef,true);

      { left must be a register }
      case right.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER :
          begin
            alloc_flags_if_requested;
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],right.location.register,left.location.register));
          end;
        LOC_REFERENCE,
        LOC_CREFERENCE :
          begin
            { the reference is simplified before the flags are allocated }
            tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
            alloc_flags_if_requested;
            current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(op,TCGSize2Opsize[opsize],right.location.reference,left.location.register));
          end;
        LOC_CONSTANT :
          begin
{$ifdef x86_64}
            { x86_64 only supports signed 32 bits constants directly,
              anything outside the longint range goes through a register }
            if (opsize in [OS_S64,OS_64]) and
               not((right.location.value>=low(longint)) and (right.location.value<=high(longint))) then
              begin
                tmpreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                cg.a_load_const_reg(current_asmdata.CurrAsmList,opsize,right.location.value,tmpreg);
                alloc_flags_if_requested;
                current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(op,TCGSize2Opsize[opsize],tmpreg,left.location.register));
              end
            else
{$endif x86_64}
              begin
                alloc_flags_if_requested;
                current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(op,TCGSize2Opsize[opsize],right.location.value,left.location.register));
              end;
          end;
        else
          internalerror(200203232);
      end;
    end;
  { Returns the condition flags that represent the result of this
    comparison node.

    equaln/unequaln map to F_E/F_NE.  For the ordering comparisons the
    signed conditions (F_L, F_LE, F_G, F_GE) or, if 'unsigned' is set, the
    unsigned ones (F_B, F_BE, F_A, F_AE) are used; when nf_swapped is set
    the direction of the comparison is reversed.  Any other node type is
    an internal error. }
  function tx86addnode.getresflags(unsigned : boolean) : tresflags;
    var
      is_swapped : boolean;
    begin
      if nodetype=equaln then
        result:=F_E
      else if nodetype=unequaln then
        result:=F_NE
      else
        begin
          is_swapped:=nf_swapped in flags;
          if unsigned then
            begin
              if is_swapped then
                case nodetype of
                  ltn : result:=F_A;
                  lten : result:=F_AE;
                  gtn : result:=F_B;
                  gten : result:=F_BE;
                  else
                    internalerror(2013120107);
                end
              else
                case nodetype of
                  ltn : result:=F_B;
                  lten : result:=F_BE;
                  gtn : result:=F_A;
                  gten : result:=F_AE;
                  else
                    internalerror(2013120108);
                end;
            end
          else
            begin
              if is_swapped then
                case nodetype of
                  ltn : result:=F_G;
                  lten : result:=F_GE;
                  gtn : result:=F_L;
                  gten : result:=F_LE;
                  else
                    internalerror(2013120105);
                end
              else
                case nodetype of
                  ltn : result:=F_L;
                  lten : result:=F_LE;
                  gtn : result:=F_G;
                  gten : result:=F_GE;
                  else
                    internalerror(2013120106);
                end;
            end;
        end;
    end;
  { Returns the floating point condition flags (F_Fxx) for this
    comparison node.  equaln/unequaln map to F_FE/F_FNE; for the ordering
    comparisons the direction is reversed when nf_swapped is set.  Any
    other node type is an internal error. }
  function tx86addnode.getfpuresflags : tresflags;
    begin
      case nodetype of
        equaln :
          result:=F_FE;
        unequaln :
          result:=F_FNE;
        else
          if nf_swapped in flags then
            case nodetype of
              ltn : result:=F_FA;
              lten : result:=F_FAE;
              gtn : result:=F_FB;
              gten : result:=F_FBE;
              else
                internalerror(2014031402);
            end
          else
            case nodetype of
              ltn : result:=F_FB;
              lten : result:=F_FBE;
              gtn : result:=F_FA;
              gten : result:=F_FAE;
              else
                internalerror(2014031403);
            end;
      end;
    end;
  459. {*****************************************************************************
  460. AddSmallSet
  461. *****************************************************************************}
  462. {$ifndef i8086}
  { Generates code for the set operations on small sets.

    The node type selects the instruction: OR, XOR and AND for union,
    symmetric difference and intersection, BTS for adding a single
    element, and AND with one inverted operand for the difference.  Two
    shortcuts exist for the difference: a plain NOT when the minuend is
    the constant -1, and the BMI1 ANDN instruction when the CPU has it and
    the result is 4 (or, on x86_64, 8) bytes in size. }
  procedure tx86addnode.second_addsmallset;
    var
      setbase : aint;
      opdef : tdef;
      opsize : TCGSize;
      op : TAsmOp;
      extra_not,
      noswap : boolean;
      all_member_optimization:boolean;
      is_swapped : boolean;
    begin
      pass_left_right;

      noswap:=false;
      extra_not:=false;
      all_member_optimization:=false;
      opdef:=resultdef;
      opsize:=int_cgsize(opdef.size);

      { the set base is taken from whichever operand is the set }
      if left.resultdef.typ<>setdef then
        setbase:=tsetdef(right.resultdef).setbase
      else
        setbase:=tsetdef(left.resultdef).setbase;

      case nodetype of
        addn :
          begin
            { adding elements is not commutative }
            if (nf_swapped in flags) and (left.nodetype=setelementn) then
              swapleftright;
            { are we adding set elements ? }
            if right.nodetype<>setelementn then
              op:=A_OR
            else
              begin
                { no range support for smallsets! }
                if assigned(tsetelementnode(right).right) then
                  internalerror(43244);
                { btsb isn't supported }
                if opsize=OS_8 then
                  begin
                    opsize:=OS_32;
                    opdef:=u32inttype;
                  end;
                { bts requires both elements to be registers }
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,opdef,false);
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,opdef,true);
                register_maybe_adjust_setbase(current_asmdata.CurrAsmList,opdef,right.location,setbase);
                op:=A_BTS;
                noswap:=true;
              end;
          end;
        symdifn,
        xorn :
          op:=A_XOR;
        muln,
        andn :
          op:=A_AND;
        orn :
          op:=A_OR;
        subn :
          begin
            op:=A_AND;
            is_swapped:=nf_swapped in flags;
            if is_swapped then
              begin
                { right is the minuend, left gets inverted }
                all_member_optimization:=
                  (right.location.loc=LOC_CONSTANT) and (right.location.value=-1);
                if left.location.loc=LOC_CONSTANT then
                  left.location.value := not(left.location.value)
                else
                  extra_not:=true;
              end
            else
              begin
                { left is the minuend, right gets inverted }
                all_member_optimization:=
                  (left.location.loc=LOC_CONSTANT) and (left.location.value=-1);
                if right.location.loc=LOC_CONSTANT then
                  right.location.value := not(right.location.value)
                else
                  extra_not:=true;
              end;
          end;
        else
          internalerror(2003042215);
      end;

      if all_member_optimization then
        begin
          {A set expression [0..31]-x can be implemented with a simple NOT.}
          if nf_swapped in flags then
            begin
              { newly swapped also set swapped flag }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,false);
          emit_reg(A_NOT,TCGSize2Opsize[opsize],right.location.register);
          location:=right.location;
        end
      { can we use the BMI1 instruction andn? }
      else if (op=A_AND) and extra_not and (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) and
              (resultdef.size in [4{$ifdef x86_64},8{$endif x86_64}]) then
        begin
          location_reset(location,LOC_REGISTER,left.location.size);
          location.register:=cg.getintregister(current_asmdata.currAsmList,left.location.size);
          if nf_swapped in flags then
            begin
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end;
          hlcg.location_force_reg(current_asmdata.currAsmList,right.location,right.resultdef,opdef,true);
          { left may remain a memory reference, anything else goes into a register }
          if not(left.location.loc in [LOC_CREGISTER,LOC_REGISTER,LOC_CREFERENCE,LOC_REFERENCE]) then
            hlcg.location_force_reg(current_asmdata.currAsmList,left.location,left.resultdef,opdef,true);
          case left.location.loc of
            LOC_CREGISTER,LOC_REGISTER:
              emit_reg_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.register,right.location.register,location.register);
            LOC_CREFERENCE,LOC_REFERENCE:
              emit_ref_reg_reg(A_ANDN,TCGSize2Opsize[opsize],left.location.reference,right.location.register,location.register);
            else
              Internalerror(2018040201);
          end;
        end
      else
        begin
          { left must be a register }
          left_must_be_reg(opdef,opsize,noswap);
          emit_generic_code(op,opsize,true,extra_not,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          { left is always a register and contains the result }
          location:=left.location;
        end;

      { fix the changed opsize we did above because of the missing btsb }
      if opsize<>int_cgsize(resultdef.size) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,location,opdef,cgsize_orddef(int_cgsize(resultdef.size)),false);
    end;
  { Adds a single element to a small set.

    Sets of one byte are left to the inherited implementation.  For the
    others a constant element is or-ed into the set as a bit mask, while a
    variable element is set with BTS on a copy of the set in the result
    register. }
  procedure tx86addnode.second_addsmallsetelement;
    var
      setbase, mask: aint;
    begin
      if resultdef.size=1 then
        begin
          inherited second_addsmallsetelement;
          exit;
        end;

      if nodetype<>addn then
        internalerror(2022090502);
      { no range support for smallsets }
      if assigned(tsetelementnode(right).right) then
        internalerror(2022090501);

      pass_left_right;
      { setelementn is a special case, it must be on right }
      if (nf_swapped in flags) and
         (left.nodetype=setelementn) then
        swapleftright;

      force_reg_left_right(false,false);
      set_result_location_reg;
      setbase:=tsetdef(left.resultdef).setbase;

      if (right.location.loc = LOC_CONSTANT) then
        begin
          { element known at compile time: or its bit into the set }
          mask:=aint(1 shl (right.location.value-setbase));
          hlcg.a_op_const_reg_reg(current_asmdata.CurrAsmList,OP_OR,resultdef,
            mask,left.location.register,location.register);
        end
      else
        begin
          hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,resultdef,true);
          register_maybe_adjust_setbase(current_asmdata.CurrAsmList,resultdef,right.location,setbase);
          { copy the set into the result register, then set the bit there }
          if left.location.loc = LOC_CONSTANT then
            hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
              left.location.value,location.register)
          else
            hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,
              left.location.register,location.register);
          emit_reg_reg(A_BTS,TCGSize2Opsize[def_cgsize(resultdef)],right.location.register,location.register);
        end;
    end;
  632. {$endif not i8086}
  { Generates code for comparing two small sets.  The outcome is delivered
    in the CPU flags (LOC_FLAGS).  Equality tests are a plain CMP; the subset
    tests (<= / >=) first AND the operands and then compare, after which the
    node is turned into an equality test.  Any other node type raises an
    internal error. }
  procedure tx86addnode.second_cmpsmallset;
    var
      operanddef  : tdef;
      operandsize : TCGSize;
      cmpinstr    : TAsmOp;
    begin
      pass_left_right;
      operanddef:=left.resultdef;
      operandsize:=int_cgsize(operanddef.size);

      { every supported comparison ends in a CMP }
      cmpinstr:=A_CMP;
      case nodetype of
        equaln,
        unequaln :
          ;
        lten,gten:
          begin
            { swap for an unswapped "<=" or a swapped ">=" }
            if (nf_swapped in flags)=(nodetype=gten) then
              swapleftright;
            hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,operanddef,false);
            emit_op_right_left(A_AND,operandsize,False);
            { warning: ugly hack, a JE is needed so the node becomes equaln }
            nodetype:=equaln;
          end;
        else
          internalerror(2003042204);
      end;

      { the left operand has to be in a register }
      left_must_be_reg(operanddef,operandsize,false);
      emit_generic_code(cmpinstr,operandsize,true,false,false);

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      location_freetemp(current_asmdata.CurrAsmList,left.location);

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(true);
    end;
  668. {*****************************************************************************
  669. AddMMX
  670. *****************************************************************************}
  671. {$ifdef SUPPORT_MMX}
  { Generates code for a binary MMX operation (add, sub, mul, and, or, xor).

    The instruction is chosen from the node type, the MMX element type of the
    left operand and, for add/sub, the cs_mmx_saturation local switch.  The
    left operand is forced into an MMX register which also receives the
    result, except for a reversed (nf_swapped) subtraction whose right
    operand is not in an MMX register: there the right operand is loaded
    into a scratch register that becomes the result.

    Internal errors are raised for unsupported node types (2003042214), for
    element types without a matching instruction (201408201) and for operand
    locations that are neither MMX registers nor references.

    Fixes compared to the previous version:
      - saturated signed 16 bit subtraction used the byte instruction
        PSUBSB; it now uses PSUBSW
      - the reversed-subtraction path validated left.location (always an MMX
        register at that point) instead of right.location, so it always
        raised internalerror 2002032412 for a memory right operand }
  procedure tx86addnode.second_opmmx;
    var
      op : TAsmOp;
      cmpop : boolean;
      mmxbase : tmmxtype;
      hreg,
      hregister : tregister;
    begin
      pass_left_right;

      { no MMX comparison is generated here, so cmpop stays false }
      cmpop:=false;
      { A_NOP marks "no instruction found yet" }
      op:=A_NOP;

      mmxbase:=mmx_type(left.resultdef);
      location_reset(location,LOC_MMXREGISTER,def_cgsize(resultdef));
      case nodetype of
        addn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PADDSB;
                  mmxu8bit:
                    op:=A_PADDUSB;
                  mmxs16bit,mmxfixed16:
                    op:=A_PADDSW;
                  mmxu16bit:
                    op:=A_PADDUSW;
                  else
                    ;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PADDB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PADDW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PADDD;
                  else
                    ;
                end;
              end;
          end;
        muln :
          begin
            case mmxbase of
              mmxs16bit,mmxu16bit:
                op:=A_PMULLW;
              mmxfixed16:
                op:=A_PMULHW;
              else
                ;
            end;
          end;
        subn :
          begin
            if (cs_mmx_saturation in current_settings.localswitches) then
              begin
                case mmxbase of
                  mmxs8bit:
                    op:=A_PSUBSB;
                  mmxu8bit:
                    op:=A_PSUBUSB;
                  mmxs16bit,mmxfixed16:
                    { word-sized signed saturation, mirrors A_PADDSW above }
                    op:=A_PSUBSW;
                  mmxu16bit:
                    op:=A_PSUBUSW;
                  else
                    ;
                end;
              end
            else
              begin
                case mmxbase of
                  mmxs8bit,mmxu8bit:
                    op:=A_PSUBB;
                  mmxs16bit,mmxu16bit,mmxfixed16:
                    op:=A_PSUBW;
                  mmxs32bit,mmxu32bit:
                    op:=A_PSUBD;
                  else
                    ;
                end;
              end;
          end;
        xorn:
          op:=A_PXOR;
        orn:
          op:=A_POR;
        andn:
          op:=A_PAND;
        else
          internalerror(2003042214);
      end;

      { the element type had no matching instruction }
      if op = A_NOP then
        internalerror(201408201);

      { left and right no register? }
      { then one must be demanded   }
      if (left.location.loc<>LOC_MMXREGISTER) then
        begin
          if (right.location.loc=LOC_MMXREGISTER) then
            begin
              { right already is in a register: just exchange the operands }
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              { register variable ? }
              if (left.location.loc=LOC_CMMXREGISTER) then
                begin
                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  emit_reg_reg(A_MOVQ,S_NO,left.location.register,hregister);
                end
              else
                begin
                  if not(left.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203245);

                  hregister:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,left.location.reference,hregister);
                end;

              location_reset(left.location,LOC_MMXREGISTER,OS_NO);
              left.location.register:=hregister;
            end;
        end;

      { at this point, left.location.loc should be LOC_MMXREGISTER }
      if right.location.loc<>LOC_MMXREGISTER then
        begin
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { reversed subtraction: load right into a scratch register and
                apply the operation with left as source operand }
              hreg:=tcgx86(cg).getmmxregister(current_asmdata.CurrAsmList);
              if right.location.loc=LOC_CMMXREGISTER then
                begin
                  emit_reg_reg(A_MOVQ,S_NO,right.location.register,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end
              else
                begin
                  { it is the right operand that gets dereferenced below, so
                    it is the one that has to be a reference }
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(2002032412);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(A_MOVQ,S_NO,right.location.reference,hreg);
                  emit_reg_reg(op,S_NO,left.location.register,hreg);
                end;
              location.register:=hreg;
            end
          else
            begin
              if (right.location.loc=LOC_CMMXREGISTER) then
                emit_reg_reg(op,S_NO,right.location.register,left.location.register)
              else
                begin
                  if not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) then
                    internalerror(200203246);
                  tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                  emit_ref_reg(op,S_NO,right.location.reference,left.location.register);
                end;
              location.register:=left.location.register;
            end;
        end
      else
        begin
          { right.location=LOC_MMXREGISTER }
          if (nodetype=subn) and (nf_swapped in flags) then
            begin
              { the result lands in the right register, so exchange the
                locations to make "left" describe the result again }
              emit_reg_reg(op,S_NO,left.location.register,right.location.register);
              location_swap(left.location,right.location);
              toggleflag(nf_swapped);
            end
          else
            begin
              emit_reg_reg(op,S_NO,right.location.register,left.location.register);
            end;
          location.register:=left.location.register;
        end;

      location_freetemp(current_asmdata.CurrAsmList,right.location);
      if cmpop then
        location_freetemp(current_asmdata.CurrAsmList,left.location);
    end;
  853. {$endif SUPPORT_MMX}
  854. {*****************************************************************************
  855. AddFloat
  856. *****************************************************************************}
  { Generates SSE code for a floating point add/sub/mul/div node; the result
    is placed in an MM register.

    Special case: with an fpu type of at least SSE3, "sqr(a) + sqr(b)" and
    "sqr(a) - sqr(b)" on vector-fpu types are evaluated with one packed
    multiply followed by a horizontal add/subtract.  For that purpose the
    two sqr() inline nodes are replaced by their arguments before the
    operands are evaluated.

    Raises an internal error for node types other than addn, muln, subn and
    slashn. }
  procedure tx86addnode.second_addfloatsse;
    var
      op : topcg;
      sqr_sum,
      both_on_fpu : boolean;
      inner : tnode;
    begin
      sqr_sum:=(current_settings.fputype>=fpu_sse3) and
        use_vectorfpu(resultdef) and
        (nodetype in [addn,subn]) and
        (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
        (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real);
      if sqr_sum then
        begin
          { unwrap both sqr() nodes, keeping only their arguments }
          inner:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=inner;

          inner:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=inner;
        end;

      pass_left_right;

      both_on_fpu:=(left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
        (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpu then
        toggleflag(nf_swapped);

      if (nf_swapped in flags) then
        begin
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch       }
          if both_on_fpu then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200312231);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the left register is reused for the result }
          location:=left.location;
          if is_double(resultdef) then
            begin
              { combine both doubles in one register, square them in one go,
                then add/subtract horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(201108162);
              end;
            end
          else
            begin
              { interleave both singles in the result register }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all lanes, then add/subtract horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(201108163);
              end;
            end
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
        begin
          { start from a copy of right in a fresh result register }
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,right.location.size,location.size,right.location.register,location.register,mms_movescalar);
          { an x87 operand is spilled to memory rather than forced into an
            mm register: there is no direct fpu->mm register copy
            instruction, so going through memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,left.location,location.register,mms_movescalar);
          if left.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
        end
      else
        begin
          if nf_swapped in flags then
            swapleftright;

          { x87 operands go through memory (see the remark on the missing
            fpu->mm register copy instruction above) }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);

          { load left into the result register ... }
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          if left.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);

          { ... and apply the operation with right as second operand }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,location.size,right.location,location.register,mms_movescalar);
          if right.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,right.location.reference);
        end;
    end;
  { Generates code for a floating point add/sub/mul/div node using the
    three-operand code generator calls (a_opmm_*_reg_reg); the result is
    placed in a freshly allocated MM register.

    A multiplication by the constant 2 is emitted as an addition of the
    other operand to itself.  The sqr(a) +/- sqr(b) shortcut is only
    compiled in when the "dummy" define is set; otherwise sqr_sum is always
    false.

    Raises an internal error for node types other than addn, muln, subn and
    slashn. }
  procedure tx86addnode.second_addfloatavx;
    var
      op : topcg;
      sqr_sum,
      both_on_fpu : boolean;
{$ifdef dummy}
      tmp : tnode;
{$endif dummy}
    begin
      sqr_sum:=false;
{$ifdef dummy}
      if (current_settings.fputype>=fpu_sse3) and
         use_vectorfpu(resultdef) and
         (nodetype in [addn,subn]) and
         (left.nodetype=inlinen) and (tinlinenode(left).inlinenumber=in_sqr_real) and
         (right.nodetype=inlinen) and (tinlinenode(right).inlinenumber=in_sqr_real) then
        begin
          sqr_sum:=true;
          { unwrap both sqr() nodes, keeping only their arguments }
          tmp:=tinlinenode(left).left;
          tinlinenode(left).left:=nil;
          left.free;
          left:=tmp;

          tmp:=tinlinenode(right).left;
          tinlinenode(right).left:=nil;
          right.free;
          right:=tmp;
        end;
{$endif dummy}

      pass_left_right;

      both_on_fpu:=(left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
        (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]);

      { fpu operands are always in reversed order on the stack }
      if both_on_fpu then
        toggleflag(nf_swapped);

      if (nf_swapped in flags) then
        begin
          { can't use swapleftright if both are on the fpu stack, since then }
          { both are "R_ST" -> nothing would change -> manually switch       }
          if both_on_fpu then
            emit_none(A_FXCH,S_NO)
          else
            swapleftright;
        end;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(2003122303);
      end;

      location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));

      if sqr_sum then
        begin
          if nf_swapped in flags then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,false);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          { the left register is reused for the result }
          location:=left.location;
          if is_double(resultdef) then
            begin
              { combine both doubles in one register, square them in one go,
                then add/subtract horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,right.location.register,location.register));
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPD,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPD,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPD,S_NO,location.register,location.register));
                else
                  internalerror(2011081601);
              end;
            end
          else
            begin
              { interleave both singles in the result register }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_UNPCKLPS,S_NO,right.location.register,location.register));
              { ensure that bits 64..127 contain valid values }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_SHUFPD,S_NO,%00,location.register,location.register));
              { square all lanes, then add/subtract horizontally }
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MULPS,S_NO,location.register,location.register));
              case nodetype of
                addn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HADDPS,S_NO,location.register,location.register));
                subn:
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_HSUBPS,S_NO,location.register,location.register));
                else
                  internalerror(2011081604);
              end;
            end
        end
      { left*2 ? -> left+left }
      else if (nodetype=muln) and is_constrealnode(right) and is_number_float(trealconstnode(right).value_real) and (trealconstnode(right).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            left.location.register,
            left.location.register,
            location.register,
            mms_movescalar);
        end
      { right*2 ? -> right+right }
      else if (nodetype=muln) and is_constrealnode(left) and is_number_float(trealconstnode(left).value_real) and (trealconstnode(left).value_real=2) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,right.location.size);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,right.location,right.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_ADD,location.size,
            right.location.register,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      { we can use only right as left operand if the operation is commutative }
      else if (right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) and (op in [OP_ADD,OP_MUL]) then
        begin
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an x87 operand is spilled to memory rather than forced into an
            mm register: there is no direct fpu->mm register copy
            instruction, so going through memory probably gives shorter code }
          if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
            left.location,
            right.location.register,
            location.register,
            mms_movescalar);
        end
      else
        begin
          if (nf_swapped in flags) then
            swapleftright;

          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,left.location.size);
          { an x87 right operand goes through memory (no direct fpu->mm
            register copy instruction exists) }
          if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
            hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);
          cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,location.size,
            right.location,
            left.location.register,
            location.register,
            mms_movescalar);
        end;
    end;
  { First pass for x86 add nodes.  Makes sure both operands carry a result
    def, keeps the operands of non-vector floating point operations out of
    registers, runs the inherited pass_1 and finally sets the expected
    location to LOC_MMREGISTER for vector-fpu results.
    Returns whatever the inherited pass_1 returns. }
  function tx86addnode.pass_1: tnode;
    begin
      { only pass_1 might set the resultdef as it could be set to nil by some previous
        code transformation. As we need a valid left/right.resultdef later on, ensure
        a valid result def is set, see also issue #40727 }
      if left.resultdef=nil then
        typecheckpass(left);
      if right.resultdef=nil then
        typecheckpass(right);

      { on x86, we do not support fpu registers, so for operations that use
        the x87 it is normally useful not to put the operands into registers
        (they would be mm registers); this has to be done before the
        inherited pass_1 so a proper expectloc is computed }
      if ((left.resultdef.typ=floatdef) or (right.resultdef.typ=floatdef)) and
         not(use_vectorfpu(left.resultdef) or
             use_vectorfpu(right.resultdef) or
             use_vectorfpu(resultdef)) then
        begin
          make_not_regable(left,[ra_addr_regable]);
          make_not_regable(right,[ra_addr_regable]);
        end;

      Result:=inherited pass_1;

      { correct expectloc; it does not matter if Result is set, as another
        pass_1 is run on it which will fix that one }
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER;
    end;
  { Node-level simplification for x86 add nodes.

    With level 1 optimisations enabled and overflow checking off, a
    comparison "(x mod d) = 0" or "(x mod d) <> 0" (in either operand
    order) with a constant d >= 3 that is not a power of two is replaced by
    a multiplication with the multiplicative inverse of d followed by an
    unsigned comparison, so no division has to be executed.  In every other
    case - and when the numerator turns out to be a constant - the inherited
    simplify is used.

    Returns the replacement node tree, or the result of the inherited
    simplify. }
  function tx86addnode.simplify(forinline : boolean) : tnode;
    var
      modnode, zeronode, curnode, constnode: TNode;
      lt, rt, cmptype: TNodeType;
      curdef: TDef;
      DoOptimisation: Boolean;
      reciprocal, comparison, divisor: AWord;
      shift, N: Byte;
    begin
      { keep the operand node types in locals so they are not re-read
        through the node pointers all the time }
      rt:=right.nodetype;
      lt:=left.nodetype;
      DoOptimisation:=False;

{$if defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}
      if (cs_opt_level1 in current_settings.optimizerswitches) and
         { The presence of overflow checks tends to cause internal errors with the multiplication nodes }
         not (cs_check_overflow in current_settings.localswitches) and
         (nodetype in [equaln,unequaln]) then
        begin
          { find the "mod" operand and the constant zero it is compared with }
          modnode:=nil;
          zeronode:=nil;
          if (lt=modn) and (rt=ordconstn) and (TOrdConstNode(right).value.uvalue=0) then
            begin
              modnode:=left;
              zeronode:=right;
            end
          else if (rt=modn) and (lt=ordconstn) and (TOrdConstNode(left).value.uvalue=0) then
            begin
              modnode:=right;
              zeronode:=left;
            end;

          if Assigned(modnode) and (TModDivNode(modnode).right.nodetype=ordconstn) and
{$ifndef cpu64bitalu}
             { Converting Int64 and QWord division doesn't work under i386 }
{$ifndef cpu32bitalu}
             (TModDivNode(modnode).resultdef.size < 4) and
{$else cpu32bitalu}
             (TModDivNode(modnode).resultdef.size < 8) and
{$endif cpu32bitalu}
{$endif cpu64bitalu}
             (TOrdConstNode(TModDivNode(modnode).right).value>=3) then
            begin
              divisor:=TOrdConstNode(TModDivNode(modnode).right).value.uvalue;

              { Exclude powers of 2, as there are more efficient ways to handle those }
              if PopCnt(divisor)>1 then
                begin
                  if is_signed(TModDivNode(modnode).left.resultdef) then
                    begin
                      { See pages 250-251 of Hacker's Delight, Second Edition
                        for an explanation and proof of the algorithm, but
                        essentially, we're doing the following:

                        - Convert the divisor d to the form k.2^b if it isn't
                          already odd (in which case, k = d and b = 0)
                        - Calculate r, the multiplicative inverse of k modulo 2^N
                        - Calculate c = floor(2^(N-1) / k) & -(2^b)
                        - Let q = ((n * r) + c) ror b (mod 2^N)
                        - Repurpose c to equal floor(2c / 2^b) = c shr (b - 1)
                          (some RISC platforms will benefit from doing this over
                          precalculating the modified constant. For x86,
                          it's better with the constant precalculated for
                          32-bit and under, but for 64-bit, use SHR. )
                        - If q is below or equal to c, then (n mod d) = 0
                      }
                      while True do
                        begin
                          curnode:=TModDivNode(modnode).left;
                          case curnode.nodetype of
                            typeconvn:
                              begin
                                curdef:=TTypeConvNode(curnode).left.resultdef;
                                { Try to work on the smaller, pre-conversion
                                  signed integer type to reduce code size and
                                  increase speed }
                                if is_signed(curdef) and
                                   is_integer(curdef) and
                                   { Byte-sized multiplications can cause problems }
                                   (curdef.size>=2) and
                                   { Make sure the divisor is in range }
                                   (divisor>=TOrdDef(curdef).low) and
                                   (divisor<=TOrdDef(curdef).high) then
                                  begin
                                    TOrdConstNode(TModDivNode(modnode).right).resultdef:=curdef;
                                    TOrdConstNode(zeronode).resultdef:=curdef;
                                    TModDivNode(modnode).resultdef:=curdef;

                                    { Unlink and destroy the typeconv node,
                                      then look at what was below it }
                                    TModDivNode(modnode).left:=TTypeConvNode(curnode).left;
                                    TTypeConvNode(curnode).left:=nil;
                                    curnode.Free;
                                    Continue;
                                  end;
                              end;
                            ordconstn:
                              begin
                                { Constant numerator: just simplify into a constant }
                                Result:=inherited simplify(forinline);
                                Exit;
                              end;
                            else
                              ;
                          end;

                          DoOptimisation:=True;
                          Break;
                        end;

                      if DoOptimisation then
                        begin
                          curdef:=TModDivNode(modnode).left.resultdef;

                          if nodetype = equaln then
                            cmptype:=lten
                          else
                            cmptype:=gtn;

                          N:=curdef.size*8;
                          calc_mul_inverse(N, divisor, reciprocal, shift);

                          { Construct the following node tree for odd divisors:
                              <lten> (for equaln) or <gtn> (for unequaln)
                                <addn>
                                  <muln>
                                    <typeconv signed-to-unsigned>
                                      <numerator node (TModDivNode(modnode).left)>
                                    <reciprocal constant>
                                  <comparison constant (effectively a signed shift)>
                                <comparison constant * 2>

                            For even divisors, convert them to the form k.2^b, with
                            odd k, then construct the following:
                              <lten> (for equaln) or <gtn> (for unequaln)
                                <ror>
                                  (b)
                                  <addn>
                                    <muln>
                                      <typeconv signed-to-unsigned>
                                        <numerator node (TModDivNode(modnode).left)>
                                      <reciprocal constant>
                                    <comparison constant (effectively a signed shift)>
                                <comparison constant shr (b - 1)>
                          }

                          { signed-to-unsigned conversion of the numerator }
                          curnode:=ctypeconvnode.create_internal(TModDivNode(modnode).left, curdef);
                          TTypeConvNode(curnode).convtype:=tc_int_2_int;
                          curdef:=get_unsigned_inttype(curdef);
                          curnode.resultdef:=curdef;

                          { the numerator now belongs to the new tree }
                          TModDivNode(modnode).left:=nil;

                          { numerator * reciprocal }
                          constnode:=cordconstnode.create(reciprocal, curdef, False);
                          constnode.resultdef:=curdef;

                          curnode:=caddnode.create_internal(muln, curnode, constnode);
                          curnode.resultdef:=curdef;

{$push}
{$warnings off}
                          if shift>0 then
                            comparison:=((aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div (divisor shr shift)) and -(1 shl shift)
                          else
                            comparison:=(aWord(1) shl ((N-1) and (SizeOf(aWord)*8-1))) div divisor;
{$pop}

                          { ... + comparison constant }
                          constnode:=cordconstnode.create(comparison, curdef, False);
                          constnode.resultdef:=curdef;

                          curnode:=caddnode.create_internal(addn, curnode, constnode);
                          curnode.resultdef:=curdef;

                          if shift>0 then
                            begin
                              { even divisor: rotate right by the shift count }
                              constnode:=cordconstnode.create(shift, u8inttype, False);
                              constnode.resultdef:=u8inttype;
                              curnode:=cinlinenode.createintern(in_ror_x_y,false,
                                ccallparanode.create(constnode,
                                ccallparanode.create(curnode, nil)));
                              curnode.resultdef:=curdef;

                              constnode:=cordconstnode.create(comparison shr (shift - 1), curdef, False);
                            end
                          else
                            constnode:=cordconstnode.create(comparison*2, curdef, False);

                          constnode.resultdef:=curdef;

                          Result:=CAddNode.create_internal(cmptype, curnode, constnode);
                          Result.resultdef:=resultdef;
                          Exit;
                        end;
                    end
                  else
                    begin
                      { For bit length N, convert "(x mod d) = 0" or "(x mod d) <> 0", where
                        d is an odd-numbered integer constant, to "(x * r) <= m", where
                        dr = 1 (mod 2^N) and m = floor(2^N / d).

                        If d is even, convert to the form k.2^b, where k is odd, then
                        convert to "(x * r) ror b <= m", where kr = 1 (mod 2^N) and
                        m = floor(2^N / d) = floor(2^(N-b) / k) }
                      while True do
                        begin
                          curnode:=TModDivNode(modnode).left;
                          case curnode.nodetype of
                            typeconvn:
                              begin
                                curdef:=TTypeConvNode(curnode).left.resultdef;
                                { Try to work on the smaller, pre-conversion
                                  unsigned integer type to reduce code size
                                  and increase speed }
                                if not is_signed(curdef) and
                                   is_integer(curdef) and
                                   { Byte-sized multiplications can cause problems }
                                   (curdef.size>=2) and
                                   { Make sure the divisor is in range }
                                   (divisor>=TOrdDef(curdef).low) and
                                   (divisor<=TOrdDef(curdef).high) then
                                  begin
                                    TOrdConstNode(TModDivNode(modnode).right).resultdef:=curdef;
                                    TOrdConstNode(zeronode).resultdef:=curdef;
                                    TModDivNode(modnode).resultdef:=curdef;

                                    { Unlink and destroy the typeconv node,
                                      then look at what was below it }
                                    TModDivNode(modnode).left:=TTypeConvNode(curnode).left;
                                    TTypeConvNode(curnode).left:=nil;
                                    curnode.Free;
                                    Continue;
                                  end;
                              end;
                            ordconstn:
                              begin
                                { Constant numerator: just simplify into a constant }
                                Result:=inherited simplify(forinline);
                                Exit;
                              end;
                            else
                              ;
                          end;

                          DoOptimisation:=True;
                          Break;
                        end;

                      if DoOptimisation then
                        begin
                          curdef:=TModDivNode(modnode).left.resultdef;

                          { Construct the following node tree for odd divisors:
                              <lten> (for equaln) or <gtn> (for unequaln)
                                <muln>
                                  <numerator node (TModDivNode(modnode).left)>
                                  <reciprocal constant>
                                (2^N / divisor)

                            For even divisors, convert them to the form k.2^b, with
                            odd k, then construct the following:
                              <lten> (for equaln) or <gtn> (for unequaln)
                                <ror>
                                  (b)
                                  <muln>
                                    <numerator node (TModDivNode(modnode).left)>
                                    <reciprocal constant>
                                (2^N / divisor)
                          }

                          if nodetype=equaln then
                            cmptype:=lten
                          else
                            cmptype:=gtn;

                          N:=curdef.size*8;
                          calc_mul_inverse(N, divisor, reciprocal, shift);

                          { numerator * reciprocal }
                          constnode:=cordconstnode.create(reciprocal, curdef, False);
                          constnode.resultdef:=curdef;

                          curnode:=caddnode.create_internal(muln, TModDivNode(modnode).left, constnode);
                          curnode.resultdef:=curdef;

                          { the numerator now belongs to the new tree }
                          TModDivNode(modnode).left:=nil;

                          if shift>0 then
                            begin
                              { even divisor: rotate right by the shift count }
                              constnode:=cordconstnode.create(shift, u8inttype, False);
                              constnode.resultdef:=u8inttype;
                              curnode:=cinlinenode.createintern(in_ror_x_y,false,
                                ccallparanode.create(constnode,
                                ccallparanode.create(curnode, nil)));
                              curnode.resultdef:=curdef;

                              comparison:=(aWord(1) shl ((N-shift) and (SizeOf(aWord)*8-1))) div (divisor shr shift);
                            end
                          else
                            begin
{$push}
{$warnings off}
                              { Because 2^N and divisor are relatively prime,
                                floor(2^N / divisor) = floor((2^N - 1) / divisor) }
                              comparison:=(aWord(not 0) shr (((SizeOf(aWord)*8)-N) and (SizeOf(aWord)*8-1))) div divisor;
{$pop}
                            end;

                          constnode:=cordconstnode.create(comparison, curdef, False);
                          constnode.resultdef:=curdef;

                          Result:=CAddNode.create_internal(cmptype, curnode, constnode);
                          Result.resultdef:=resultdef;
                          Exit;
                        end;
                    end;
                end;
            end;
        end;
{$ifend defined(cpu64bitalu) or defined(cpu32bitalu) or defined(cpu16bitalu)}

      Result:=inherited simplify(forinline);
    end;
  { Tells the caller whether a fused multiply-add may be used for this node.
    On i8086 the decision is left to the inherited implementation. Everywhere
    else FMA is only considered when the result is handled by the vector fpu
    and the selected fpu type offers FMA or FMA4. }
  function tx86addnode.use_fma : boolean;
    begin
{$ifdef i8086}
      Result:=inherited use_fma;
{$else i8086}
      Result:=false;
      { mixing x87 registers and fma makes no sense, so the result has to
        stay in an xmm register }
      if use_vectorfpu(resultdef) then
        Result:=(fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[];
{$endif i8086}
    end;
  { Emits a scalar floating point comparison with (V)COMISS/(V)COMISD and
    reports the outcome as a flags location. Only single and double operands
    are accepted; anything else raises an internal error. }
  procedure tx86addnode.second_cmpfloatvector;
    const
      { both tables are indexed by UseAVX }
      single_cmp: array[boolean] of tasmop = (A_COMISS,A_VCOMISS);
      double_cmp: array[boolean] of tasmop = (A_COMISD,A_VCOMISD);
    var
      cmpop : tasmop;
    begin
      if is_single(left.resultdef) then
        cmpop:=single_cmp[UseAVX]
      else
        begin
          if not is_double(left.resultdef) then
            internalerror(200402222);
          cmpop:=double_cmp[UseAVX];
        end;

      pass_left_right;

      { two operands that both live on the x87 stack are in reversed order }
      if (left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) and
         (right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER]) then
        toggleflag(nf_swapped);

      location_reset(location,LOC_FLAGS,OS_NO);

      { There is no direct x87 -> mm register move, so x87 operands are
        spilled to memory.  They are deliberately not loaded into mm
        registers: one memory operand can be used by the compare instruction
        itself, which gives shorter code. }
      if left.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,left.location,left.resultdef);
      if right.location.loc in [LOC_FPUREGISTER,LOC_CFPUREGISTER] then
        hlcg.location_force_mem(current_asmdata.CurrAsmList,right.location,right.resultdef);

      if right.location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER] then
        begin
          { right is already in a register: compare against it and record
            that the operands were exchanged }
          case left.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,left.location.register,right.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,left.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,left.location.reference,right.location.register));
              end;
            else
              internalerror(200402221);
          end;
          toggleflag(nf_swapped);
        end
      else
        begin
          { otherwise left becomes the register operand }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          case right.location.loc of
            LOC_MMREGISTER,LOC_CMMREGISTER:
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cmpop,S_NO,right.location.register,left.location.register));
            LOC_REFERENCE,LOC_CREFERENCE:
              begin
                tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList,right.location.reference);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(cmpop,S_NO,right.location.reference,left.location.register));
              end;
            else
              internalerror(200402223);
          end;
        end;

      location.resflags:=getfpuresflags;
      location_freetemp(current_asmdata.CurrAsmList,left.location);
      location_freetemp(current_asmdata.CurrAsmList,right.location);
    end;
  { Generates code for +, -, * and / on operands that fit into a single
    MM register.  Operand types that do not fit are not supported yet and
    raise an internal error, as does any other node type.

    The element size handed to the code generator is always derived from the
    element type of the (array) operand.  The commutative branch used to cast
    left.resultdef itself to tfloatdef while the other branch, working on the
    very same left.resultdef, went through tarraydef(...).elementdef; both
    casts cannot be valid for one def, so the array form is now used
    everywhere.
    NOTE(review): this assumes fits_in_mm_register only accepts array defs
    with a float element type, as the pre-existing tarraydef branch already
    did - confirm against fits_in_mm_register. }
  procedure tx86addnode.second_opvector;
    var
      op : topcg;
      elemsize : tcgsize;
    begin
      pass_left_right;
      if (nf_swapped in flags) then
        swapleftright;

      case nodetype of
        addn :
          op:=OP_ADD;
        muln :
          op:=OP_MUL;
        subn :
          op:=OP_SUB;
        slashn :
          op:=OP_DIV;
        else
          internalerror(200610071);
      end;

      if fits_in_mm_register(left.resultdef) then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          { size of one vector element, shared by every emit call below }
          elemsize:=tfloat2tcgsize[tfloatdef(tarraydef(left.resultdef).elementdef).floattype];

          { we can use only right as left operand if the operation is commutative }
          if (right.location.loc=LOC_MMREGISTER) and (op in [OP_ADD,OP_MUL]) then
            begin
              if UseAVX then
                begin
                  { three operand form: result goes into a fresh register }
                  location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
                  cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,elemsize,
                    left.location,right.location.register,location.register,nil);
                end
              else
                begin
                  { two operand form: right's register is reused for the result }
                  location.register:=right.location.register;
                  cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
                    left.location,location.register,nil);
                end;
            end
          else
            begin
              location_force_mmreg(current_asmdata.CurrAsmList,left.location,false);
              if UseAVX then
                begin
                  location.register:=cg.getmmregister(current_asmdata.CurrAsmList,OS_VECTOR);
                  cg.a_opmm_loc_reg_reg(current_asmdata.CurrAsmList,op,elemsize,
                    right.location,left.location.register,location.register,nil);
                end
              else
                begin
                  location.register:=left.location.register;
                  cg.a_opmm_loc_reg(current_asmdata.CurrAsmList,op,elemsize,
                    right.location,location.register,nil);
                end;
            end;
        end
      else
        begin
          { not yet supported }
          internalerror(200610072);
        end
    end;
  { Generates code for floating point +, -, * and /.  Values handled by the
    vector fpu are delegated to the SSE/AVX routines; everything else is done
    on the x87 stack and the result is left in ST. }
  procedure tx86addnode.second_addfloat;
    const
      { Indexed by "one operand is used directly from memory".  The
        register-only entries are the popping forms, matching the
        dec_fpu_stack done for them below. }
      add_ops: array[boolean] of TAsmOp = (A_FADDP,A_FADD);
      mul_ops: array[boolean] of TAsmOp = (A_FMULP,A_FMUL);
      sub_ops: array[boolean] of TAsmOp = (A_FSUBP,A_FSUB);
      rsub_ops: array[boolean] of TAsmOp = (A_FSUBRP,A_FSUBR);
      div_ops: array[boolean] of TAsmOp = (A_FDIVP,A_FDIV);
      rdiv_ops: array[boolean] of TAsmOp = (A_FDIVRP,A_FDIVR);
    var
      fpuop : TAsmOp;
      memnode, conv, inner : tnode;
      frommem, reversed : boolean;
    begin
      if use_vectorfpu(resultdef) then
        begin
          if not UseAVX then
            second_addfloatsse
          else
            second_addfloatavx;
          exit;
        end;

      { A widening type conversion from single/double can be done by the
        x87 operation itself, so drop such a conversion node on either side. }
      if left.nodetype=typeconvn then
        begin
          inner:=ttypeconvnode(left).left;
          if is_double(inner.resultdef) or is_single(inner.resultdef) then
            begin
              conv:=left;
              left:=inner;
              { detach the child first so freeing the conversion keeps it alive }
              ttypeconvnode(conv).left:=nil;
              conv.Free;
            end;
        end;
      if right.nodetype=typeconvn then
        begin
          inner:=ttypeconvnode(right).left;
          if is_double(inner.resultdef) or is_single(inner.resultdef) then
            begin
              conv:=right;
              right:=inner;
              ttypeconvnode(conv).left:=nil;
              conv.Free;
            end;
        end;

      pass_left_right;
      prepare_x87_locations(memnode);
      frommem:=assigned(memnode);
      { read the swap state only now, after the locations have been prepared }
      reversed:=nf_swapped in flags;

      case nodetype of
        addn :
          fpuop:=add_ops[frommem];
        muln :
          fpuop:=mul_ops[frommem];
        subn :
          if reversed then
            fpuop:=rsub_ops[frommem]
          else
            fpuop:=sub_ops[frommem];
        slashn :
          if reversed then
            fpuop:=rdiv_ops[frommem]
          else
            fpuop:=div_ops[frommem];
        else
          internalerror(2003042203);
      end;

      if not frommem then
        begin
          emit_reg_reg(fpuop,S_NO,NR_ST,NR_ST1);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        emit_ref(fpuop,tcgsize2opsize[memnode.location.size],memnode.location.reference);

      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_ST;
    end;
  { Generates code for a floating point comparison and returns the result as
    a flags location.  Operands handled by the vector fpu are passed on to
    second_cmpfloatvector.  Otherwise both operands are forced onto the x87
    stack; targets older than the Pentium II (never x86_64) use
    FCOMPP + status word + SAHF, all others use FCOMIP. }
  procedure tx86addnode.second_cmpfloat;
{$ifdef i8086}
    var
      statusword: treference;
{$endif i8086}
    begin
      if use_vectorfpu(left.resultdef) or use_vectorfpu(right.resultdef) then
        begin
          second_cmpfloatvector;
          exit;
        end;

      pass_left_right;
      force_left_and_right_fpureg;

{$ifndef x86_64}
      if current_settings.cputype<cpu_Pentium2 then
        begin
          { compare and pop both operands }
          emit_none(A_FCOMPP,S_NO);
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;

          { transfer the fpu condition codes into the cpu flags }
{$ifdef i8086}
          if current_settings.cputype < cpu_286 then
            begin
              { store the status word in a 2 byte temp and load its upper
                byte into AH for SAHF }
              tg.gettemp(current_asmdata.CurrAsmList,2,2,tt_normal,statusword);
              emit_ref(A_FSTSW,S_NO,statusword);
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              inc(statusword.offset);
              emit_ref_reg(A_MOV,S_B,statusword,NR_AH);
              dec(statusword.offset);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
              tg.ungettemp(current_asmdata.CurrAsmList,statusword);
            end
          else
{$endif i8086}
            begin
              cg.getcpuregister(current_asmdata.CurrAsmList,NR_AX);
              emit_reg(A_FNSTSW,S_NO,NR_AX);
              emit_none(A_SAHF,S_NO);
              cg.ungetcpuregister(current_asmdata.CurrAsmList,NR_AX);
            end;
          if cs_fpu_fwait in current_settings.localswitches then
            current_asmdata.CurrAsmList.concat(Taicpu.Op_none(A_FWAIT,S_NO));
        end
      else
{$endif x86_64}
        begin
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_FCOMIP,S_NO,NR_ST1,NR_ST0));
          { fcomip removes just one operand, so the other one is popped
            explicitly }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg(A_FSTP,S_NO,NR_ST0));
          tcgx86(cg).dec_fpu_stack;
          tcgx86(cg).dec_fpu_stack;
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getfpuresflags;
    end;
  1700. {*****************************************************************************
  1701. Add64bit
  1702. *****************************************************************************}
  { 64 bit arithmetic.  With a 64 bit ALU this is plain ordinal arithmetic;
    any other target has to provide its own implementation, so ending up
    here is an internal error. }
  procedure tx86addnode.second_add64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402042);
{$else cpu64bitalu}
      second_addordinal;
{$endif cpu64bitalu}
    end;
  { 64 bit comparison.  With a 64 bit ALU this is a plain ordinal compare;
    any other target has to provide its own implementation, so ending up
    here is an internal error. }
  procedure tx86addnode.second_cmp64bit;
    begin
{$ifndef cpu64bitalu}
      { must be implemented separately }
      internalerror(200402043);
{$else cpu64bitalu}
      second_cmpordinal;
{$endif cpu64bitalu}
    end;
  1721. {*****************************************************************************
  1722. AddOrdinal
  1723. *****************************************************************************}
  { Generates code for the ordinal operations add, sub, mul, and, or and xor
    (any other node type is an internal error).

    Outline:
      1. map the node type to a code generator operation and decide whether
         an overflow check has to be emitted;
      2. (not on i8086, -O2 and up) try the BMI1 ANDN and BMI2 BZHI special
         cases, which handle their operands themselves and exit early;
      3. otherwise evaluate both operands and emit either a three operand
         or a two operand sequence;
      4. emit the overflow check if one is needed.

    Change compared to the previous version: the size-optimisation guard of
    the ANDN case tested left/right.location.loc for LOC_CONSTANT although
    neither operand has been through secondpass at that point (secondpass
    and pass_left_right are only called further down), so the guard could
    not see a constant operand.  It now tests the node type instead. }
  procedure tx86addnode.second_addordinal;
    var
      opsize : tcgsize;
      unsigned : boolean;
      cgop : topcg;
      checkoverflow : Boolean;
      ovloc : tlocation;
      tmpreg : TRegister;
      indexnode : TNode;
    begin
      { determine if the operation is unsigned; this only selects between
        MUL and IMUL below }
      unsigned:=not(is_signed(left.resultdef)) or
                not(is_signed(right.resultdef));

      { assume no overflow checking is required }
      checkoverflow := false;

      ovloc.loc:=LOC_VOID;

      case nodetype of
        addn:
          begin
            cgop:=OP_ADD;
            checkoverflow:=true;
          end;
        xorn :
          begin
            cgop:=OP_XOR;
          end;
        orn :
          begin
            cgop:=OP_OR;
          end;
        andn:
          begin
            cgop:=OP_AND;
          end;
        muln:
          begin
            checkoverflow:=true;
            if unsigned then
              cgop:=OP_MUL
            else
              cgop:=OP_IMUL;
          end;
        subn :
          begin
            checkoverflow:=true;
            cgop:=OP_SUB;
          end;
        else
          internalerror(2015022501);
      end;

      { only add, sub and mul can overflow, and only if checking is wanted }
      checkoverflow:=
        checkoverflow and
        needoverflowcheck;

      opsize:=def_cgsize(left.resultdef);

{$ifndef i8086}
      if (cs_opt_level2 in current_settings.optimizerswitches) then
        begin
          { BMI1 optimisations }
          if (CPUX86_HAS_BMI1 in cpu_capabilities[current_settings.cputype]) then
            begin
              { Can we turn "x and (not y)" into an ANDN instruction instead? }
              if (nodetype = andn) and
                (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
                ((left.nodetype = notn) or (right.nodetype = notn)) and
                (
                  { With "const and (not variable)", ANDN will produce larger
                    code once everything is moved into registers (as a side-note,
                    "const and (not const)" and "variable and (not const)" will
                    have been simplified earlier to remove the NOT operation).

                    The operands have not been through secondpass yet, so
                    their locations cannot be consulted here; a constant
                    operand is recognised by its node type instead. }
                  not (cs_opt_size in current_settings.optimizerswitches) or
                  (
                    (left.nodetype <> ordconstn) and
                    (right.nodetype <> ordconstn)
                  )
                ) then
                begin
                  { ANDN only supports the second operand being inverted; however,
                    since we're dealing with ordinals, there won't be any Boolean
                    shortcutting, so we can safely swap the parameters }
                  if (right.nodetype <> notn) then
                    swapleftright;

                  secondpass(left);

                  { Skip the not node completely }
                  Include(right.transientflags, tnf_do_not_execute);
                  secondpass(tnotnode(right).left);

                  { allocate registers }
                  hlcg.location_force_reg(
                    current_asmdata.CurrAsmList,
                    tnotnode(right).left.location,
                    tnotnode(right).left.resultdef,
                    tnotnode(right).left.resultdef,
                    false
                  );

                  if left.location.loc = LOC_CONSTANT then
                    { With "const and (not variable)", we can probably still make a
                      saving when it comes to pipeline stalls (left.location.loc
                      will become LOC_CREGISTER). }
                    hlcg.location_force_reg(
                      current_asmdata.CurrAsmList,
                      left.location,
                      left.resultdef,
                      left.resultdef,
                      true
                    );

                  set_result_location_reg;

                  case left.location.loc of
                    LOC_REFERENCE,
                    LOC_CREFERENCE:
                      emit_ref_reg_reg(A_ANDN, TCGSize2OpSize[opsize], left.location.reference, tnotnode(right).left.location.register, location.register);
                    LOC_REGISTER,
                    LOC_CREGISTER:
                      emit_reg_reg_reg(A_ANDN, TCGSize2OpSize[opsize], left.location.register, tnotnode(right).left.location.register, location.register);
                    else
                      InternalError(2022102110);
                  end;

                  { Overflow can't happen with and/andn }
                  Exit;
                end;
            end;

          { BMI2 optimisations }
          if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) then
            begin
              { Can we turn "x and ((1 shl y) - 1)" into a BZHI instruction instead? }
              if (nodetype = andn) and
                (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
                (
                  (
                    (right.nodetype = subn) and
                    (taddnode(right).right.nodetype = ordconstn) and
                    (tordconstnode(taddnode(right).right).value = 1) and
                    (taddnode(right).left.nodetype = shln) and
                    (tshlshrnode(taddnode(right).left).left.nodetype = ordconstn) and
                    (tordconstnode(tshlshrnode(taddnode(right).left).left).value = 1)
                  ) or
                  (
                    (left.nodetype = subn) and
                    (taddnode(left).right.nodetype = ordconstn) and
                    (tordconstnode(taddnode(left).right).value = 1) and
                    (taddnode(left).left.nodetype = shln) and
                    (tshlshrnode(taddnode(left).left).left.nodetype = ordconstn) and
                    (tordconstnode(tshlshrnode(taddnode(left).left).left).value = 1)
                  )
                ) then
                begin
                  { Put the subtract node on the right }
                  if (right.nodetype <> subn) then
                    swapleftright;

                  secondpass(left);

                  { Skip the subtract and shift nodes completely }
                  Include(right.transientflags, tnf_do_not_execute);
                  Include(taddnode(right).left.transientflags, tnf_do_not_execute);

                  { Helps avoid all the awkward typecasts }
                  indexnode := tshlshrnode(taddnode(right).left).right;
{$ifdef x86_64}
                  { The code generator sometimes extends the shift result to 64-bit unnecessarily }
                  if (indexnode.nodetype = typeconvn) and (opsize in [OS_32, OS_S32]) and
                    (def_cgsize(TTypeConvNode(indexnode).resultdef) in [OS_64, OS_S64]) then
                    begin
                      { Convert to the 32-bit type }
                      indexnode.resultdef := resultdef;
                      node_reset_flags(indexnode,[],[tnf_pass1_done]);

                      { We shouldn't be getting any new errors }
                      if do_firstpass(indexnode) then
                        InternalError(2022110201);

                      { Keep things internally consistent in case indexnode changed }
                      tshlshrnode(taddnode(right).left).right := indexnode;
                    end;
{$endif x86_64}
                  secondpass(indexnode);

                  { allocate registers }
                  hlcg.location_force_reg(
                    current_asmdata.CurrAsmList,
                    indexnode.location,
                    indexnode.resultdef,
                    resultdef,
                    false
                  );

                  set_result_location_reg;

                  case left.location.loc of
                    LOC_REFERENCE,
                    LOC_CREFERENCE:
                      emit_reg_ref_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, left.location.reference, location.register);
                    LOC_REGISTER,
                    LOC_CREGISTER:
                      emit_reg_reg_reg(A_BZHI, TCGSize2OpSize[opsize], indexnode.location.register, left.location.register, location.register);
                    else
                      InternalError(2022102111);
                  end;

                  { this is an "and" node, for which no overflow check is requested above }
                  Exit;
                end;
            end;
        end;
{$endif not i8086}

      pass_left_right;

      { do we have to allocate a register? If yes, then three opcode instructions are better, however for sub three op code instructions
        make no sense if right is a reference }
      if ((left.location.loc<>LOC_REGISTER) and (right.location.loc<>LOC_REGISTER) and
          ((nodetype<>subn) or not(right.location.loc in [LOC_REFERENCE,LOC_CREFERENCE])) and
          { 3 op mul makes only sense if a constant is involved }
          ((nodetype<>muln) or (left.location.loc=LOC_CONSTANT) or (right.location.loc=LOC_CONSTANT)
{$ifndef i8086}
           or ((CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and (not(needoverflowcheck))
              )
{$endif i8086}
          ) and
          (not(nodetype in [orn,andn,xorn]))) or
         ((nodetype=addn) and (left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT]) and (right.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_CONSTANT])) then
        begin
          { allocate registers }
          force_reg_left_right(nodetype<>subn,true);
          set_result_location_reg;
          if nodetype<>subn then
            begin
              if checkoverflow then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              if (right.location.loc<>LOC_CONSTANT) then
                hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                   left.location.register,right.location.register,
                   location.register,checkoverflow,ovloc)
              else
                hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,cgop,resultdef,
                   right.location.value,left.location.register,
                   location.register,checkoverflow,ovloc);
            end
          else  { subtract is a special case since it's not commutative }
            begin
              if (nf_swapped in flags) then
                swapleftright;
              if left.location.loc<>LOC_CONSTANT then
                begin
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  if right.location.loc<>LOC_CONSTANT then
                    hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                        right.location.register,left.location.register,
                        location.register,checkoverflow,ovloc)
                  else
                    hlcg.a_op_const_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                      right.location.value,left.location.register,
                      location.register,checkoverflow,ovloc);
                end
              else
                begin
                  { "const - x": the constant has to be loaded into a register first }
                  tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                  hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,
                    left.location.value,tmpreg);
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  hlcg.a_op_reg_reg_reg_checkoverflow(current_asmdata.CurrAsmList,OP_SUB,resultdef,
                    right.location.register,tmpreg,location.register,checkoverflow,ovloc);
                end;
            end
        end
      else
        begin
          { at least one location should be a register, if yes, try to re-use it, so we can try two operand opcodes }
          if left.location.loc<>LOC_REGISTER then
            begin
              if right.location.loc<>LOC_REGISTER then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false)
              else
                begin
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end;
            end;

          { at this point, left.location.loc should be LOC_REGISTER }
          if right.location.loc=LOC_REGISTER then
            begin
              if checkoverflow then
                cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
              { when swapped another result register }
              if (nodetype=subn) and (nf_swapped in flags) then
                begin
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                    left.location.register,right.location.register);
                  location_swap(left.location,right.location);
                  toggleflag(nf_swapped);
                end
              else
                cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,
                  right.location.register,left.location.register);
            end
          else
            begin
              { right.location<>LOC_REGISTER }
              if right.location.loc in [LOC_CSUBSETREF,LOC_CSUBSETREG,LOC_SUBSETREF,LOC_SUBSETREG] then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,right.location,right.resultdef,left.resultdef,true);
              if (nodetype=subn) and (nf_swapped in flags) then
                begin
                  { swapped subtraction: load right into a new register and
                    subtract the old left register from it }
                  tmpreg:=left.location.register;
                  left.location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
                  cg.a_load_loc_reg(current_asmdata.CurrAsmList,opsize,right.location,left.location.register);
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  cg.a_op_reg_reg(current_asmdata.CurrAsmList,cgop,opsize,tmpreg,left.location.register);
                end
              else
                begin
                  if checkoverflow then
                    cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
                  cg.a_op_loc_reg(current_asmdata.CurrAsmList,cgop,opsize,right.location,left.location.register);
                end;
              location_freetemp(current_asmdata.CurrAsmList,right.location);
            end;

          location_copy(location,left.location);
        end;

      { emit overflow check if required }
      if checkoverflow then
        cg.g_overflowcheck_loc(current_asmdata.CurrAsmList,Location,resultdef,ovloc);
    end;
  { Boolean operations.  The inherited implementation is used for and/or
    that must be evaluated with short-circuit semantics (full boolean
    evaluation switched off, or the node explicitly flagged as short
    boolean) and for 64 bit operands; every remaining case is treated like
    ordinal arithmetic. }
  procedure tx86addnode.second_addboolean;
    var
      shortcircuit : boolean;
    begin
      shortcircuit:=
        (nodetype in [orn,andn]) and
        (
          (anf_short_bool in addnodeflags) or
          not(cs_full_boolean_eval in current_settings.localswitches)
        );

      if shortcircuit or is_64bit(left.resultdef) then
        inherited second_addboolean
      else
        second_addordinal;
    end;
  { Generates a CMP for two ordinal operands and returns the result as a
    flags location.  A constant compared against a memory operand is emitted
    as "cmp mem, imm" directly; on x86_64 a 64 bit compare additionally
    requires the constant to lie within the longint range.  All other
    combinations go through the generic register based path. }
  procedure tx86addnode.second_cmpordinal;
    var
      cmpdef : tdef;
      cmpsize : tcgsize;
      is_unsigned : boolean;
      imm_vs_mem : boolean;
    begin
      is_unsigned:=not(is_signed(left.resultdef)) or
                   not(is_signed(right.resultdef));
      cmpdef:=left.resultdef;
      cmpsize:=def_cgsize(cmpdef);

      pass_left_right;

      imm_vs_mem:=(right.location.loc=LOC_CONSTANT) and
                  (left.location.loc in [LOC_REFERENCE, LOC_CREFERENCE]);
{$ifdef x86_64}
      if imm_vs_mem and (cmpsize in [OS_64,OS_S64]) then
        imm_vs_mem:=(right.location.value>=low(longint)) and
                    (right.location.value<=high(longint));
{$endif x86_64}

      if not imm_vs_mem then
        begin
          left_must_be_reg(cmpdef,cmpsize,false);
          emit_generic_code(A_CMP,cmpsize,is_unsigned,false,false);
          location_freetemp(current_asmdata.CurrAsmList,right.location);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end
      else
        begin
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          emit_const_ref(A_CMP, TCGSize2Opsize[cmpsize], right.location.value, left.location.reference);
          location_freetemp(current_asmdata.CurrAsmList,left.location);
        end;

      location_reset(location,LOC_FLAGS,OS_NO);
      location.resflags:=getresflags(is_unsigned);
    end;
  { Unit initialisation: point the caddnode class reference at tx86addnode,
    so that nodes created through caddnode (e.g. caddnode.create_internal)
    are instances of the x86 specific class. }
  begin
    caddnode:=tx86addnode;
  end.