{ nx86inl.pas }
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  22. type
  { x86 flavour of the inline (intrinsic) node.  It overrides the type-check,
    first-pass and second-pass hooks of tcginlinenode for the intrinsics that
    are implemented in this unit. }
  tx86inlinenode = class(tcginlinenode)
  protected
    { hook called by the round/trunc/frac first-pass handlers; the
      implementation in this unit is empty }
    procedure maybe_remove_round_trunc_typeconv; virtual;
  public
    function pass_typecheck_cpu: tnode; override;

    { first pass overrides, so that the code generator will actually
      generate these nodes }
    function first_cpu: tnode; override;
    function first_pi: tnode; override;
    function first_arctan_real: tnode; override;
    function first_abs_real: tnode; override;
    function first_sqr_real: tnode; override;
    function first_sqrt_real: tnode; override;
    function first_ln_real: tnode; override;
    function first_cos_real: tnode; override;
    function first_sin_real: tnode; override;
    function first_round_real: tnode; override;
    function first_trunc_real: tnode; override;
    function first_popcnt: tnode; override;
    function first_fma: tnode; override;
    function first_frac_real: tnode; override;
    function first_int_real: tnode; override;
    function first_minmax: tnode; override;
    function simplify(forinline: boolean): tnode; override;

    { second pass overrides which emit the code for these nodes }
    procedure pass_generate_code_cpu; override;
    procedure second_IncludeExclude; override;
    procedure second_AndOrXorShiftRot_assign; override;
    procedure second_pi; override;
    procedure second_arctan_real; override;
    procedure second_abs_real; override;
    procedure second_round_real; override;
    procedure second_sqr_real; override;
    procedure second_sqrt_real; override;
    procedure second_ln_real; override;
    procedure second_cos_real; override;
    procedure second_sin_real; override;
    procedure second_trunc_real; override;
    procedure second_prefetch; override;
    procedure second_abs_long; override;
    procedure second_popcnt; override;
    procedure second_fma; override;
    procedure second_frac_real; override;
    procedure second_int_real; override;
    procedure second_high; override;
    procedure second_minmax; override;
  private
    { evaluates lnode and brings its value into the x87 result register }
    procedure load_fpu_location(lnode: tnode);
  end;
  74. implementation
  75. uses
  76. systems,
  77. globtype,globals,
  78. verbose,compinnr,fmodule,
  79. defutil,
  80. aasmbase,aasmdata,aasmcpu,
  81. symconst,symtype,symdef,symcpu,
  82. ncnv,
  83. htypechk,
  84. cgbase,pass_1,pass_2,
  85. cpuinfo,cpubase,nutils,
  86. ncal,ncgutil,nld,ncon,nadd,nmat,constexp,
  87. tgobj,
  88. cga,cgutils,cgx86,cgobj,hlcgobj,cutils;
  89. {*****************************************************************************
  90. TX86INLINENODE
  91. *****************************************************************************}
  { Deliberately a no-op in this class: according to the original note the
    hook only makes a difference for x86_64.  It is virtual so that a
    descendant can supply the real work. }
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
    end;
  { Type-checks the x86-specific intrinsics and assigns their resultdef.
    Intrinsics that are handled neither here nor by the generated include
    file are passed on to the inherited implementation.
    Returns nil for every intrinsic handled locally. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { port input: a single parameter, result is u8/u16/s32 }
        in_x86_inportb:
          begin
            CheckParameters(1);
            resultdef:=u8inttype;
          end;
        in_x86_inportw:
          begin
            CheckParameters(1);
            resultdef:=u16inttype;
          end;
        in_x86_inportl:
          begin
            CheckParameters(1);
            resultdef:=s32inttype;
          end;

        { port output: two parameters, no result }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;

        { parameterless statements without a result }
        in_x86_pause,
        in_x86_cli,
        in_x86_sti:
          resultdef:=voidtype;

        { segment register reads: 16 bit result on i8086, s32 otherwise }
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
{$ifdef i8086}
          resultdef:=u16inttype;
{$else i8086}
          resultdef:=s32inttype;
{$endif i8086}

        { include automatically generated code }
{$i x86mmtype.inc}

        else
          Result:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass for the x86-specific intrinsics: records the expected result
    location.  Value-returning intrinsics expect a register, statement-like
    ones expect no location at all.  Anything not listed here or in the
    generated include file goes to the inherited implementation. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { intrinsics which deliver a value }
        in_x86_inportb,
        in_x86_inportw,
        in_x86_inportl,
        in_x86_get_cs,
        in_x86_get_ss,
        in_x86_get_ds,
        in_x86_get_es,
        in_x86_get_fs,
        in_x86_get_gs:
          expectloc:=LOC_REGISTER;

        { intrinsics which are pure statements }
        in_x86_outportb,
        in_x86_outportw,
        in_x86_outportl,
        in_x86_pause,
        in_x86_cli,
        in_x86_sti:
          expectloc:=LOC_VOID;

        { include automatically generated code }
{$i x86mmfirst.inc}

        else
          Result:=inherited first_cpu;
      end;
    end;
  { First pass for pi: handled inline (result in an FPU register) only when
    the best real type is the 80 bit extended type; otherwise the inherited
    handling is used. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        result:=inherited
      else
        begin
          expectloc:=LOC_FPUREGISTER;
          result:=nil;
        end;
    end;
  { First pass for arctan: handled inline (result in an FPU register) only
    when the best real type is the 80 bit extended type. }
  function tx86inlinenode.first_arctan_real : tnode;
    begin
{$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so the RTL helper has to be used on these FPUs }
      if current_settings.cputype < cpu_386 then
        begin
          result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        result:=inherited
      else
        begin
          expectloc:=LOC_FPUREGISTER;
          result:=nil;
        end;
    end;
  { First pass for abs on reals: always handled inline.  The result is
    expected in an MM register when the vector FPU is used for the result
    type and in an x87 register otherwise. }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for sqr on reals: always handled inline.  The result is
    expected in an MM register when the vector FPU is used for the result
    type and in an x87 register otherwise. }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for sqrt on reals: always handled inline.  The result is
    expected in an MM register when the vector FPU is used for the result
    type and in an x87 register otherwise. }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      result:=nil;
      if not use_vectorfpu(resultdef) then
        expectloc:=LOC_FPUREGISTER
      else
        expectloc:=LOC_MMREGISTER;
    end;
  { First pass for ln: handled inline (result in an FPU register) only when
    the best real type is the 80 bit extended type; otherwise the inherited
    handling is used. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        result:=inherited
      else
        begin
          expectloc:=LOC_FPUREGISTER;
          result:=nil;
        end;
    end;
  { First pass for cos: handled inline (result in an FPU register) only when
    the best real type is the 80 bit extended type. }
  function tx86inlinenode.first_cos_real : tnode;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        result:=inherited
      else
        begin
          expectloc:=LOC_FPUREGISTER;
          result:=nil;
        end;
    end;
  { First pass for sin: handled inline (result in an FPU register) only when
    the best real type is the 80 bit extended type. }
  function tx86inlinenode.first_sin_real : tnode;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        result:=inherited
      else
        begin
          expectloc:=LOC_FPUREGISTER;
          result:=nil;
        end;
    end;
  { First pass for round: always handled inline.  The result is expected in
    memory, except on x86_64 when the operand type uses the vector FPU, in
    which case it is expected in an integer register. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      { must run first, it may replace the operand inspected below }
      maybe_remove_round_trunc_typeconv;
      result:=nil;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for trunc.  When optimizing for size the inherited handling is
    used (on x86_64 only if the operand does not use the vector FPU).
    Otherwise the node is handled inline with the result expected in memory,
    or in an integer register for vector FPU operands on x86_64. }
  function tx86inlinenode.first_trunc_real: tnode;
    begin
      { must run first, it may replace the operand inspected below }
      maybe_remove_round_trunc_typeconv;
      if (cs_opt_size in current_settings.optimizerswitches)
{$ifdef x86_64}
         and not(use_vectorfpu(left.resultdef))
{$endif x86_64}
      then
        begin
          result:=inherited;
          exit;
        end;
      result:=nil;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for popcnt: handled inline (result in a register) when the
    selected CPU has the POPCNT capability; on i386 additionally only for
    operands which are not 64 bit.  i8086 and all remaining cases use the
    inherited handling. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
{$ifndef i8086}
      if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
         and not is_64bit(left.resultdef)
{$endif i386}
      then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_popcnt;
    end;
  { First pass for fma: handled inline (result in an MM register) when the
    selected FPU offers FMA or FMA4 and the result is single or double.
    i8086 and all remaining cases use the inherited handling. }
  function tx86inlinenode.first_fma : tnode;
    begin
{$ifndef i8086}
      if ((fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
{$endif i8086}
      Result:=inherited first_fma;
    end;
  { First pass for frac: handled inline (result in an MM register) when the
    FPU type is at least SSE4.1 and the result is single or double;
    otherwise the inherited handling is used. }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { First pass for int: handled inline (result in an MM register) when the
    FPU type is at least SSE4.1 and the result is single or double;
    otherwise the inherited handling is used. }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for min/max.
    - Not on i8086: single/double results are handled inline in an MM
      register; on i386 this additionally requires at least SSE for single
      and at least SSE2 for double.
    - 32 bit integer results (and 64 bit ones on x86_64) are handled inline
      in a register; outside x86_64 this requires the CMOV capability.
    - Everything else uses the inherited handling. }
  function tx86inlinenode.first_minmax: tnode;
    begin
{$ifndef i8086}
      { floating point via SSE }
      if
{$ifdef i386}
        ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
        ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
        (is_double(resultdef) or is_single(resultdef))
{$endif i386}
      then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
{$endif i8086}

      { integers via conditional move }
      if
{$ifndef x86_64}
        (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
{$endif x86_64}
        (
{$ifdef x86_64}
          is_64bitint(resultdef) or
{$endif x86_64}
          is_32bitint(resultdef)
        ) then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;

      Result:=inherited first_minmax;
    end;
  { Simplification hook.  With an FPU type of at least SSE4.1, an int() whose
    operand is an implicit type conversion of a single or double value is
    rewritten to work on the unconverted operand directly: the conversion
    node is dropped and a type-checked copy of this node, carrying the
    operand's type as resultdef, is returned.  All other nodes are handled
    by the inherited implementation. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      operand : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         (inlinenumber<>in_int_real) or
         (left.nodetype<>typeconvn) or
         (nf_explicit in left.flags) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;

      operand:=ttypeconvnode(left).left;
      if (operand.resultdef.typ<>floatdef) or
         not(is_double(operand.resultdef) or is_single(operand.resultdef)) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;

      { get rid of the type conversion: detach the operand first so that
        freeing the conversion node does not free it as well }
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=operand;
      Result:=self.getcopy;
      tinlinenode(Result).resultdef:=operand.resultdef;
      typecheckpass(Result);
    end;
  { Second pass for the x86-specific intrinsics: port I/O, CLI/STI/PAUSE and
    segment register reads are emitted here, further intrinsics come from the
    generated include file and everything else is left to the inherited
    implementation.
    NOTE(review): paraarray, i, op and the helpers GetConstInt,
    GetParameters, location_force_mmxreg and location_make_ref are not used
    by the code visible in this procedure; presumably they serve the
    generated x86mmsecond.inc - confirm before changing any of them. }
  procedure tx86inlinenode.pass_generate_code_cpu;
    var
      paraarray : array[1..4] of tnode;
      i : integer;
      op: TAsmOp;

    { Emits an IN from the port given by the (single) parameter into the
      fixed CPU register dreg and copies the value into a fresh integer
      register which becomes the node's location.  A constant port number
      in 0..255 is encoded as immediate, any other goes through DX. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode: tnode;
      begin
        portnode:=left;
        secondpass(portnode);
        if (portnode.location.loc=LOC_CONSTANT) and
           (portnode.location.value>=0) and
           (portnode.location.value<=255) then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,portnode.location.value,dreg));
          end
        else
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        { common tail: move the value out of the fixed register }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
      end;

    { Emits an OUT of the data parameter, loaded into the fixed CPU register
      dreg, to the port given by the port parameter.  A constant port number
      in 0..255 is encoded as immediate, any other goes through DX. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        portnode, datanode: tnode;
      begin
        portnode:=tcallparanode(tcallparanode(left).right).left;
        datanode:=tcallparanode(left).left;
        secondpass(datanode);
        secondpass(portnode);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,datanode.resultdef,dtype,datanode.location,dreg);
        if (portnode.location.loc=LOC_CONSTANT) and
           (portnode.location.value>=0) and
           (portnode.location.value<=255) then
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,portnode.location.value))
        else
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,portnode.resultdef,u16inttype,portnode.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    { Copies segment register segreg with a MOV into a fresh integer
      register which becomes the node's location. }
    procedure get_segreg(segreg:tregister);
      var
        ressize: tcgsize;
      begin
        ressize:=def_cgsize(resultdef);
        location_reset(location,LOC_REGISTER,ressize);
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[ressize],segreg,location.register));
      end;

    { Returns the value of the integer constant node n; reports
      type_e_constant_expr_expected and returns 0 for any other node. }
    function GetConstInt(n: tnode): longint;
      begin
        if is_constintnode(n) then
          Result:=tordconstnode(n).value.svalue
        else
          begin
            Result:=0;
            Message(type_e_constant_expr_expected);
          end;
      end;

    { Stores the values of the first count parameters into paraarray.  The
      parameter list is walked from left, filling the array from index
      count down to 1.  A single parameter which is not wrapped in a
      tcallparanode is stored directly. }
    procedure GetParameters(count: longint);
      var
        idx: longint;
        cur: tnode;
      begin
        if (count=1) and
           not(left is tcallparanode) then
          begin
            paraarray[1]:=left;
            exit;
          end;
        cur:=left;
        for idx:=count downto 1 do
          begin
            paraarray[idx]:=tcallparanode(cur).paravalue;
            cur:=tcallparanode(cur).nextpara;
          end;
      end;

    { Makes sure l lives in an MMX register.  An LOC_MMXREGISTER is left
      alone, and so is an LOC_CMMXREGISTER when maybeconst is set; anything
      else is loaded into a newly allocated MMX register. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
      var
        newreg : tregister;
      begin
        if (l.loc=LOC_MMXREGISTER) or
           ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
          exit;
        newreg:=tcgx86(cg).getmmxregister(list);
        cg.a_loadmm_loc_reg(list,OS_M64,l,newreg,nil);
        location_freetemp(list,l);
        location_reset(l,LOC_MMXREGISTER,OS_M64);
        l.register:=newreg;
      end;

    { Turns loc into a reference.  Existing references are kept.  A register
      becomes the base of a new reference; any other location is forced into
      a register first, which then becomes the base of a new reference. }
    procedure location_make_ref(var loc: tlocation);
      var
        refloc: tlocation;
      begin
        if loc.loc in [LOC_CREFERENCE,LOC_REFERENCE] then
          exit;
        if not(loc.loc in [LOC_CREGISTER,LOC_REGISTER]) then
          hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
        location_reset_ref(refloc, LOC_REFERENCE, OS_32, 1, []);
        refloc.reference.base:=loc.register;
        loc:=refloc;
      end;

    begin
      FillChar(paraarray,sizeof(paraarray),0);
      case inlinenumber of
        { port input }
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);

        { port output }
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);

        { single instructions without operands }
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));
        in_x86_pause:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_PAUSE));

        { segment register reads }
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);

{$i x86mmsecond.inc}

        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Second pass for the "x op= y" intrinsics.  With cs_opt_level2 enabled
    and a CPU offering BMI2, "x and= ((1 shl y) - 1)" on a 32 bit (on x86_64
    also 64 bit) target is emitted as a single BZHI.  Every other case, and
    i8086 altogether, is handled by the inherited implementation. }
  procedure tx86inlinenode.second_AndOrXorShiftRot_assign;
{$ifndef i8086}
    var
      opsize : tcgsize;
      masknode, bitcountnode, targetnode: TNode;
      tmpreg: TRegister;
{$endif i8086}
    begin
{$ifndef i8086}
      if (cs_opt_level2 in current_settings.optimizerswitches) then
        begin
          { named once here to save on a lot of typecasting below }
          masknode := tcallparanode(left).left;
          targetnode := tcallparanode(tcallparanode(left).right).left;
          opsize := def_cgsize(targetnode.resultdef);

          { BMI2 optimisation: if the second operand is "((1 shl y) - 1)",
            the whole operation can be turned into a BZHI instruction }
          if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and
             (inlinenumber=in_and_assign_x_y) and
             (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
             (masknode.nodetype = subn) and
             (taddnode(masknode).right.nodetype = ordconstn) and
             (tordconstnode(taddnode(masknode).right).value = 1) and
             (taddnode(masknode).left.nodetype = shln) and
             (tshlshrnode(taddnode(masknode).left).left.nodetype = ordconstn) and
             (tordconstnode(tshlshrnode(taddnode(masknode).left).left).value = 1) then
            begin
              { the subtract and shift nodes are skipped completely, only
                the shift count is evaluated }
              bitcountnode := tshlshrnode(taddnode(masknode).left).right;

{$ifdef x86_64}
              { the shift count is sometimes extended to 64 bit although the
                target is only 32 bit wide }
              if (bitcountnode.nodetype = typeconvn) and
                 (opsize in [OS_32, OS_S32]) and
                 (def_cgsize(TTypeConvNode(bitcountnode).resultdef) in [OS_64, OS_S64]) then
                begin
                  { convert to the 32 bit type }
                  bitcountnode.resultdef:=targetnode.resultdef;
                  node_reset_flags(bitcountnode,[],[tnf_pass1_done]);

                  { we shouldn't be getting any new errors }
                  if do_firstpass(bitcountnode) then
                    InternalError(2022110202);

                  { keep the tree consistent in case the node was replaced }
                  tshlshrnode(taddnode(masknode).left).right:=bitcountnode;
                end;
{$endif x86_64}

              secondpass(bitcountnode);
              secondpass(targetnode);

              { the bit count has to sit in a register }
              hlcg.location_force_reg(
                current_asmdata.CurrAsmList,
                bitcountnode.location,
                bitcountnode.resultdef,
                targetnode.resultdef,
                false
              );

              case targetnode.location.loc of
                LOC_REFERENCE,
                LOC_CREFERENCE:
                  begin
                    { BZHI can only write to a register, so go through a
                      temporary and store the result back }
                    tmpreg := cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    emit_reg_ref_reg(A_BZHI, TCGSize2OpSize[opsize], bitcountnode.location.register, targetnode.location.reference, tmpreg);
                    emit_reg_ref(A_MOV, TCGSize2OpSize[opsize], tmpreg, targetnode.location.reference);
                  end;
                LOC_REGISTER,
                LOC_CREGISTER:
                  emit_reg_reg_reg(A_BZHI, TCGSize2OpSize[opsize], bitcountnode.location.register, targetnode.location.register, targetnode.location.register);
                else
                  InternalError(2022102120);
              end;

              Exit;
            end;
        end;
{$endif not i8086}
      inherited second_AndOrXorShiftRot_assign;
    end;
  { Second pass for pi: emits FLDPI, accounts for the additional value on
    the FPU stack and reports the FPU result register as location. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  { Evaluates lnode and loads its value into an FPU register.  The node's own
    location is set to LOC_FPUREGISTER / NR_FPU_RESULT_REG up front:
      - a value already in LOC_FPUREGISTER needs no code,
      - a constant FPU register or a memory reference is loaded into the
        result register,
      - an MM register value is taken over as location and then forced into
        an FPU register.
    Any other location of lnode raises an internal error. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { nothing to do };
        LOC_CFPUREGISTER:
          cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.register,location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE:
          cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,lnode.resultdef,false);
          end;
        else
          internalerror(309991);
      end;
    end;
  { Second pass for arctan: loads the operand into the FPU, then emits FLD1
    followed by FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      { FLD1 supplies the second operand FPATAN works on }
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { Second pass for abs on reals.
    x87 path: load the operand and emit FABS.
    Vector FPU path: AND the operand with the FPC_ABSMASK_SINGLE or
    FPC_ABSMASK_DOUBLE constant.  With AVX the three operand form writes to
    a newly allocated register, without AVX the operand's register is
    modified in place and becomes the result. }
  procedure tx86inlinenode.second_abs_real;

    { whether the mask symbol has to be referenced indirectly }
    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
                (target_info.system in systems_indirect_var_imports);
      end;

    var
      maskref : treference;
      masksym : tasmsymbol;
      maskname : string;
      avxop, sseop : TAsmOp;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if not UseAVX then
        location:=left.location
      else
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end;

      { pick the mask constant and the opcodes matching the float type }
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            maskname:='FPC_ABSMASK_SINGLE';
            avxop:=A_VANDPS;
            sseop:=A_ANDPS;
          end;
        s64real:
          begin
            maskname:='FPC_ABSMASK_DOUBLE';
            avxop:=A_VANDPD;
            sseop:=A_ANDPD;
          end;
        else
          internalerror(200506081);
      end;

      { reference the mask and register it as external symbol }
      masksym:=current_asmdata.RefAsmSymbol(target_info.cprefix+maskname,AT_DATA,needs_indirect);
      reference_reset_symbol(maskref,masksym,0,4,[]);
      current_module.add_extern_asmsym(masksym);
      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, maskref);

      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
          avxop,S_XMM,maskref,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(sseop,S_XMM,maskref,location.register));
    end;
  { Second pass for round.
    x86_64 with a vector FPU operand: the operand is forced into an MM
    register and converted into a 64 bit integer register with
    (V)CVTSS2SI / (V)CVTSD2SI, depending on operand size and AVX usage.
    Otherwise: the operand is loaded into the FPU and stored with FISTP into
    a 64 bit temp, which becomes the node's location. }
  procedure tx86inlinenode.second_round_real;
{$ifdef x86_64}
    var
      cvtop : TAsmOp;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);

          { select the conversion instruction }
          case left.location.size of
            OS_F32:
              if UseAVX then
                cvtop:=A_VCVTSS2SI
              else
                cvtop:=A_CVTSS2SI;
            OS_F64:
              if UseAVX then
                cvtop:=A_VCVTSD2SI
              else
                cvtop:=A_CVTSD2SI;
            else
              begin
                if UseAVX then
                  internalerror(2007031402)
                else
                  internalerror(2007031404);
              end;
          end;

          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
          exit;
        end;
{$endif x86_64}

      { x87: store as 64 bit integer into a temp and pop the FPU stack }
      load_fpu_location(left);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      tcgx86(cg).dec_fpu_stack;
      emit_none(A_FWAIT,S_NO);
    end;
  { Second pass for truncating a floating point value to a 64 bit signed integer.

    Three strategies are used:
      - x86_64 with a vector-unit operand: (V)CVTTSS2SI/(V)CVTTSD2SI into an
        integer register;
      - x87 with fputype>=fpu_sse3: FISTTP into a temp;
      - plain x87: temporarily load a modified FPU control word, store with
        FISTP and restore the original control word afterwards. }
  procedure tx86inlinenode.second_trunc_real;
    var
  {$ifdef x86_64}
      cvtop : tasmop;
  {$endif x86_64}
      savedcw,
      trunccw : treference;
    begin
  {$ifdef x86_64}
      if use_vectorfpu(left.resultdef) and
         not((left.location.loc=LOC_FPUREGISTER) and (current_settings.fputype>=fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { choose the truncating conversion matching the operand size }
          cvtop:=A_NONE;
          if UseAVX then
            case left.location.size of
              OS_F32:
                cvtop:=A_VCVTTSS2SI;
              OS_F64:
                cvtop:=A_VCVTTSD2SI;
              else
                internalerror(2007031401);
            end
          else
            case left.location.size of
              OS_F32:
                cvtop:=A_CVTTSS2SI;
              OS_F64:
                cvtop:=A_CVTTSD2SI;
              else
                internalerror(2007031403);
            end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
        end
      else
  {$endif x86_64}
      if (current_settings.fputype>=fpu_sse3) then
        begin
          { FISTTP path: store-and-pop without touching the control word }
          load_fpu_location(left);
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          emit_ref(A_FISTTP,S_IQ,location.reference);
          tcgx86(cg).dec_fpu_stack;
        end
      else
        begin
          { two 2-byte temps: one keeps the current control word, the other
            receives the modified copy used while storing }
          tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,savedcw);
          tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,trunccw);
  {$ifdef i8086}
          if current_settings.cputype<=cpu_286 then
            begin
              emit_ref(A_FSTCW,S_NO,trunccw);
              emit_ref(A_FSTCW,S_NO,savedcw);
              emit_none(A_FWAIT,S_NO);
            end
          else
  {$endif i8086}
            begin
              emit_ref(A_FNSTCW,S_NO,trunccw);
              emit_ref(A_FNSTCW,S_NO,savedcw);
            end;
          { set bits 8..11 of the copy (these include the x87 rounding control field) }
          emit_const_ref(A_OR,S_W,$0f00,trunccw);
          load_fpu_location(left);
          emit_ref(A_FLDCW,S_NO,trunccw);
          location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
          tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
          emit_ref(A_FISTP,S_IQ,location.reference);
          tcgx86(cg).dec_fpu_stack;
          { put the original control word back and release both temps }
          emit_ref(A_FLDCW,S_NO,savedcw);
          emit_none(A_FWAIT,S_NO);
          tg.UnGetTemp(current_asmdata.CurrAsmList,savedcw);
          tg.UnGetTemp(current_asmdata.CurrAsmList,trunccw);
        end;
    end;
  { Second pass for sqr(x) on a floating point operand: multiplies the value
    with itself, either on the x87 stack (FMUL st0,st0) or in a freshly
    allocated mm register. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          { x87: operand ends up in st(0), square it in place }
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if not UseAVX then
        begin
          { two-operand form: copy the operand into the result register first;
            values living in an FPU register have to be moved to an mm register before }
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
        end
      else
        begin
          { three-operand form: result:=left*left without an extra move }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
        end;
    end;
  { Second pass for sqrt(x) on a floating point operand: FSQRT on the x87
    stack, or (V)SQRTSS/(V)SQRTSD into a new mm register when the vector unit
    handles the result type. }
  procedure tx86inlinenode.second_sqrt_real;
    var
      sqrtop : tasmop;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          { a temp that held the operand is no longer needed once it is on the FPU stack }
          if left.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      sqrtop:=A_NONE;
      if UseAVX then
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              sqrtop:=A_VSQRTSS;
            s64real:
              sqrtop:=A_VSQRTSD;
            else
              internalerror(200510031);
          end;
          { we use S_NO instead of S_XMM here, regardless of the register size, as the size of the memory location is 32/64 bit }
          { using left.location.register here as 2nd parameter is crucial to break dependency chains }
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(sqrtop,S_NO,left.location.register,left.location.register,location.register));
        end
      else
        begin
          case tfloatdef(resultdef).floattype of
            s32real:
              sqrtop:=A_SQRTSS;
            s64real:
              sqrtop:=A_SQRTSD;
            else
              internalerror(2005100303);
          end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(sqrtop,S_NO,left.location.register,location.register));
        end;
    end;
  { Second pass for ln(x) on the x87 FPU.

    Emitted sequence: the operand is loaded to st(0), FLDLN2 pushes the
    constant ln(2) on top of it, FXCH swaps the two entries so that x is in
    st(0) and ln(2) in st(1), and FYL2X then computes st(1)*log2(st(0)),
    i.e. ln(2)*log2(x). }
  procedure tx86inlinenode.second_ln_real;
    begin
      load_fpu_location(left);
      emit_none(A_FLDLN2,S_NO);
      emit_none(A_FXCH,S_NO);
      emit_none(A_FYL2X,S_NO);
    end;
  { Second pass for cos(x): loads the operand on the x87 stack and emits FCOS.
    On i8086 targets configured for a CPU below the 386 the inherited
    implementation is used instead. }
  procedure tx86inlinenode.second_cos_real;
    begin
  {$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited
      else
  {$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  { Second pass for sin(x): loads the operand on the x87 stack and emits FSIN.
    On i8086 targets configured for a CPU below the 386 the inherited
    implementation is used instead. }
  procedure tx86inlinenode.second_sin_real;
    begin
  {$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited
      else
  {$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  { Second pass for the prefetch inline: emits PREFETCHNTA for the address of
    the operand when the operand is located in memory; any other operand
    location generates no code.

    On i386/i8086 targets code is only generated when the selected CPU type is
    at least cpu_Pentium3.

    The checkpointer switch is cleared on the operand's node tree while the
    operand is passed, so that no pointer check is generated for it, and is
    switched on again afterwards if it was active. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
  {$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
  {$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { re-enable the switch that was disabled above; the previous code
            passed false here again, which left the switch cleared }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { materialise the address in a register and prefetch through
                  a simple base-register reference }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Second pass for abs() on an integer operand.

    - i8086/i386 with a 64 bit result: delegated to the inherited code.
    - i8086/i386 without CMOV: branch-free sequence
        result:=x; x:=x sar (bits-1); result:=result xor x; result:=result-x
    - otherwise: negate a copy and use CMOVNS to pick the negated value when
      it is not negative.

    With overflow checking enabled, FPC_OVERFLOW is called when the
    subtraction/negation sets the overflow flag. }
  procedure tx86inlinenode.second_abs_long;
    var
      negreg : tregister;
      opsize : tcgsize;
      cmov : taicpu;

    { emits "jump over the FPC_OVERFLOW call if the overflow flag is clear"
      when overflow checking is active for this node }
    procedure check_overflow;
      var
        oklabel : TAsmLabel;
      begin
        if cs_check_overflow in current_settings.localswitches then
          begin
            current_asmdata.getjumplabel(oklabel);
            cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,oklabel);
            cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
            cg.a_label(current_asmdata.CurrAsmList,oklabel);
          end;
      end;

    begin
  {$if defined(i8086) or defined(i386)}
      if is_64bitint(resultdef) then
        inherited
      else if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          opsize:=def_cgsize(left.resultdef);
          secondpass(left);
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { turn the operand register into a sign mask (all ones if negative, else zero) }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          { (x xor mask) - mask yields the absolute value }
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
          check_overflow;
        end
      else
  {$endif i8086 or i386}
        begin
          opsize:=def_cgsize(left.resultdef);
          secondpass(left);
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
          negreg:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          { two copies of the operand: one to negate, one that becomes the result }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,negreg);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          emit_reg(A_NEG,tcgsize2opsize[opsize],negreg);
          check_overflow;
          { take the negated value if the sign flag is clear after NEG }
          cmov:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],negreg,location.register);
          cmov.condition:=C_NS;
          { NOTE(review): the flags are released before the CMOVcc reading them
            is appended; order kept as found, confirm it is intended }
          cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          current_asmdata.CurrAsmList.concat(cmov);
        end;
    end;
  1046. {*****************************************************************************
  1047. INCLUDE/EXCLUDE GENERIC HANDLING
  1048. *****************************************************************************}
  { Second pass for include(set,element)/exclude(set,element).

    Constant elements are handled with an OR (include) or an AND with the
    inverted mask (exclude) on the set; variable elements use BTS/BTR.
    Cases for which the inherited generic code is used instead:
      - i8086 targets below the 386 (no BTS/BTR);
      - other targets when not optimising for size, the element is not
        expected to be a constant and the set is expected in memory. }
  procedure tx86inlinenode.second_IncludeExclude;
    var
      setnode,
      elemnode : tnode;
      bitreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,
      mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize : tcgsize;
    begin
  {$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
  {$else i8086}
      { bts on memory locations is very slow, so even the default code is faster }
      if not(cs_opt_size in current_settings.optimizerswitches) and (tcallparanode(tcallparanode(left).right).left.expectloc<>LOC_CONSTANT) and
        (tcallparanode(left).left.expectloc=LOC_REFERENCE) then
  {$endif i8086}
        begin
          inherited;
          exit;
        end;

      { first parameter is the set, second one the element }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;

      if is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size)
        end
      else
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end;
      bitsperop:=(8*tcgsize2size[opsize]);
      secondpass(setnode);
      secondpass(elemnode);
      setbase:=tsetdef(setnode.resultdef).setbase;
      if elemnode.location.loc=LOC_CONSTANT then
        begin
          { calculate bit position }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { determine operator }
          if inlinenumber=in_include_x_y then
            cgop:=OP_OR
          else
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { step to the operand-sized chunk of the set that holds the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CSUBSETREG,
            LOC_CREGISTER :
              hlcg.a_op_const_loc(current_asmdata.CurrAsmList,cgop,setnode.resultdef,mask,setnode.location);
            else
              internalerror(200405022);
          end;
        end
      else
        begin
          { the bit test instructions are not used with 8 bit operands: widen to 32 bit }
          orgsize:=opsize;
          if opsize in [OS_8,OS_S8] then
            begin
              opdef:=u32inttype;
              opsize:=OS_32;
            end;
          { determine asm operator }
          if inlinenumber=in_include_x_y then
            asmop:=A_BTS
          else
            asmop:=A_BTR;
          hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
          register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
          bitreg:=elemnode.location.register;
          if setnode.location.loc=LOC_REFERENCE then
            emit_reg_ref(asmop,tcgsize2opsize[opsize],bitreg,setnode.location.reference)
          else
            begin
              { second argument can't be an 8 bit register either }
              setreg:=setnode.location.register;
              if (orgsize in [OS_8,OS_S8]) then
                setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
              emit_reg_reg(asmop,tcgsize2opsize[opsize],bitreg,setreg);
            end;
        end;
    end;
  { Second pass for popcnt(): emits POPCNT on the operand (register or memory)
    into a new integer register. 8 bit operands are processed as 16 bit values;
    when the result type is one byte wide the result register is resized to
    8 bit afterwards. }
  procedure tx86inlinenode.second_popcnt;
    var
      opsize : tcgsize;
      mustforce : boolean;
    begin
      secondpass(left);
      opsize:=tcgsize2unsigned[left.location.size];
      { no 8 Bit popcont }
      if opsize=OS_8 then
        opsize:=OS_16;
      { the operand has to be a register or a memory reference of exactly the
        operation size; otherwise load it into a register of that size }
      mustforce:=not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) or
        (left.location.size<>opsize);
      if mustforce then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);
      location_reset(location,LOC_REGISTER,opsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
      end;
      if resultdef.size=1 then
        begin
          location.size:=OS_8;
          location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
        end;
    end;
  { Second pass for the fused multiply-add inline, computing
    args[1]*args[2]+args[3] with one VFMADD/VFMSUB/VFNMADD/VFNMSUB instruction.

    Unary minus nodes directly on the parameters are folded into the choice of
    the instruction: a minus on either factor toggles "negative product", a
    minus on the third parameter selects the subtracting variant.

    At most one operand is used as memory operand; everything else is forced
    into mm registers. The last index of the opcode table selects the operand
    form: entries 0..2 are the 231 form (dest:=op2*op1+dest), entry 3 is the
    213 form (dest:=op2*dest+op1).

    Raises internalerror(2014032301) if the selected FPU type offers neither
    FMA nor FMA4 (and always on i8086). }
  procedure tx86inlinenode.second_fma;
  {$ifndef i8086}
    const
      fmaops : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
          { positive product }
          (
            { positive third operand }
            ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
             (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
            ),
            { negative third operand }
            ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
             (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
            )
          ),
          { negative product }
          (
            { positive third operand }
            ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
             (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
            ),
            { negative third operand }
            ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
             (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
            )
          )
        );
    var
      args : array[1..3] of tnode;
      memidx,
      otheridx,
      dstidx,
      k : integer;
      negaddend,
      negproduct,
      havemem : boolean;
  {$endif i8086}
    begin
  {$ifndef i8086}
      if (fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[] then
        begin
          negaddend:=false;
          negproduct:=false;
          { the parameter list is linked in reverse order }
          args[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;
          args[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          args[3]:=tcallparanode(parameters).paravalue;

          { check if a neg. node can be removed
            this is possible because changing the sign of
            a floating point number does not affect its absolute
            value in any way

            the skipped unary minus nodes are not released here: they are kept
            and released together with the other nodes, only no code is
            generated for them }
          for k:=1 to 2 do
            if args[k].nodetype=unaryminusn then
              begin
                args[k]:=tunarynode(args[k]).left;
                negproduct:=not(negproduct);
              end;
          if args[3].nodetype=unaryminusn then
            begin
              args[3]:=tunarynode(args[3]).left;
              negaddend:=true;
            end;

          for k:=1 to 3 do
            secondpass(args[k]);

          { only one memory operand is allowed }
          havemem:=false;
          memidx:=0;

          { in case parameters come on the FPU stack, we have to pop them in reverse order as we
            called secondpass }
          for k:=3 downto 1 do
            if not(args[k].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              begin
                if (args[k].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(havemem) then
                  begin
                    memidx:=k;
                    havemem:=true;
                  end
                else
                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,args[k].location,args[k].resultdef,true);
              end;

          location_reset(location,LOC_MMREGISTER,args[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if havemem then
            begin
              case memidx of
                1,2:
                  begin
                    { one factor is in memory: preload the addend into the
                      result and accumulate the product onto it (231 form) }
                    otheridx:=3-memidx;
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,args[3].resultdef,resultdef,
                      args[3].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(fmaops[negproduct,negaddend,tfloatdef(resultdef).floattype,memidx],S_NO,
                      args[memidx].location.reference,args[otheridx].location.register,location.register);
                  end;
                3:
                  begin
                    { the addend is in memory: preload the first factor (213 form) }
                    hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,args[1].resultdef,resultdef,
                      args[1].location.register,location.register,mms_movescalar);
                    emit_ref_reg_reg(fmaops[negproduct,negaddend,tfloatdef(resultdef).floattype,memidx],S_NO,
                      args[3].location.reference,args[2].location.register,location.register);
                  end
                else
                  internalerror(2014041301);
              end;
            end
          else
            begin
              { try to use the location which is already in a temp. mm register as destination,
                so the compiler might be able to re-use the register }
              if args[1].location.loc=LOC_MMREGISTER then
                dstidx:=1
              else if args[2].location.loc=LOC_MMREGISTER then
                dstidx:=2
              else
                dstidx:=3;

              if dstidx<3 then
                begin
                  { a factor is preloaded: dest:=otherfactor*dest+addend (213 form) }
                  otheridx:=3-dstidx;
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,args[dstidx].resultdef,resultdef,
                    args[dstidx].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(fmaops[negproduct,negaddend,tfloatdef(resultdef).floattype,3],S_NO,
                    args[3].location.register,args[otheridx].location.register,location.register);
                end
              else
                begin
                  { the addend is preloaded: dest:=factor2*factor1+dest (231 form) }
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,args[3].resultdef,resultdef,
                    args[3].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(fmaops[negproduct,negaddend,tfloatdef(resultdef).floattype,0],S_NO,
                    args[1].location.register,args[2].location.register,location.register);
                end;
            end;
        end
      else
  {$endif i8086}
        internalerror(2014032301);
    end;
  { Second pass for frac(x) on the vector unit: result:=x-trunc(x).

    With AVX512DQ a single VREDUCESS/VREDUCESD is used; otherwise the value
    is rounded with (V)ROUNDSS/(V)ROUNDSD using immediate 3 and the rounded
    value is subtracted from the operand. If operand and result float types
    differ, the result register is converted in place at the end.

    Raises internalerror(2017052101) when the result type is not handled by
    the vector unit. }
  procedure tx86inlinenode.second_frac_real;
    var
      roundedreg,
      srcreg : TRegister;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052101)
      else
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          srcreg:=left.location.register;
          if UseAVX then
            case tfloatdef(left.resultdef).floattype of
              s32real:
                begin
  {$ifndef i8086}
                  if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESS,S_NO,3,srcreg,srcreg,location.register))
                  else
  {$endif not i8086}
                    begin
                      { using left.location.register here as 3rd parameter is crucial to break dependency chains }
                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,srcreg,srcreg,location.register));
                      { result:=operand-rounded }
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,srcreg,location.register));
                    end;
                end;
              s64real:
                begin
  {$ifndef i8086}
                  if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESD,S_NO,3,srcreg,srcreg,location.register))
                  else
  {$endif not i8086}
                    begin
                      { using left.location.register here as 3rd parameter is crucial to break dependency chains }
                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,srcreg,srcreg,location.register));
                      { result:=operand-rounded }
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,srcreg,location.register));
                    end;
                end;
              else
                internalerror(2017052102);
            end
          else
            begin
              { two-operand SSE form: the result register starts as a copy of
                the operand, the rounded value is built in a scratch register
                and subtracted from it }
              roundedreg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
              cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
              case tfloatdef(left.resultdef).floattype of
                s32real:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,srcreg,roundedreg));
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSS,S_NO,roundedreg,location.register));
                  end;
                s64real:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,srcreg,roundedreg));
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSD,S_NO,roundedreg,location.register));
                  end;
                else
                  internalerror(2017052103);
              end;
            end;
          { the computation was done in the operand's precision; convert if the result type differs }
          if tfloatdef(left.resultdef).floattype<>tfloatdef(resultdef).floattype then
            hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,location.register,location.register,mms_movescalar);
        end;
    end;
  { Second pass for int(x) on the vector unit: rounds the operand with
    (V)ROUNDSS/(V)ROUNDSD using immediate 3 into a new mm register.

    Raises internalerror(2017052107) when the result type is not handled by
    the vector unit. }
  procedure tx86inlinenode.second_int_real;
    var
      roundop : tasmop;
      srcreg : tregister;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052107)
      else
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          srcreg:=left.location.register;
          roundop:=A_NONE;
          if UseAVX then
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_VROUNDSS;
                s64real:
                  roundop:=A_VROUNDSD;
                else
                  internalerror(2017052105);
              end;
              { using left.location.register here as 3rd parameter is crucial to break dependency chains }
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(roundop,S_NO,3,srcreg,srcreg,location.register));
            end
          else
            begin
              case tfloatdef(resultdef).floattype of
                s32real:
                  roundop:=A_ROUNDSS;
                s64real:
                  roundop:=A_ROUNDSD;
                else
                  internalerror(2017052106);
              end;
              current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(roundop,S_NO,3,srcreg,location.register));
            end;
        end;
    end;
  { Second pass for high() of a dynamic array.

    The array pointer is decremented by one with an explicit SUB. For a nil
    array this leaves -1 in the register and sets the carry flag, so the load
    of the stored high value is skipped and -1 is the result. Otherwise the
    value located sizeof(ossinttype) bytes in front of the array data is
    loaded into the same register.

    Raises Internalerror(2019122809) if the operand is not a dynamic array. }
  procedure tx86inlinenode.second_high;
    var
      skiplabel : tasmlabel;
      resreg : tregister;
      highref : treference;
    begin
      secondpass(left);
      if not(is_dynamic_array(left.resultdef)) then
        Internalerror(2019122809);

      { the register is modified below, so do not accept a constant register }
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
      current_asmdata.getjumplabel(skiplabel);
      { by subtracting 1 here, we get the -1 into the register we need if the dyn. array is nil and the carry
        flag is set in this case, so we can jump depending on it

        when loading the actual high value, we have to take care later of the decreased value

        do not use the cgs, as they might emit dec instead of a sub instruction, however with dec the trick
        we are using is not working as dec does not touch the carry flag }
      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,left.location.register));
      cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,skiplabel);
      { the +1 compensates for the decrement applied to the pointer above;
        the reference is created without any volatility flags }
      hlcg.reference_reset_base(highref,left.resultdef,left.location.register,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
      { reuse the register that held the array pointer for the result, so
        that the -1 produced for a nil array is kept when the load is skipped }
      resreg:=cg.makeregsize(current_asmdata.CurrAsmList,left.location.register,def_cgsize(resultdef));
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,highref,resreg);
      cg.a_label(current_asmdata.CurrAsmList,skiplabel);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=resreg;
    end;
  { Second pass (code generation) for the inlined Min/Max intrinsics.

    Two strategies are implemented:

    * Floating point (not compiled for i8086): used when the result is a
      single or a double; on i386 this additionally requires
      fputype>=fpu_sse for single and fputype>=fpu_sse2 for double.
      A scalar (V)MINSx/(V)MAXSx instruction selected from oparray is
      emitted and the result is returned in a newly allocated MM register.

    * Integer: used when the result is a 32 bit integer (or, on x86_64, a
      64 bit integer); on targets other than x86_64 the selected cpu type
      must additionally provide CPUX86_HAS_CMOV.  The first parameter is
      loaded into the result register, then a CMP and a CMOVcc are emitted;
      the CMOVcc condition is selected from inlinenumber.

    Every other combination raises internal error 2020120503. }
  procedure tx86inlinenode.second_minmax;
{$ifndef i8086}
    const
      { indexed by [is a max operation, UseAVX, float type of the result] }
      oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
        (
          (
            (A_MINSS,A_MINSD),
            (A_VMINSS,A_VMINSD)
          ),
          (
            (A_MAXSS,A_MAXSD),
            (A_VMAXSS,A_VMAXSD)
          )
        );
{$endif i8086}
    var
{$ifndef i8086}
      { index into paraarray of the parameter kept as memory operand, 0 if none }
      memop : integer;
      gotmem : boolean;
      op: TAsmOp;
{$endif i8086}
      i : integer;
      paraarray : array[1..2] of tnode;
      { the emitted CMOVcc, kept so its condition can be set afterwards }
      instr: TAiCpu;
      opsize: topsize;
{$ifdef x86_64}
      { only needed to materialise 64 bit constants which do not fit into
        a signed 32 bit immediate }
      tmpreg: TRegister;
{$endif x86_64}
    begin
{$ifndef i8086}
      if
{$ifdef i386}
        ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
        ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
        is_single(resultdef) or is_double(resultdef)
{$endif i386}
        then
        begin
          { the parameter list is walked back to front: the node reached
            through nextpara becomes paraarray[1] }
          paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[2]:=tcallparanode(parameters).paravalue;

          for i:=low(paraarray) to high(paraarray) do
            secondpass(paraarray[i]);

          { only one memory operand is allowed: the first parameter found in
            memory is remembered in memop, every other parameter that is not
            already in an MM register is forced into one }
          gotmem:=false;
          memop:=0;
          for i:=low(paraarray) to high(paraarray) do
            begin
              if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                begin
                  if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
                    begin
                      memop:=i;
                      gotmem:=true;
                    end
                  else
                    hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
                end;
            end;

          { min/max always load the second operand (which must be the else
            assignment) into the destination if one of the operands is a NaN,
            so unless fastmath is on we cannot swap the operands to omit a
            mova operation: load the first parameter into a register instead }
          if not(cs_opt_fastmath in current_settings.optimizerswitches) and gotmem and (memop=1) then
            begin
              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
              gotmem:=false;
            end;

          op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if gotmem then
            begin
              if UseAVX then
                { three operand form: no separate move into the result
                  register is needed }
                case memop of
                  1:
                    emit_ref_reg_reg(op,S_NO,
                      paraarray[1].location.reference,paraarray[2].location.register,location.register);
                  2:
                    emit_ref_reg_reg(op,S_NO,
                      paraarray[2].location.reference,paraarray[1].location.register,location.register);
                  else
                    internalerror(2020120504);
                end
              else
                { two operand form: copy the register parameter into the
                  result register first, then combine it with the memory
                  parameter }
                case memop of
                  1:
                    begin
                      hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
                        paraarray[2].location.register,location.register,mms_movescalar);
                      emit_ref_reg(op,S_NO,
                        paraarray[1].location.reference,location.register);
                    end;
                  2:
                    begin
                      hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                        paraarray[1].location.register,location.register,mms_movescalar);
                      emit_ref_reg(op,S_NO,
                        paraarray[2].location.reference,location.register);
                    end;
                  else
                    internalerror(2020120601);
                end;
            end
          else
            begin
              { both parameters are in MM registers }
              if UseAVX then
                emit_reg_reg_reg(op,S_NO,
                  paraarray[2].location.register,paraarray[1].location.register,location.register)
              else
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                    paraarray[1].location.register,location.register,mms_movescalar);
                  emit_reg_reg(op,S_NO,
                    paraarray[2].location.register,location.register)
                end;
            end;
        end
      else
{$endif i8086}
      if
{$ifndef x86_64}
        (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
{$endif x86_64}
        (
{$ifdef x86_64}
          is_64bitint(resultdef) or
{$endif x86_64}
          is_32bitint(resultdef)
        ) then
        begin
          { paraarray[1] is the right-hand side }
          paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[2]:=tcallparanode(parameters).paravalue;

          for i:=low(paraarray) to high(paraarray) do
            secondpass(paraarray[i]);

          if paraarray[2].location.loc = LOC_CONSTANT then
            begin
              { Swap the parameters so the constant is on the right }
              paraarray[2]:=paraarray[1];
              paraarray[1]:=tcallparanode(parameters).paravalue;
            end;

          { paraarray[1] may stay a constant, paraarray[2] must be either a
            reference or a register }
          if not(paraarray[1].location.loc in [LOC_CONSTANT,LOC_REFERENCE,LOC_CREFERENCE,LOC_REGISTER,LOC_CREGISTER]) then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[1].location,
              paraarray[1].resultdef,paraarray[1].resultdef,true);

          if not(paraarray[2].location.loc in [LOC_REFERENCE,LOC_CREFERENCE,LOC_REGISTER,LOC_CREGISTER]) then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[2].location,
              paraarray[2].resultdef,paraarray[2].resultdef,true);

          { the result register starts out holding paraarray[1]; the CMOVcc
            emitted below takes paraarray[2] and this register as operands }
          location_reset(location,LOC_REGISTER,paraarray[1].location.size);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);

          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,paraarray[1].location,location.register);
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);

{$ifdef x86_64}
          if is_64bitint(resultdef) then
            opsize := S_Q
          else
{$endif x86_64}
            opsize := S_L;

          { Try to use references as is, unless they would trigger internal
            error 200502052: when generating PIC, references based on a
            symbol are loaded into a register first (first parameter, then
            second parameter) }
          if cs_create_pic in current_settings.moduleswitches then
            for i:=low(paraarray) to high(paraarray) do
              if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and
                 Assigned(paraarray[i].location.reference.symbol) then
                hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[i].location,
                  paraarray[i].resultdef,paraarray[i].resultdef,true);

          { emit CMP + CMOVcc depending on where both parameters live; instr
            always ends up referring to the CMOVcc }
          case paraarray[1].location.loc of
            LOC_CONSTANT:
              case paraarray[2].location.loc of
                LOC_REFERENCE,LOC_CREFERENCE:
                  begin
{$ifdef x86_64}
                    { x86_64 only supports signed 32 bits constants directly }
                    if (opsize=S_Q) and
                       ((paraarray[1].location.value<low(longint)) or (paraarray[1].location.value>high(longint))) then
                      begin
                        tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                        hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,paraarray[1].location.value,tmpreg);
                        emit_reg_ref(A_CMP,opsize,tmpreg,paraarray[2].location.reference);
                      end
                    else
{$endif x86_64}
                      emit_const_ref(A_CMP,opsize,paraarray[1].location.value,paraarray[2].location.reference);
                    emit_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
                    instr:=TAiCpu(current_asmdata.CurrAsmList.Last); { The instruction just inserted; we need to modify its condition below }
                  end;
                LOC_REGISTER,LOC_CREGISTER:
                  begin
{$ifdef x86_64}
                    { x86_64 only supports signed 32 bits constants directly }
                    if (opsize=S_Q) and
                       ((paraarray[1].location.value<low(longint)) or (paraarray[1].location.value>high(longint))) then
                      begin
                        tmpreg:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
                        hlcg.a_load_const_reg(current_asmdata.CurrAsmList,resultdef,paraarray[1].location.value,tmpreg);
                        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,opsize,
                          tmpreg,paraarray[2].location.register));
                      end
                    else
{$endif x86_64}
                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,opsize,
                        paraarray[1].location.value,paraarray[2].location.register));
                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
                    current_asmdata.CurrAsmList.concat(instr); { We need to modify the instruction's condition below }
                  end;
                else
                  InternalError(2021121907);
              end;
            LOC_REFERENCE,LOC_CREFERENCE:
              case paraarray[2].location.loc of
                LOC_REFERENCE,LOC_CREFERENCE:
                  begin
                    { The reference has already been stored at location.register, so use that }
                    emit_reg_ref(A_CMP,opsize,location.register,paraarray[2].location.reference);
                    emit_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
                    instr:=TAiCpu(current_asmdata.CurrAsmList.Last); { The instruction just inserted; we need to modify its condition below }
                  end;
                LOC_REGISTER,LOC_CREGISTER:
                  begin
                    emit_ref_reg(A_CMP,opsize,paraarray[1].location.reference,paraarray[2].location.register);
                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
                    current_asmdata.CurrAsmList.concat(instr); { We need to modify the instruction's condition below }
                  end;
                else
                  InternalError(2021121906);
              end;
            LOC_REGISTER,LOC_CREGISTER:
              case paraarray[2].location.loc of
                LOC_REFERENCE,LOC_CREFERENCE:
                  begin
                    emit_reg_ref(A_CMP,opsize,paraarray[1].location.register,paraarray[2].location.reference);
                    emit_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
                    instr:=TAiCpu(current_asmdata.CurrAsmList.Last); { The instruction just inserted; we need to modify its condition below }
                  end;
                LOC_REGISTER,LOC_CREGISTER:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,opsize,
                      paraarray[1].location.register,paraarray[2].location.register));
                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
                    current_asmdata.CurrAsmList.concat(instr); { We need to modify the instruction's condition below }
                  end;
                else
                  InternalError(2021121905);
              end;
            else
              InternalError(2021121904);
          end;

          { pick the CMOVcc condition matching the intrinsic }
          case inlinenumber of
            in_min_longint,
            in_min_int64:
              instr.condition := C_L;
            in_min_dword,
            in_min_qword:
              instr.condition := C_B;
            in_max_longint,
            in_max_int64:
              instr.condition := C_G;
            in_max_dword,
            in_max_qword:
              instr.condition := C_A;
            else
              Internalerror(2021121903);
          end;

          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        end
      else
        internalerror(2020120503);
    end;
  1719. end.