nx86inl.pas 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835
  1. {
  2. Copyright (c) 1998-2002 by Florian Klaempfl
  3. Generate x86 inline nodes
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  15. ****************************************************************************
  16. }
  17. unit nx86inl;
  18. {$i fpcdefs.inc}
  19. interface
  20. uses
  21. node,ninl,ncginl;
  type
    { Inline node specialisation for x86 targets. It overrides the
      type-check, first-pass and second-pass (code generation) entry points
      of tcginlinenode for the intrinsics handled in this unit. }
    tx86inlinenode = class(tcginlinenode)
    protected
      { Hook called from the round/trunc first-pass handlers (and from the
        SSE4.1 path of first_frac_real); the implementation in this unit
        is empty. }
      procedure maybe_remove_round_trunc_typeconv; virtual;
    public
      { type checking of the cpu specific intrinsics }
      function pass_typecheck_cpu:tnode;override;

      { first pass overrides: they set the expected location so that the
        code generator will actually generate these nodes instead of
        falling back to the inherited handling }
      function first_cpu: tnode;override;
      function first_pi: tnode ; override;
      function first_arctan_real: tnode; override;
      function first_abs_real: tnode; override;
      function first_sqr_real: tnode; override;
      function first_sqrt_real: tnode; override;
      function first_ln_real: tnode; override;
      function first_cos_real: tnode; override;
      function first_sin_real: tnode; override;
      function first_round_real: tnode; override;
      function first_trunc_real: tnode; override;
      function first_popcnt: tnode; override;
      function first_fma: tnode; override;
      function first_frac_real : tnode; override;
      function first_int_real : tnode; override;
      function first_minmax: tnode; override;

      function simplify(forinline : boolean) : tnode; override;

      { second pass overrides: emit the code for these nodes }
      procedure pass_generate_code_cpu;override;
      procedure second_IncludeExclude;override;
      procedure second_AndOrXorShiftRot_assign;override;
      procedure second_pi; override;
      procedure second_arctan_real; override;
      procedure second_abs_real; override;
      procedure second_round_real; override;
      procedure second_sqr_real; override;
      procedure second_sqrt_real; override;
      procedure second_ln_real; override;
      procedure second_cos_real; override;
      procedure second_sin_real; override;
      procedure second_trunc_real; override;
      procedure second_prefetch;override;
      procedure second_abs_long;override;
      procedure second_popcnt;override;
      procedure second_fma;override;
      procedure second_frac_real;override;
      procedure second_int_real;override;
      procedure second_high;override;
      procedure second_minmax;override;
    private
      { runs the second pass on lnode and brings its value into the x87
        result register }
      procedure load_fpu_location(lnode: tnode);
    end;
  74. implementation
  75. uses
  76. systems,
  77. globtype,globals,
  78. verbose,compinnr,fmodule,
  79. defutil,
  80. aasmbase,aasmdata,aasmcpu,
  81. symconst,symtype,symdef,symcpu,
  82. ncnv,
  83. htypechk,
  84. cgbase,pass_1,pass_2,
  85. cpuinfo,cpubase,nutils,
  86. ncal,ncgutil,nld,ncon,nadd,nmat,constexp,
  87. tgobj,
  88. cga,cgutils,cgx86,cgobj,hlcgobj,cutils;
  89. {*****************************************************************************
  90. TX86INLINENODE
  91. *****************************************************************************}
  procedure tx86inlinenode.maybe_remove_round_trunc_typeconv;
    begin
      { Deliberately empty: this virtual hook only makes a difference for
        x86_64. }
    end;
  { Type checks the x86 specific intrinsics: validates the parameter count
    where applicable and assigns resultdef. Intrinsics not listed here (nor
    in the included x86mmtype.inc) are passed to the inherited handler.
    Returns nil for the intrinsics handled locally. }
  function tx86inlinenode.pass_typecheck_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { port reads take one parameter; the result type follows the
          width of the access }
        in_x86_inportb:
          begin
            CheckParameters(1);
            resultdef:=u8inttype;
          end;
        in_x86_inportw:
          begin
            CheckParameters(1);
            resultdef:=u16inttype;
          end;
        in_x86_inportl:
          begin
            CheckParameters(1);
            resultdef:=s32inttype;
          end;

        { port writes take two parameters and return nothing }
        in_x86_outportb, in_x86_outportw, in_x86_outportl:
          begin
            CheckParameters(2);
            resultdef:=voidtype;
          end;

        in_x86_cli, in_x86_sti:
          resultdef:=voidtype;

        { segment register reads }
        in_x86_get_cs, in_x86_get_ss, in_x86_get_ds,
        in_x86_get_es, in_x86_get_fs, in_x86_get_gs:
{$ifdef i8086}
          resultdef:=u16inttype;
{$else i8086}
          resultdef:=s32inttype;
{$endif i8086}

        { include automatically generated code }
{$i x86mmtype.inc}

        else
          Result:=inherited pass_typecheck_cpu;
      end;
    end;
  { First pass for the x86 specific intrinsics: sets the expected location.
    Intrinsics not listed here (nor in the included x86mmfirst.inc) are
    passed to the inherited handler. }
  function tx86inlinenode.first_cpu: tnode;
    begin
      Result:=nil;
      case inlinenumber of
        { intrinsics that deliver a value }
        in_x86_inportb, in_x86_inportw, in_x86_inportl,
        in_x86_get_cs, in_x86_get_ss, in_x86_get_ds,
        in_x86_get_es, in_x86_get_fs, in_x86_get_gs:
          expectloc:=LOC_REGISTER;

        { intrinsics without a result }
        in_x86_outportb, in_x86_outportw, in_x86_outportl,
        in_x86_cli, in_x86_sti:
          expectloc:=LOC_VOID;

        { include automatically generated code }
{$i x86mmfirst.inc}

        else
          Result:=inherited first_cpu;
      end;
    end;
  { First pass for pi: handled inline (FPU register result) only when the
    best real type is the 80 bit extended type, otherwise inherited. }
  function tx86inlinenode.first_pi : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for arctan: handled inline (FPU register result) only when
    the best real type is the 80 bit extended type, otherwise inherited. }
  function tx86inlinenode.first_arctan_real : tnode;
    begin
{$ifdef i8086}
      { FPATAN's range is limited to (0 <= value < 1) on the 8087 and 80287,
        so we need to use the RTL helper on these FPUs }
      if current_settings.cputype < cpu_386 then
        begin
          Result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for abs on reals: always handled inline; the result lives in
    an MM register when the vector FPU is used for the result type and in
    an FPU register otherwise. }
  function tx86inlinenode.first_abs_real : tnode;
    begin
      Result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for sqr on reals: always handled inline; the result lives in
    an MM register when the vector FPU is used for the result type and in
    an FPU register otherwise. }
  function tx86inlinenode.first_sqr_real : tnode;
    begin
      Result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for sqrt on reals: always handled inline; the result lives
    in an MM register when the vector FPU is used for the result type and
    in an FPU register otherwise. }
  function tx86inlinenode.first_sqrt_real : tnode;
    begin
      Result:=nil;
      if use_vectorfpu(resultdef) then
        expectloc:=LOC_MMREGISTER
      else
        expectloc:=LOC_FPUREGISTER;
    end;
  { First pass for ln: handled inline (FPU register result) only when the
    best real type is the 80 bit extended type, otherwise inherited. }
  function tx86inlinenode.first_ln_real : tnode;
    begin
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for cos: handled inline (FPU register result) only when the
    best real type is the 80 bit extended type, otherwise inherited. }
  function tx86inlinenode.first_cos_real : tnode;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          Result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for sin: handled inline (FPU register result) only when the
    best real type is the 80 bit extended type, otherwise inherited. }
  function tx86inlinenode.first_sin_real : tnode;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        begin
          Result:=inherited;
          exit;
        end;
{$endif i8086}
      if tfloatdef(pbestrealtype^).floattype<>s80real then
        begin
          Result:=inherited;
          exit;
        end;
      expectloc:=LOC_FPUREGISTER;
      Result:=nil;
    end;
  { First pass for round: always handled inline. The result is expected in
    memory, except on x86_64 when the operand uses the vector FPU, where it
    is expected in an integer register. }
  function tx86inlinenode.first_round_real : tnode;
    begin
      { must run first: the check below looks at left.resultdef }
      maybe_remove_round_trunc_typeconv;
      Result:=nil;
      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
    end;
  { First pass for trunc. When optimising for size (and, on x86_64, the
    operand does not use the vector FPU) the inherited handling is used;
    otherwise the node is handled inline with the same expected locations
    as round. }
  function tx86inlinenode.first_trunc_real: tnode;
    var
      use_inherited : boolean;
    begin
      { must run first: the checks below look at left.resultdef }
      maybe_remove_round_trunc_typeconv;

      use_inherited:=cs_opt_size in current_settings.optimizerswitches;
{$ifdef x86_64}
      use_inherited:=use_inherited and not(use_vectorfpu(left.resultdef));
{$endif x86_64}
      if use_inherited then
        begin
          Result:=inherited;
          exit;
        end;

      expectloc:=LOC_REFERENCE;
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        expectloc:=LOC_REGISTER;
{$endif x86_64}
      Result:=nil;
    end;
  { First pass for popcnt: handled inline (register result) when the
    selected CPU has POPCNT (on i386 additionally only for operands that
    are not 64 bit); otherwise, and always on i8086, inherited. }
  function tx86inlinenode.first_popcnt: tnode;
    begin
{$ifndef i8086}
      if (CPUX86_HAS_POPCNT in cpu_capabilities[current_settings.cputype])
{$ifdef i386}
         and not is_64bit(left.resultdef)
{$endif i386}
         then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;
{$endif not i8086}
      Result:=inherited first_popcnt;
    end;
  { First pass for fma: handled inline (MM register result) when the
    selected FPU offers FMA or FMA4 and the result is single or double;
    otherwise, and always on i8086, inherited. }
  function tx86inlinenode.first_fma : tnode;
    begin
{$ifndef i8086}
      if ((fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[]) and
         (is_double(resultdef) or is_single(resultdef)) then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
{$endif i8086}
      Result:=inherited first_fma;
    end;
  { First pass for frac: handled inline (MM register result) for single and
    double results when the FPU type is at least SSE4.1, otherwise
    inherited. }
  function tx86inlinenode.first_frac_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_frac_real;
          exit;
        end;
      maybe_remove_round_trunc_typeconv;
      expectloc:=LOC_MMREGISTER;
      Result:=nil;
    end;
  { First pass for int: handled inline (MM register result) for single and
    double results when the FPU type is at least SSE4.1, otherwise
    inherited. }
  function tx86inlinenode.first_int_real : tnode;
    begin
      if (current_settings.fputype<fpu_sse41) or
         not(is_double(resultdef) or is_single(resultdef)) then
        begin
          Result:=inherited first_int_real;
          exit;
        end;
      Result:=nil;
      expectloc:=LOC_MMREGISTER;
    end;
  { First pass for min/max.
    - single/double results are handled inline in an MM register (on i386
      only if the FPU type is at least SSE resp. SSE2; never on i8086);
    - 32 bit integer results (and 64 bit ones on x86_64) are handled inline
      in an integer register (outside x86_64 only if the CPU has CMOV);
    - everything else is left to the inherited handler. }
  function tx86inlinenode.first_minmax: tnode;
    begin
{$ifndef i8086}
      if
{$ifdef i386}
        ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
        ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
        ((is_double(resultdef)) or (is_single(resultdef)))
{$endif i386}
      then
        begin
          expectloc:=LOC_MMREGISTER;
          Result:=nil;
          exit;
        end;
{$endif i8086}

      if
{$ifndef x86_64}
        (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
{$endif x86_64}
        (
{$ifdef x86_64}
          is_64bitint(resultdef) or
{$endif x86_64}
          is_32bitint(resultdef)
        ) then
        begin
          expectloc:=LOC_REGISTER;
          Result:=nil;
          exit;
        end;

      Result:=inherited first_minmax;
    end;
  { Simplification hook. For int() with an FPU type of at least SSE4.1 whose
    operand is a non-explicit type conversion of a single or double value,
    the conversion is dropped and a type-checked copy of this node, using
    the operand's own type as result type, is returned. All other cases are
    delegated to the inherited simplify. }
  function tx86inlinenode.simplify(forinline : boolean) : tnode;
    var
      operand : tnode;
    begin
      if not(
           (current_settings.fputype>=fpu_sse41) and
           (inlinenumber=in_int_real) and (left.nodetype=typeconvn) and
           not(nf_explicit in left.flags) and
           (ttypeconvnode(left).left.resultdef.typ=floatdef) and
           (is_double(ttypeconvnode(left).left.resultdef) or
            is_single(ttypeconvnode(left).left.resultdef))
         ) then
        begin
          Result:=inherited simplify(forinline);
          exit;
        end;

      { get rid of the type conversion: detach its operand before freeing
        the conversion node so the operand survives }
      operand:=ttypeconvnode(left).left;
      ttypeconvnode(left).left:=nil;
      left.free;
      left:=operand;

      Result:=self.getcopy;
      tinlinenode(Result).resultdef:=operand.resultdef;
      typecheckpass(Result);
    end;
  { Second pass for the x86 specific intrinsics (port I/O, CLI/STI, segment
    register reads and whatever x86mmsecond.inc adds). Anything else is
    passed to the inherited handler.

    NOTE(review): paraarray, i, op and the helpers GetConstInt,
    GetParameters, location_force_mmxreg and location_make_ref are not
    referenced by the visible code; presumably they are used by the
    included x86mmsecond.inc - keep their names and signatures. }
  procedure tx86inlinenode.pass_generate_code_cpu;
    var
      paraarray : array[1..4] of tnode;
      i : integer;
      op: TAsmOp;

    { Emits IN into dreg and copies the value to a fresh integer register
      that becomes the node's location. A constant port in 0..255 uses the
      immediate form, anything else goes through DX. }
    procedure inport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        port: tnode;
      begin
        port:=left;
        secondpass(port);
        if (port.location.loc=LOC_CONSTANT) and
           (port.location.value>=0) and
           (port.location.value<=255) then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_IN,dsize,port.location.value,dreg));
          end
        else
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
            hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_IN,dsize,NR_DX,dreg));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end;
        { shared tail: move the value out of the fixed register }
        location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_reg_reg(current_asmdata.CurrAsmList,dtype,resultdef,dreg,location.register);
      end;

    { Loads the data into dreg and emits OUT. A constant port in 0..255 uses
      the immediate form, anything else goes through DX. }
    procedure outport(dreg:TRegister;dsize:topsize;dtype:tdef);
      var
        port, data: tnode;
      begin
        port:=tcallparanode(tcallparanode(left).right).left;
        data:=tcallparanode(left).left;
        secondpass(data);
        secondpass(port);
        hlcg.getcpuregister(current_asmdata.CurrAsmList,dreg);
        hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,data.resultdef,dtype,data.location,dreg);
        if (port.location.loc<>LOC_CONSTANT) or
           (port.location.value<0) or
           (port.location.value>255) then
          begin
            hlcg.getcpuregister(current_asmdata.CurrAsmList,NR_DX);
            hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,port.resultdef,u16inttype,port.location,NR_DX);
            current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_OUT,dsize,dreg,NR_DX));
            hlcg.ungetcpuregister(current_asmdata.CurrAsmList,NR_DX);
          end
        else
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_const(A_OUT,dsize,dreg,port.location.value));
        hlcg.ungetcpuregister(current_asmdata.CurrAsmList,dreg);
      end;

    { Moves segment register segreg into a fresh integer register, which
      becomes the node's location. }
    procedure get_segreg(segreg:tregister);
      var
        ressize: tcgsize;
      begin
        ressize:=def_cgsize(resultdef);
        location_reset(location,LOC_REGISTER,ressize);
        location.register:=hlcg.getintregister(current_asmdata.CurrAsmList,resultdef);
        current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_MOV,TCGSize2OpSize[ressize],segreg,location.register));
      end;

    { Returns the value of a constant integer node; reports
      type_e_constant_expr_expected and returns 0 for anything else. }
    function GetConstInt(n: tnode): longint;
      begin
        if is_constintnode(n) then
          Result:=tordconstnode(n).value.svalue
        else
          begin
            Result:=0;
            Message(type_e_constant_expr_expected);
          end;
      end;

    { Fills paraarray[1..count] from the parameter list hanging off left;
      the first list entry ends up in paraarray[count]. A single parameter
      that is not wrapped in a tcallparanode is stored directly. }
    procedure GetParameters(count: longint);
      var
        idx: longint;
        para: tnode;
      begin
        if (count=1) and
           (not (left is tcallparanode)) then
          begin
            paraarray[1]:=left;
            exit;
          end;
        para:=left;
        idx:=count;
        while idx>=1 do
          begin
            paraarray[idx]:=tcallparanode(para).paravalue;
            para:=tcallparanode(para).nextpara;
            dec(idx);
          end;
      end;

    { Makes sure l is in an MMX register. A LOC_CMMXREGISTER location is
      left alone only when maybeconst is set. }
    procedure location_force_mmxreg(list:TAsmList;var l: tlocation;maybeconst:boolean);
      var
        mmxreg : tregister;
      begin
        if (l.loc=LOC_MMXREGISTER) or
           ((l.loc=LOC_CMMXREGISTER) and maybeconst) then
          exit;
        mmxreg:=tcgx86(cg).getmmxregister(list);
        cg.a_loadmm_loc_reg(list,OS_M64,l,mmxreg,nil);
        location_freetemp(list,l);
        location_reset(l,LOC_MMXREGISTER,OS_M64);
        l.register:=mmxreg;
      end;

    { Turns loc into a memory reference: references stay untouched, any
      other location is (if necessary forced into a 32 bit register and
      then) used as the base register of a new reference. }
    procedure location_make_ref(var loc: tlocation);
      var
        refloc: tlocation;
      begin
        case loc.loc of
          LOC_CREFERENCE,
          LOC_REFERENCE:
            exit;
          LOC_CREGISTER,
          LOC_REGISTER:
            ;
          else
            hlcg.location_force_reg(current_asmdata.CurrAsmList,loc,u32inttype,u32inttype,false);
        end;
        location_reset_ref(refloc, LOC_REFERENCE, OS_32, 1, []);
        refloc.reference.base:=loc.register;
        loc:=refloc;
      end;

    begin
      FillChar(paraarray,sizeof(paraarray),0);
      case inlinenumber of
        { port input }
        in_x86_inportb:
          inport(NR_AL,S_B,u8inttype);
        in_x86_inportw:
          inport(NR_AX,S_W,u16inttype);
        in_x86_inportl:
          inport(NR_EAX,S_L,s32inttype);

        { port output }
        in_x86_outportb:
          outport(NR_AL,S_B,u8inttype);
        in_x86_outportw:
          outport(NR_AX,S_W,u16inttype);
        in_x86_outportl:
          outport(NR_EAX,S_L,s32inttype);

        { interrupt flag }
        in_x86_cli:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_CLI));
        in_x86_sti:
          current_asmdata.CurrAsmList.concat(taicpu.op_none(A_STI));

        { segment registers }
        in_x86_get_cs:
          get_segreg(NR_CS);
        in_x86_get_ss:
          get_segreg(NR_SS);
        in_x86_get_ds:
          get_segreg(NR_DS);
        in_x86_get_es:
          get_segreg(NR_ES);
        in_x86_get_fs:
          get_segreg(NR_FS);
        in_x86_get_gs:
          get_segreg(NR_GS);

{$i x86mmsecond.inc}

        else
          inherited pass_generate_code_cpu;
      end;
    end;
  { Second pass for the "x op= y" intrinsics. With level 2 optimisations and
    a BMI2 capable CPU, "x and= ((1 shl y) - 1)" on 32 bit (x86_64: also
    64 bit) operands is emitted as a single BZHI; every other case is left
    to the inherited implementation. }
  procedure tx86inlinenode.second_AndOrXorShiftRot_assign;
    var
      opsize : tcgsize;
      maskexpr, shiftexpr, countnode, targetnode: TNode;
      tmpreg: TRegister;
    begin
{$ifndef i8086}
      if (cs_opt_level2 in current_settings.optimizerswitches) then
        begin
          { Named shortcuts save a lot of typecasting below }
          maskexpr := tcallparanode(left).left;
          targetnode := tcallparanode(tcallparanode(left).right).left;
          opsize := def_cgsize(targetnode.resultdef);

          { BMI2 optimisation: the second operand must have the exact shape
            "((1 shl y) - 1)" }
          if (CPUX86_HAS_BMI2 in cpu_capabilities[current_settings.cputype]) and
             (inlinenumber=in_and_assign_x_y) and
             (opsize in [OS_32, OS_S32{$ifdef x86_64}, OS_64, OS_S64{$endif x86_64}]) and
             (maskexpr.nodetype = subn) and
             (taddnode(maskexpr).right.nodetype = ordconstn) and
             (tordconstnode(taddnode(maskexpr).right).value = 1) and
             (taddnode(maskexpr).left.nodetype = shln) and
             (tshlshrnode(taddnode(maskexpr).left).left.nodetype = ordconstn) and
             (tordconstnode(tshlshrnode(taddnode(maskexpr).left).left).value = 1) then
            begin
              { Skip the subtract and shift nodes completely and work on the
                shift count directly }
              shiftexpr := taddnode(maskexpr).left;
              countnode := tshlshrnode(shiftexpr).right;
{$ifdef x86_64}
              { The code generator sometimes extends the shift result to 64-bit unnecessarily }
              if (countnode.nodetype = typeconvn) and (opsize in [OS_32, OS_S32]) and
                 (def_cgsize(TTypeConvNode(countnode).resultdef) in [OS_64, OS_S64]) then
                begin
                  { Convert to the 32-bit type and redo the first pass }
                  countnode.resultdef:=targetnode.resultdef;
                  node_reset_flags(countnode,[],[tnf_pass1_done]);
                  { We shouldn't be getting any new errors }
                  if do_firstpass(countnode) then
                    InternalError(2022110202);
                  { Keep things internally consistent in case countnode changed }
                  tshlshrnode(shiftexpr).right:=countnode;
                end;
{$endif x86_64}
              secondpass(countnode);
              secondpass(targetnode);

              { the index operand of BZHI has to be a register }
              hlcg.location_force_reg(
                current_asmdata.CurrAsmList,
                countnode.location,
                countnode.resultdef,
                targetnode.resultdef,
                false
              );

              case targetnode.location.loc of
                LOC_REFERENCE,
                LOC_CREFERENCE:
                  begin
                    { BZHI can only write to a register, so go through a
                      temporary register and store it back }
                    tmpreg := cg.getintregister(current_asmdata.CurrAsmList,opsize);
                    emit_reg_ref_reg(A_BZHI, TCGSize2OpSize[opsize], countnode.location.register, targetnode.location.reference, tmpreg);
                    emit_reg_ref(A_MOV, TCGSize2OpSize[opsize], tmpreg, targetnode.location.reference);
                  end;
                LOC_REGISTER,
                LOC_CREGISTER:
                  emit_reg_reg_reg(A_BZHI, TCGSize2OpSize[opsize], countnode.location.register, targetnode.location.register, targetnode.location.register);
                else
                  InternalError(2022102120);
              end;
              Exit;
            end;
        end;
{$endif not i8086}
      inherited second_AndOrXorShiftRot_assign;
    end;
  { Second pass for pi: emits FLDPI, accounts for the additional x87 stack
    entry and reports the FPU result register as location. }
  procedure tx86inlinenode.second_pi;
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      emit_none(A_FLDPI,S_NO);
      tcgx86(cg).inc_fpu_stack;
    end;
  { Runs the second pass on lnode and loads its value into an FPU register;
    the node's own location is set to the FPU result register. Raises an
    internal error for source locations that are not handled. }
  procedure tx86inlinenode.load_fpu_location(lnode: tnode);
    begin
      location_reset(location,LOC_FPUREGISTER,def_cgsize(resultdef));
      location.register:=NR_FPU_RESULT_REG;
      secondpass(lnode);
      case lnode.location.loc of
        LOC_FPUREGISTER:
          { nothing to load }
          ;
        LOC_CFPUREGISTER:
          cg.a_loadfpu_reg_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.register,location.register);
        LOC_REFERENCE,
        LOC_CREFERENCE:
          cg.a_loadfpu_ref_reg(current_asmdata.CurrAsmList,
            lnode.location.size,lnode.location.size,
            lnode.location.reference,location.register);
        LOC_MMREGISTER,
        LOC_CMMREGISTER:
          begin
            { take over the operand's location and let the code generator
              move it into an FPU register }
            location:=lnode.location;
            hlcg.location_force_fpureg(current_asmdata.CurrAsmList,location,lnode.resultdef,false);
          end;
        else
          internalerror(309991);
      end;
    end;
  { Second pass for arctan: loads the operand into the FPU, then emits
    FLD1 followed by FPATAN. }
  procedure tx86inlinenode.second_arctan_real;
    begin
      load_fpu_location(left);
      { second FPATAN operand }
      emit_none(A_FLD1,S_NO);
      emit_none(A_FPATAN,S_NO);
    end;
  { Second pass for abs on reals. With the vector FPU the operand is ANDed
    with the FPC_ABSMASK_SINGLE / FPC_ABSMASK_DOUBLE constant (three operand
    AVX form into a new register, or in place on the operand's register);
    otherwise FABS is used on the x87 stack. }
  procedure tx86inlinenode.second_abs_real;

    { whether the mask symbol has to be referenced indirectly }
    function needs_indirect:boolean; inline;
      begin
        result:=(tf_supports_packages in target_info.flags) and
                (target_info.system in systems_indirect_var_imports);
      end;

    var
      maskref : treference;
      masksym : tasmsymbol;
      avxop, sseop : TAsmOp;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          load_fpu_location(left);
          emit_none(A_FABS,S_NO);
          exit;
        end;

      secondpass(left);
      if left.location.loc<>LOC_MMREGISTER then
        hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,UseAVX);
      if UseAVX then
        begin
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,def_cgsize(resultdef));
        end
      else
        location:=left.location;

      { pick the mask constant and the opcodes for the operand type }
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            masksym:=current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_SINGLE',AT_DATA,needs_indirect);
            avxop:=A_VANDPS;
            sseop:=A_ANDPS;
          end;
        s64real:
          begin
            masksym:=current_asmdata.RefAsmSymbol(target_info.cprefix+'FPC_ABSMASK_DOUBLE',AT_DATA,needs_indirect);
            avxop:=A_VANDPD;
            sseop:=A_ANDPD;
          end;
        else
          internalerror(200506081);
      end;

      reference_reset_symbol(maskref,masksym,0,4,[]);
      current_module.add_extern_asmsym(masksym);
      tcgx86(cg).make_simple_ref(current_asmdata.CurrAsmList, maskref);
      if UseAVX then
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg_reg(
          avxop,S_XMM,maskref,left.location.register,location.register))
      else
        current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(sseop,S_XMM,maskref,location.register));
    end;
  { Second pass for round. On x86_64 with a vector FPU operand the value is
    converted with (V)CVTSS2SI / (V)CVTSD2SI into a 64 bit integer register.
    Otherwise the operand is loaded onto the x87 stack and stored with
    FISTP into a temp that becomes the result location. }
  procedure tx86inlinenode.second_round_real;
{$ifdef x86_64}
    var
      cvtop : TAsmOp;
{$endif x86_64}
    begin
{$ifdef x86_64}
      if use_vectorfpu(left.resultdef) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { choose the conversion opcode by encoding and operand size }
          if UseAVX then
            case left.location.size of
              OS_F32:
                cvtop:=A_VCVTSS2SI;
              OS_F64:
                cvtop:=A_VCVTSD2SI;
              else
                internalerror(2007031402);
            end
          else
            case left.location.size of
              OS_F32:
                cvtop:=A_CVTSS2SI;
              OS_F64:
                cvtop:=A_CVTSD2SI;
              else
                internalerror(2007031404);
            end;
          current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(cvtop,S_NO,left.location.register,location.register));
          exit;
        end;
{$endif x86_64}
      { x87 path }
      load_fpu_location(left);
      location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
      tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
      emit_ref(A_FISTP,S_IQ,location.reference);
      { FISTP popped the operand }
      tcgx86(cg).dec_fpu_stack;
      emit_none(A_FWAIT,S_NO);
    end;
  { Generates code for Trunc() on a floating point operand; the result is a
    signed 64 bit integer.

    Three strategies are used:
      - x86_64, vector FPU operand: (V)CVTTSS2SI/(V)CVTTSD2SI into an integer register
      - fputype >= fpu_sse3: x87 FISTTP into a temp
      - otherwise: x87 FISTP into a temp with the control word temporarily
        changed, the original control word being restored afterwards }
  procedure tx86inlinenode.second_trunc_real;
    var
      saved_cw,
      trunc_cw : treference;
    begin
{$ifdef x86_64}
      { NOTE(review): left.location.loc is inspected here before secondpass(left)
        has been called in this procedure - confirm it is meaningful at this point }
      if use_vectorfpu(left.resultdef) and
         ((left.location.loc<>LOC_FPUREGISTER) or (current_settings.fputype<fpu_sse3)) then
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_REGISTER,OS_S64);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,OS_S64);
          { pick the truncating conversion by operand size first, then by encoding }
          case left.location.size of
            OS_F32:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSS2SI,S_NO,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSS2SI,S_NO,left.location.register,location.register));
              end;
            OS_F64:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_VCVTTSD2SI,S_NO,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CVTTSD2SI,S_NO,left.location.register,location.register));
              end;
            else
              begin
                { unexpected operand size; each encoding path keeps its own error number }
                if UseAVX then
                  internalerror(2007031401)
                else
                  internalerror(2007031403);
              end;
          end;
        end
      else
{$endif x86_64}
        begin
          if (current_settings.fputype<fpu_sse3) then
            begin
              { no FISTTP available: store the control word twice, once as the
                copy to modify and once as the copy to restore from }
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,saved_cw);
              tg.GetTemp(current_asmdata.CurrAsmList,2,2,tt_normal,trunc_cw);
{$ifdef i8086}
              if current_settings.cputype<=cpu_286 then
                begin
                  emit_ref(A_FSTCW,S_NO,trunc_cw);
                  emit_ref(A_FSTCW,S_NO,saved_cw);
                  emit_none(A_FWAIT,S_NO);
                end
              else
{$endif i8086}
                begin
                  emit_ref(A_FNSTCW,S_NO,trunc_cw);
                  emit_ref(A_FNSTCW,S_NO,saved_cw);
                end;
              { set bits 8..11 of the modified copy (x87 precision and rounding control fields) }
              emit_const_ref(A_OR,S_W,$0f00,trunc_cw);
              load_fpu_location(left);
              emit_ref(A_FLDCW,S_NO,trunc_cw);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              { FISTP pops the x87 stack }
              emit_ref(A_FISTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
              { switch back to the control word that was active before }
              emit_ref(A_FLDCW,S_NO,saved_cw);
              emit_none(A_FWAIT,S_NO);
              tg.UnGetTemp(current_asmdata.CurrAsmList,saved_cw);
              tg.UnGetTemp(current_asmdata.CurrAsmList,trunc_cw);
            end
          else
            begin
              { FISTTP stores and pops without touching the control word }
              load_fpu_location(left);
              location_reset_ref(location,LOC_REFERENCE,OS_S64,0,[]);
              tg.GetTemp(current_asmdata.CurrAsmList,resultdef.size,resultdef.alignment,tt_normal,location.reference);
              emit_ref(A_FISTTP,S_IQ,location.reference);
              tcgx86(cg).dec_fpu_stack;
            end;
        end;
    end;
  { Generates code for Sqr() on a floating point operand (operand multiplied by itself).

    x87: FMUL ST0,ST0 on the loaded operand.
    Vector FPU: the result gets a fresh MM register; with AVX the three operand
    multiply is used, otherwise the operand is copied into the result register
    first and multiplied in place. }
  procedure tx86inlinenode.second_sqr_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          { x87 path }
          load_fpu_location(left);
          emit_reg_reg(A_FMUL,S_NO,NR_ST0,NR_ST0);
          exit;
        end;

      secondpass(left);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      if not UseAVX then
        begin
          { an operand on the x87 stack has to be moved to an MM register first;
            other locations are loaded directly into the result register }
          if left.location.loc in [LOC_CFPUREGISTER,LOC_FPUREGISTER] then
            hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
          cg.a_opmm_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,location.register,location.register,mms_movescalar);
        end
      else
        begin
          { three operand form: result:=left*left without a separate copy }
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          cg.a_opmm_reg_reg_reg(current_asmdata.CurrAsmList,OP_MUL,left.location.size,left.location.register,left.location.register,location.register,mms_movescalar);
        end;
    end;
  { Generates code for Sqrt() on a floating point operand.

    x87: FSQRT on the loaded operand.
    Vector FPU: (V)SQRTSS/(V)SQRTSD from the operand register into a fresh MM register. }
  procedure tx86inlinenode.second_sqrt_real;
    begin
      if not use_vectorfpu(resultdef) then
        begin
          { x87 path }
          load_fpu_location(left);
          if left.location.loc=LOC_REFERENCE then
            tg.ungetiftemp(current_asmdata.CurrAsmList,left.location.reference);
          emit_none(A_FSQRT,S_NO);
          exit;
        end;

      secondpass(left);
      hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
      location_reset(location,LOC_MMREGISTER,left.location.size);
      location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
      { S_NO instead of S_XMM is used for all variants, regardless of the register size,
        as the size of the memory location is 32/64 bit.
        In the AVX forms left.location.register is passed as 2nd parameter as well,
        this is crucial to break dependency chains }
      case tfloatdef(resultdef).floattype of
        s32real:
          begin
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSS,S_NO,left.location.register,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSS,S_NO,left.location.register,location.register));
          end;
        s64real:
          begin
            if UseAVX then
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSQRTSD,S_NO,left.location.register,left.location.register,location.register))
            else
              current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SQRTSD,S_NO,left.location.register,location.register));
          end;
        else
          begin
            { unexpected float type; each encoding path keeps its own error number }
            if UseAVX then
              internalerror(200510031)
            else
              internalerror(2005100303);
          end;
      end;
    end;
  { Generates x87 code for Ln(): the operand is loaded, then
    FLDLN2 / FXCH / FYL2X are emitted in this order.
    Per the x87 instruction set FLDLN2 pushes ln(2), FXCH swaps ST0 and ST1 and
    FYL2X computes ST1*log2(ST0) and pops, i.e. ln(2)*log2(x).
    Presumably load_fpu_location leaves the operand in ST0 - confirm against its implementation. }
  procedure tx86inlinenode.second_ln_real;
    begin
      load_fpu_location(left);
      { push the constant ln(2) }
      emit_none(A_FLDLN2,S_NO);
      { bring the operand back to the top of the stack }
      emit_none(A_FXCH,S_NO);
      { multiply log2 of the top by the constant below it }
      emit_none(A_FYL2X,S_NO);
    end;
  { Generates x87 code for Cos(): loads the operand and emits FCOS.
    On i8086 targets with a cpu type below cpu_386 the inherited
    implementation is used instead, as FCOS is 387+. }
  procedure tx86inlinenode.second_cos_real;
    begin
{$ifdef i8086}
      { FCOS is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FCOS,S_NO);
        end;
    end;
  { Generates x87 code for Sin(): loads the operand and emits FSIN.
    On i8086 targets with a cpu type below cpu_386 the inherited
    implementation is used instead, as FSIN is 387+. }
  procedure tx86inlinenode.second_sin_real;
    begin
{$ifdef i8086}
      { FSIN is 387+ }
      if current_settings.cputype < cpu_386 then
        inherited
      else
{$endif i8086}
        begin
          load_fpu_location(left);
          emit_none(A_FSIN,S_NO);
        end;
    end;
  { Generates code for the prefetch intrinsic: if the operand ends up in a
    (constant) memory reference, its address is loaded into a register and a
    PREFETCHNTA on that address is emitted. Operands in any other location
    generate no prefetch instruction.

    On i386/i8086 nothing is done at all (the operand is not even evaluated)
    unless the selected cpu type is at least cpu_Pentium3. }
  procedure tx86inlinenode.second_prefetch;
    var
      ref : treference;
      r : tregister;
      checkpointer_used : boolean;
    begin
{$if defined(i386) or defined(i8086)}
      if current_settings.cputype>=cpu_Pentium3 then
{$endif i386 or i8086}
        begin
          { do not call Checkpointer for left node: switch pointer checking off
            for the operand while its code is generated ... }
          checkpointer_used:=(cs_checkpointer in current_settings.localswitches);
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,false);
          secondpass(left);
          { ... and switch it back on afterwards. This call used to pass false again,
            so the switch was never restored on the operand }
          if checkpointer_used then
            node_change_local_switch(left,cs_checkpointer,true);
          case left.location.loc of
            LOC_CREFERENCE,
            LOC_REFERENCE:
              begin
                { materialise the address in a register and prefetch through it }
                r:=cg.getintregister(current_asmdata.CurrAsmList,OS_ADDR);
                cg.a_loadaddr_ref_reg(current_asmdata.CurrAsmList,left.location.reference,r);
                reference_reset_base(ref,r,0,left.location.reference.temppos,left.location.reference.alignment,left.location.reference.volatility);
                current_asmdata.CurrAsmList.concat(taicpu.op_ref(A_PREFETCHNTA,S_NO,ref));
              end;
            else
              { nothing to prefetch };
          end;
        end;
    end;
  { Generates branch-free code for Abs() on an integer operand.

    i8086/i386 without CMOV support:
      result:=left; left:=left sar (bits-1); result:=(result xor left)-left
    Otherwise:
      tmp:=left; result:=left; tmp:=-tmp; if sign flag clear then result:=tmp (CMOVNS)

    With overflow checking enabled, FPC_OVERFLOW is called if the overflow flag
    is set after the SUB respectively NEG. }
  procedure tx86inlinenode.second_abs_long;
    var
      hregister : tregister;
      opsize : tcgsize;
      hp : taicpu;
      hl: TAsmLabel;
    begin
{$if defined(i8086) or defined(i386)}
      if not(CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) then
        begin
          opsize:=def_cgsize(left.resultdef);
          secondpass(left);
          { left's register is overwritten by the SAR below, so it is forced
            into a register with maybeconst=false }
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { turn left into a mask: all ones if negative, zero otherwise }
          cg.a_op_const_reg(current_asmdata.CurrAsmList,OP_SAR,opsize,tcgsize2size[opsize]*8-1,left.location.register);
          { (value xor mask)-mask negates the value exactly if the mask is all ones }
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_XOR,opsize,left.location.register,location.register);
          cg.a_op_reg_reg(current_asmdata.CurrAsmList,OP_SUB,opsize,left.location.register,location.register);
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl);
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl);
            end;
        end
      else
{$endif i8086 or i386}
        begin
          opsize:=def_cgsize(left.resultdef);
          secondpass(left);
          hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,true);
          hregister:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          location:=left.location;
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);
          { hregister receives the negated value, location.register the unchanged one }
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,hregister);
          cg.a_load_reg_reg(current_asmdata.CurrAsmList,opsize,opsize,left.location.register,location.register);
          { the flags are live from the NEG up to and including the CMOV }
          cg.a_reg_alloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
          emit_reg(A_NEG,tcgsize2opsize[opsize],hregister);
          if cs_check_overflow in current_settings.localswitches then
            begin
              current_asmdata.getjumplabel(hl);
              cg.a_jmp_flags(current_asmdata.CurrAsmList,F_NO,hl);
              cg.a_call_name(current_asmdata.CurrAsmList,'FPC_OVERFLOW',false);
              cg.a_label(current_asmdata.CurrAsmList,hl);
            end;
          { take the negated value if it is not negative, i.e. if the operand was negative }
          hp:=taicpu.op_reg_reg(A_CMOVcc,tcgsize2opsize[opsize],hregister,location.register);
          hp.condition:=C_NS;
          current_asmdata.CurrAsmList.concat(hp);
          { release the flags only after the CMOV which reads them has been added to the
            list; previously the deallocation marker was emitted in front of the CMOV }
          cg.a_reg_dealloc(current_asmdata.CurrAsmList, NR_DEFAULTFLAGS);
        end;
    end;
  1038. {*****************************************************************************
  1039. INCLUDE/EXCLUDE GENERIC HANDLING
  1040. *****************************************************************************}
  { Generates code for Include(set,element) and Exclude(set,element).

    Constant element: the bit mask is computed at compile time and applied to
    the set with OR (include) respectively AND with the inverted mask (exclude).
    Variable element: BTS (include) respectively BTR (exclude) is used with the
    element in a register.

    The inherited implementation is used instead
      - on i8086 for cpu types below cpu_386 (BTS and BTR are 386+)
      - on the other targets when not optimizing for size and a non-constant
        element would be applied to a set in memory }
  procedure tx86inlinenode.second_IncludeExclude;
    var
      setnode,
      elemnode : tnode;
      elemreg,
      setreg : tregister;
      setbase : aint;
      bitsperop,
      mask : longint;
      cgop : topcg;
      asmop : tasmop;
      opdef : tdef;
      opsize,
      orgsize : tcgsize;
    begin
{$ifdef i8086}
      { BTS and BTR are 386+ }
      if current_settings.cputype < cpu_386 then
{$else i8086}
      { bts on memory locations is very slow, so even the default code is faster }
      if not(cs_opt_size in current_settings.optimizerswitches) and (tcallparanode(tcallparanode(left).right).left.expectloc<>LOC_CONSTANT) and
         (tcallparanode(left).left.expectloc=LOC_REFERENCE) then
{$endif i8086}
        begin
          inherited;
          exit;
        end;

      { first parameter: the set, second parameter: the element }
      setnode:=tcallparanode(left).left;
      elemnode:=tcallparanode(tcallparanode(left).right).left;

      { small sets are handled in their own size, all other sets in 32 bit pieces }
      if not is_smallset(tcallparanode(left).resultdef) then
        begin
          opdef:=u32inttype;
          opsize:=OS_32;
        end
      else
        begin
          opdef:=tcallparanode(left).resultdef;
          opsize:=int_cgsize(opdef.size)
        end;
      bitsperop:=(8*tcgsize2size[opsize]);

      secondpass(setnode);
      secondpass(elemnode);

      setbase:=tsetdef(setnode.resultdef).setbase;
      if elemnode.location.loc<>LOC_CONSTANT then
        begin
          orgsize:=opsize;
          { the bit test instructions are not used with 8 bit operands }
          if opsize in [OS_8,OS_S8] then
            begin
              opdef:=u32inttype;
              opsize:=OS_32;
            end;
          { determine asm operator }
          if inlinenumber=in_include_x_y then
            asmop:=A_BTS
          else
            asmop:=A_BTR;

          hlcg.location_force_reg(current_asmdata.CurrAsmList,elemnode.location,elemnode.resultdef,opdef,true);
          register_maybe_adjust_setbase(current_asmdata.CurrAsmList,elemnode.resultdef,elemnode.location,setbase);
          elemreg:=elemnode.location.register;
          if setnode.location.loc=LOC_REFERENCE then
            emit_reg_ref(asmop,tcgsize2opsize[opsize],elemreg,setnode.location.reference)
          else
            begin
              { second argument can't be an 8 bit register either }
              setreg:=setnode.location.register;
              if (orgsize in [OS_8,OS_S8]) then
                setreg:=cg.makeregsize(current_asmdata.CurrAsmList,setreg,opsize);
              emit_reg_reg(asmop,tcgsize2opsize[opsize],elemreg,setreg);
            end;
        end
      else
        begin
          { calculate bit position }
          mask:=1 shl ((elemnode.location.value-setbase) mod bitsperop);
          { determine operator }
          if inlinenumber=in_include_x_y then
            cgop:=OP_OR
          else
            begin
              cgop:=OP_AND;
              mask:=not(mask);
            end;
          case setnode.location.loc of
            LOC_REFERENCE :
              begin
                { advance the reference to the piece of the set which contains the bit }
                inc(setnode.location.reference.offset,
                  ((elemnode.location.value-setbase) div bitsperop)*tcgsize2size[opsize]);
                cg.a_op_const_ref(current_asmdata.CurrAsmList,cgop,opsize,mask,setnode.location.reference);
              end;
            LOC_CSUBSETREG,
            LOC_CREGISTER :
              hlcg.a_op_const_loc(current_asmdata.CurrAsmList,cgop,setnode.resultdef,mask,setnode.location);
            else
              internalerror(200405022);
          end;
        end;
    end;
  { Generates a POPCNT instruction for the operand.

    The operation is done on the unsigned variant of the operand size, 8 bit
    operands being widened to 16 bit. The operand may stay in memory; it is
    only forced into a register if it is neither in a register nor in memory
    or if its size differs from the operation size. If the result type is one
    byte in size, the result register is resized to 8 bit at the end. }
  procedure tx86inlinenode.second_popcnt;
    var
      opsize : tcgsize;
    begin
      secondpass(left);

      opsize:=tcgsize2unsigned[left.location.size];
      { no 8 Bit popcont }
      if opsize=OS_8 then
        opsize:=OS_16;

      if (left.location.size<>opsize) or
         not(left.location.loc in [LOC_REGISTER,LOC_CREGISTER,LOC_REFERENCE,LOC_CREFERENCE]) then
        hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,cgsize_orddef(opsize),true);

      location_reset(location,LOC_REGISTER,opsize);
      location.register:=cg.getintregister(current_asmdata.CurrAsmList,opsize);

      { register or memory source operand }
      case left.location.loc of
        LOC_REGISTER,
        LOC_CREGISTER:
          emit_reg_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.register,location.register);
        else
          emit_ref_reg(A_POPCNT,TCGSize2OpSize[opsize],left.location.reference,location.register);
      end;

      if resultdef.size=1 then
        begin
          location.size:=OS_8;
          location.register:=cg.makeregsize(current_asmdata.CurrAsmList,location.register,location.size);
        end;
    end;
  { Generates a scalar fused multiply-add instruction for the fma intrinsic,
    computing paraarray[1]*paraarray[2]+paraarray[3].

    Unary minus nodes directly on the parameters are not evaluated; instead the
    matching negating instruction variant (VFNMADD/VFMSUB/VFNMSUB) is selected.
    At most one parameter is used as memory operand, all others are forced
    into MM registers. On i8086, or if the selected fpu type has neither FMA
    nor FMA4, an internal error is raised. }
  procedure tx86inlinenode.second_fma;
{$ifndef i8086}
    const
      { indexed by [product negated, third operand negated, float type, operand layout]

        layouts 0..2 select the 231 form (destination register holds the addend),
        layout 3 selects the 213 form (destination register holds a factor).
        For memory operands the layout is the index of the parameter in memory. }
      op : array[false..true,false..true,s32real..s64real,0..3] of TAsmOp =
        (
          { positive product }
          (
            { positive third operand }
            ((A_VFMADD231SS,A_VFMADD231SS,A_VFMADD231SS,A_VFMADD213SS),
             (A_VFMADD231SD,A_VFMADD231SD,A_VFMADD231SD,A_VFMADD213SD)
            ),
            { negative third operand }
            ((A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB231SS,A_VFMSUB213SS),
             (A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB231SD,A_VFMSUB213SD)
            )
          ),
          { negative product }
          (
            { positive third operand }
            ((A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD231SS,A_VFNMADD213SS),
             (A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD231SD,A_VFNMADD213SD)
            ),
            { negative third operand }
            ((A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB231SS,A_VFNMSUB213SS),
             (A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB231SD,A_VFNMSUB213SD)
            )
          )
        );

    var
      paraarray : array[1..3] of tnode;
      memop,
      i : integer;
      negop3,
      negproduct : boolean;
{$endif i8086}
    begin
{$ifndef i8086}
      if (fpu_capabilities[current_settings.fputype]*[FPUX86_HAS_FMA,FPUX86_HAS_FMA4])<>[] then
        begin
          negop3:=false;
          negproduct:=false;
          { the parameter list is walked from its head, which yields the third parameter first }
          paraarray[3]:=tcallparanode(parameters).paravalue;
          paraarray[2]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[1]:=tcallparanode(tcallparanode(tcallparanode(parameters).nextpara).nextpara).paravalue;

          { skip unary minus nodes and fold the sign into the instruction instead:
            a minus on one of the factors flips the sign of the product,
            a minus on the third parameter negates the addend.
            The skipped nodes are not released here, they are kept and released together
            with the other nodes, only no code is generated for them }
          for i:=1 to 3 do
            if paraarray[i].nodetype=unaryminusn then
              begin
                paraarray[i]:=tunarynode(paraarray[i]).left;
                if i=3 then
                  negop3:=true
                else
                  negproduct:=not(negproduct);
              end;

          for i:=1 to 3 do
            secondpass(paraarray[i]);

          { only one memory operand is allowed: memop is the index of the parameter
            which stays in memory, 0 if there is none.
            In case parameters come on the FPU stack, we have to pop them in reverse
            order as we called secondpass }
          memop:=0;
          for i:=3 downto 1 do
            if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
              begin
                if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and (memop=0) then
                  memop:=i
                else
                  hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
              end;

          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if memop=0 then
            begin
              { all operands are in registers: try to use the location which is already in a
                temp. mm register as destination, so the compiler might be able to re-use the register }
              if paraarray[1].location.loc=LOC_MMREGISTER then
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                    paraarray[1].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
                    paraarray[3].location.register,paraarray[2].location.register,location.register);
                end
              else if paraarray[2].location.loc=LOC_MMREGISTER then
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
                    paraarray[2].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,3],S_NO,
                    paraarray[3].location.register,paraarray[1].location.register,location.register);
                end
              else
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
                    paraarray[3].location.register,location.register,mms_movescalar);
                  emit_reg_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,0],S_NO,
                    paraarray[1].location.register,paraarray[2].location.register,location.register);
                end;
            end
          else
            case memop of
              1,2:
                begin
                  { one factor is in memory: the destination starts as the addend and the
                    other factor (index 3-memop) is the register operand }
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[3].resultdef,resultdef,
                    paraarray[3].location.register,location.register,mms_movescalar);
                  emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                    paraarray[memop].location.reference,paraarray[3-memop].location.register,location.register);
                end;
              3:
                begin
                  { the addend is in memory: the destination starts as the first factor }
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                    paraarray[1].location.register,location.register,mms_movescalar);
                  emit_ref_reg_reg(op[negproduct,negop3,tfloatdef(resultdef).floattype,memop],S_NO,
                    paraarray[3].location.reference,paraarray[2].location.register,location.register);
                end
              else
                internalerror(2014041301);
            end;
        end
      else
{$endif i8086}
        internalerror(2014032301);
    end;
  { Generates code for Frac() on a vector FPU operand: the operand is rounded
    with the immediate 3 and the rounded value is subtracted from the operand.
    With AVX512DQ a single VREDUCESS/VREDUCESD (immediate 3) is used instead.
    If operand and result float type differ, the result is converted at the end.

    Raises an internal error if the result type is not handled by the vector FPU. }
  procedure tx86inlinenode.second_frac_real;
    var
      extrareg,
      srcreg : TRegister;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052101)
      else
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,def_cgsize(resultdef));
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          { the operand is in an MM register from here on }
          srcreg:=left.location.register;
          if not UseAVX then
            begin
              { result:=operand; extrareg:=rounded operand; result:=result-extrareg }
              extrareg:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
              cg.a_loadmm_loc_reg(current_asmdata.CurrAsmList,location.size,left.location,location.register,mms_movescalar);
              case tfloatdef(left.resultdef).floattype of
                s32real:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,srcreg,extrareg));
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSS,S_NO,extrareg,location.register));
                  end;
                s64real:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,srcreg,extrareg));
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_SUBSD,S_NO,extrareg,location.register));
                  end;
                else
                  internalerror(2017052103);
              end;
            end
          else
            case tfloatdef(left.resultdef).floattype of
              s32real:
                begin
{$ifndef i8086}
                  if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESS,S_NO,3,srcreg,srcreg,location.register))
                  else
{$endif not i8086}
                    begin
                      { using the operand register here as 3rd parameter is crucial to break dependency chains }
                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,srcreg,srcreg,location.register));
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSS,S_NO,location.register,srcreg,location.register));
                    end;
                end;
              s64real:
                begin
{$ifndef i8086}
                  if UseAVX512 and (FPUX86_HAS_AVX512DQ in fpu_capabilities[current_settings.fputype]) then
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VREDUCESD,S_NO,3,srcreg,srcreg,location.register))
                  else
{$endif not i8086}
                    begin
                      { using the operand register here as 3rd parameter is crucial to break dependency chains }
                      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,srcreg,srcreg,location.register));
                      current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg_reg(A_VSUBSD,S_NO,location.register,srcreg,location.register));
                    end;
                end;
              else
                internalerror(2017052102);
            end;
          { the computation was done in the operand's float type, convert if the result type differs }
          if tfloatdef(left.resultdef).floattype<>tfloatdef(resultdef).floattype then
            hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,left.resultdef,resultdef,location.register,location.register,mms_movescalar);
        end;
    end;
  { Generates code for Int() on a vector FPU operand: a single
    (V)ROUNDSS/(V)ROUNDSD with the immediate 3 from the operand register
    into a fresh MM register.

    Raises an internal error if the result type is not handled by the vector FPU. }
  procedure tx86inlinenode.second_int_real;
    begin
      if not use_vectorfpu(resultdef) then
        internalerror(2017052107)
      else
        begin
          secondpass(left);
          hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,left.location,left.resultdef,true);
          location_reset(location,LOC_MMREGISTER,left.location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);
          { in the AVX forms, using left.location.register as 3rd parameter as well is
            crucial to break dependency chains }
          case tfloatdef(resultdef).floattype of
            s32real:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSS,S_NO,3,left.location.register,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSS,S_NO,3,left.location.register,location.register));
              end;
            s64real:
              begin
                if UseAVX then
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg_reg(A_VROUNDSD,S_NO,3,left.location.register,left.location.register,location.register))
                else
                  current_asmdata.CurrAsmList.concat(taicpu.op_const_reg_reg(A_ROUNDSD,S_NO,3,left.location.register,location.register));
              end;
            else
              begin
                { unexpected float type; each encoding path keeps its own error number }
                if UseAVX then
                  internalerror(2017052105)
                else
                  internalerror(2017052106);
              end;
          end;
        end;
    end;
  { Generates code for High() on a dynamic array.

    The array pointer is decremented by one with SUB. For a nil array this
    leaves -1 in the register and sets the carry flag, so the load is skipped
    and -1 is the result. Otherwise the value stored at -sizeof(ossint) in front
    of the array data is loaded and used as result.

    Raises an internal error if the operand is not a dynamic array. }
  procedure tx86inlinenode.second_high;
    var
      donelab : tasmlabel;
      arrreg,
      highreg : tregister;
      href : treference;
    begin
      secondpass(left);
      if not(is_dynamic_array(left.resultdef)) then
        Internalerror(2019122809);

      { the register is modified below, so the operand is forced into a register
        with maybeconst=false }
      hlcg.location_force_reg(current_asmdata.CurrAsmList,left.location,left.resultdef,left.resultdef,false);
      arrreg:=left.location.register;
      current_asmdata.getjumplabel(donelab);

      { do not use the cgs for the subtraction, as they might emit dec instead of a sub
        instruction; dec does not touch the carry flag, so the jump below would not work }
      current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_SUB,TCGSize2OpSize[def_cgsize(left.resultdef)],1,arrreg));
      { carry set: the pointer was nil and the register holds -1 already }
      cg.a_jmp_flags(current_asmdata.CurrAsmList,F_C,donelab);

      { the +1 in the offset compensates for the decrement of the pointer above }
      hlcg.reference_reset_base(href,left.resultdef,arrreg,-ossinttype.size+1,ctempposinvalid,ossinttype.alignment,[]);
      { reuse the register which held the array pointer for the result, so the -1
        of the nil case and the loaded value end up in the same register }
      highreg:=cg.makeregsize(current_asmdata.CurrAsmList,arrreg,def_cgsize(resultdef));
      hlcg.a_load_ref_reg(current_asmdata.CurrAsmList,ossinttype,resultdef,href,highreg);

      cg.a_label(current_asmdata.CurrAsmList,donelab);
      location_reset(location,LOC_REGISTER,def_cgsize(resultdef));
      location.register:=highreg;
    end;
  { Generates inline code for the Min/Max intrinsics.

    Two code paths exist:
      - Floating point (not compiled for i8086): used when the result is a
        single or double; on i386 this additionally requires an fputype of at
        least fpu_sse (single) or fpu_sse2 (double).  A scalar
        MINSS/MINSD/MAXSS/MAXSD instruction, or its AVX V-form when UseAVX
        is set, is emitted and the result ends up in a fresh MM register.
      - Integer: used for 32-bit integers (and 64-bit integers on x86_64);
        on targets other than x86_64 the selected CPU must support CMOV.
        The first operand is loaded into a fresh integer register, a CMP is
        emitted and the second operand is conditionally moved over the
        result with CMOVcc.

    Any other combination raises internalerror(2020120503). }
  procedure tx86inlinenode.second_minmax;
{$ifndef i8086}
    const
      { Indexed by [is a max operation, UseAVX, float type of the result] }
      oparray : array[false..true,false..true,s32real..s64real] of TAsmOp =
        (
          (
            (A_MINSS,A_MINSD),
            (A_VMINSS,A_VMINSD)
          ),
          (
            (A_MAXSS,A_MAXSD),
            (A_VMAXSS,A_VMAXSD)
          )
        );
{$endif i8086}
    var
{$ifndef i8086}
      memop : integer;
      gotmem : boolean;
      op: TAsmOp;
{$endif i8086}
      i : integer;
      paraarray : array[1..2] of tnode;
      instr: TAiCpu;
      opsize: topsize;
    begin
{$ifndef i8086}
      if
{$ifdef i386}
        ((current_settings.fputype>=fpu_sse) and is_single(resultdef)) or
        ((current_settings.fputype>=fpu_sse2) and is_double(resultdef))
{$else i386}
        is_single(resultdef) or is_double(resultdef)
{$endif i386}
      then
        begin
          { the parameter list is walked from "parameters" via nextpara, so
            the node reached through nextpara becomes paraarray[1] }
          paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[2]:=tcallparanode(parameters).paravalue;

          for i:=low(paraarray) to high(paraarray) do
            secondpass(paraarray[i]);

          { only one memory operand is allowed: the first operand found in
            memory is kept as is (memop remembers which one), everything
            else that is not already in an MM register is forced into one }
          gotmem:=false;
          memop:=0;
          for i:=low(paraarray) to high(paraarray) do
            begin
              if not(paraarray[i].location.loc in [LOC_MMREGISTER,LOC_CMMREGISTER]) then
                begin
                  if (paraarray[i].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and not(gotmem) then
                    begin
                      memop:=i;
                      gotmem:=true;
                    end
                  else
                    hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[i].location,paraarray[i].resultdef,true);
                end;
            end;

          { due to min/max behaviour that it loads always the second operand (must be the else assignment) into destination if
            one of the operands is a NaN, we cannot swap operands to omit a mova operation in case fastmath is off }
          if not(cs_opt_fastmath in current_settings.optimizerswitches) and gotmem and (memop=1) then
            begin
              hlcg.location_force_mmregscalar(current_asmdata.CurrAsmList,paraarray[1].location,paraarray[1].resultdef,true);
              gotmem:=false;
            end;

          op:=oparray[inlinenumber in [in_max_single,in_max_double],UseAVX,tfloatdef(resultdef).floattype];
          location_reset(location,LOC_MMREGISTER,paraarray[1].location.size);
          location.register:=cg.getmmregister(current_asmdata.CurrAsmList,location.size);

          if gotmem then
            begin
              if UseAVX then
                { three operand form: memory operand, register operand,
                  destination; no separate move into the result is needed }
                case memop of
                  1:
                    emit_ref_reg_reg(op,S_NO,
                      paraarray[1].location.reference,paraarray[2].location.register,location.register);
                  2:
                    emit_ref_reg_reg(op,S_NO,
                      paraarray[2].location.reference,paraarray[1].location.register,location.register);
                  else
                    internalerror(2020120504);
                end
              else
                { two operand form: copy the register operand into the
                  result first, then combine it with the memory operand }
                case memop of
                  1:
                    begin
                      hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[2].resultdef,resultdef,
                        paraarray[2].location.register,location.register,mms_movescalar);
                      emit_ref_reg(op,S_NO,
                        paraarray[1].location.reference,location.register);
                    end;
                  2:
                    begin
                      hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                        paraarray[1].location.register,location.register,mms_movescalar);
                      emit_ref_reg(op,S_NO,
                        paraarray[2].location.reference,location.register);
                    end;
                  else
                    internalerror(2020120601);
                end;
            end
          else
            begin
              { both operands are in MM registers }
              if UseAVX then
                emit_reg_reg_reg(op,S_NO,
                  paraarray[2].location.register,paraarray[1].location.register,location.register)
              else
                begin
                  hlcg.a_loadmm_reg_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,
                    paraarray[1].location.register,location.register,mms_movescalar);
                  emit_reg_reg(op,S_NO,
                    paraarray[2].location.register,location.register)
                end;
            end;
        end
      else
{$endif i8086}
      if
{$ifndef x86_64}
        (CPUX86_HAS_CMOV in cpu_capabilities[current_settings.cputype]) and
{$endif x86_64}
        (
{$ifdef x86_64}
          is_64bitint(resultdef) or
{$endif x86_64}
          is_32bitint(resultdef)
        ) then
        begin
          { paraarray[1] is the right-hand side }
          paraarray[1]:=tcallparanode(tcallparanode(parameters).nextpara).paravalue;
          paraarray[2]:=tcallparanode(parameters).paravalue;

          for i:=low(paraarray) to high(paraarray) do
            secondpass(paraarray[i]);

          if paraarray[2].location.loc = LOC_CONSTANT then
            begin
              { Swap the parameters so the constant is on the right }
              paraarray[2]:=paraarray[1];
              paraarray[1]:=tcallparanode(parameters).paravalue;
            end;

          { the result register starts out holding paraarray[1]; the CMOVcc
            built below overwrites it with paraarray[2] when the condition
            selected by the intrinsic holds }
          location_reset(location,LOC_REGISTER,paraarray[1].location.size);
          location.register:=cg.getintregister(current_asmdata.CurrAsmList,location.size);

          hlcg.a_load_loc_reg(current_asmdata.CurrAsmList,paraarray[1].resultdef,resultdef,paraarray[1].location,location.register);
          cg.a_reg_alloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);

{$ifdef x86_64}
          if is_64bitint(resultdef) then
            opsize := S_Q
          else
{$endif x86_64}
            opsize := S_L;

          { Try to use references as is, unless they would trigger internal
            error 200502052.  The reference part of the location is only
            inspected when the location really is a reference, as it carries
            no meaning for register or constant locations }
          if (cs_create_pic in current_settings.moduleswitches) and
            (paraarray[2].location.loc in [LOC_REFERENCE,LOC_CREFERENCE]) and
            Assigned(paraarray[2].location.reference.symbol) then
            hlcg.location_force_reg(current_asmdata.CurrAsmList,paraarray[2].location,
              paraarray[2].resultdef,paraarray[2].resultdef,true);

          { emit the CMP now and prepare, but do not yet emit, the CMOVcc;
            its condition is filled in afterwards }
          case paraarray[1].location.loc of
            LOC_CONSTANT:
              case paraarray[2].location.loc of
                LOC_REFERENCE,LOC_CREFERENCE:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_ref(A_CMP,opsize,
                      paraarray[1].location.value,paraarray[2].location.reference));
                    instr:=TAiCpu.op_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
                  end;
                LOC_REGISTER,LOC_CREGISTER:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_const_reg(A_CMP,opsize,
                      paraarray[1].location.value,paraarray[2].location.register));
                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
                  end;
                else
                  InternalError(2021121907);
              end;
            LOC_REFERENCE,LOC_CREFERENCE:
              case paraarray[2].location.loc of
                LOC_REFERENCE,LOC_CREFERENCE:
                  begin
                    { The reference has already been stored at location.register, so use that }
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_ref(A_CMP,opsize,
                      location.register,paraarray[2].location.reference));
                    instr:=TAiCpu.op_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
                  end;
                LOC_REGISTER,LOC_CREGISTER:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_ref_reg(A_CMP,opsize,
                      paraarray[1].location.reference,paraarray[2].location.register));
                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
                  end;
                else
                  InternalError(2021121906);
              end;
            LOC_REGISTER,LOC_CREGISTER:
              case paraarray[2].location.loc of
                LOC_REFERENCE,LOC_CREFERENCE:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_ref(A_CMP,opsize,
                      paraarray[1].location.register,paraarray[2].location.reference));
                    instr:=TAiCpu.op_ref_reg(A_CMOVcc,opsize,paraarray[2].location.reference,location.register);
                  end;
                LOC_REGISTER,LOC_CREGISTER:
                  begin
                    current_asmdata.CurrAsmList.concat(taicpu.op_reg_reg(A_CMP,opsize,
                      paraarray[1].location.register,paraarray[2].location.register));
                    instr:=TAiCpu.op_reg_reg(A_CMOVcc,opsize,paraarray[2].location.register,location.register);
                  end;
                else
                  InternalError(2021121905);
              end;
            else
              InternalError(2021121904);
          end;

          { min uses "less"/"below", max uses "greater"/"above"; the
            L/G conditions serve the longint/int64 variants, B/A the
            dword/qword variants }
          case inlinenumber of
            in_min_longint,
            in_min_int64:
              instr.condition := C_L;
            in_min_dword,
            in_min_qword:
              instr.condition := C_B;
            in_max_longint,
            in_max_int64:
              instr.condition := C_G;
            in_max_dword,
            in_max_qword:
              instr.condition := C_A;
            else
              Internalerror(2021121903);
          end;

          current_asmdata.CurrAsmList.concat(instr);
          cg.a_reg_dealloc(current_asmdata.CurrAsmList,NR_DEFAULTFLAGS);
        end
      else
        internalerror(2020120503);
    end;
  1672. end.